Merge tag 'pm+acpi-3.20-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafae...
[firefly-linux-kernel-4.4.55.git] / net / core / dev.c
index 7fe82929f5094ee37a49dccd9df3a3e2b63a64c7..d030575532a22a778170bcdbff8dac8566cdf120 100644 (file)
@@ -371,9 +371,10 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 static inline struct list_head *ptype_head(const struct packet_type *pt)
 {
        if (pt->type == htons(ETH_P_ALL))
-               return &ptype_all;
+               return pt->dev ? &pt->dev->ptype_all : &ptype_all;
        else
-               return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+               return pt->dev ? &pt->dev->ptype_specific :
+                                &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
 }
 
 /**
@@ -1734,6 +1735,23 @@ static inline int deliver_skb(struct sk_buff *skb,
        return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
+static inline void deliver_ptype_list_skb(struct sk_buff *skb,
+                                         struct packet_type **pt,
+                                         struct net_device *dev, __be16 type,
+                                         struct list_head *ptype_list)
+{
+       struct packet_type *ptype, *pt_prev = *pt;
+
+       list_for_each_entry_rcu(ptype, ptype_list, list) {
+               if (ptype->type != type)        /* wrong protocol, skip */
+                       continue;
+               if (pt_prev)                    /* flush deferred handler */
+                       deliver_skb(skb, pt_prev, dev);
+               pt_prev = ptype;                /* defer this match one step */
+       }
+       *pt = pt_prev;  /* last match is handed back undelivered */
+}
+
 static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
 {
        if (!ptype->af_packet_priv || !skb->sk)
@@ -1757,45 +1775,54 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
        struct packet_type *ptype;
        struct sk_buff *skb2 = NULL;
        struct packet_type *pt_prev = NULL;
+       struct list_head *ptype_list = &ptype_all;
 
        rcu_read_lock();
-       list_for_each_entry_rcu(ptype, &ptype_all, list) {
+again:
+       list_for_each_entry_rcu(ptype, ptype_list, list) {
                /* Never send packets back to the socket
                 * they originated from - MvS (miquels@drinkel.ow.org)
                 */
-               if ((ptype->dev == dev || !ptype->dev) &&
-                   (!skb_loop_sk(ptype, skb))) {
-                       if (pt_prev) {
-                               deliver_skb(skb2, pt_prev, skb->dev);
-                               pt_prev = ptype;
-                               continue;
-                       }
+               if (skb_loop_sk(ptype, skb))
+                       continue;
 
-                       skb2 = skb_clone(skb, GFP_ATOMIC);
-                       if (!skb2)
-                               break;
+               if (pt_prev) {
+                       deliver_skb(skb2, pt_prev, skb->dev);
+                       pt_prev = ptype;
+                       continue;
+               }
 
-                       net_timestamp_set(skb2);
+               /* need to clone skb, done only once */
+               skb2 = skb_clone(skb, GFP_ATOMIC);
+               if (!skb2)
+                       goto out_unlock;
 
-                       /* skb->nh should be correctly
-                          set by sender, so that the second statement is
-                          just protection against buggy protocols.
-                        */
-                       skb_reset_mac_header(skb2);
-
-                       if (skb_network_header(skb2) < skb2->data ||
-                           skb_network_header(skb2) > skb_tail_pointer(skb2)) {
-                               net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
-                                                    ntohs(skb2->protocol),
-                                                    dev->name);
-                               skb_reset_network_header(skb2);
-                       }
+               net_timestamp_set(skb2);
 
-                       skb2->transport_header = skb2->network_header;
-                       skb2->pkt_type = PACKET_OUTGOING;
-                       pt_prev = ptype;
+               /* skb->nh should be correctly
+                * set by sender, so that the second statement is
+                * just protection against buggy protocols.
+                */
+               skb_reset_mac_header(skb2);
+
+               if (skb_network_header(skb2) < skb2->data ||
+                   skb_network_header(skb2) > skb_tail_pointer(skb2)) {
+                       net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
+                                            ntohs(skb2->protocol),
+                                            dev->name);
+                       skb_reset_network_header(skb2);
                }
+
+               skb2->transport_header = skb2->network_header;
+               skb2->pkt_type = PACKET_OUTGOING;
+               pt_prev = ptype;
        }
+
+       if (ptype_list == &ptype_all) {
+               ptype_list = &dev->ptype_all;
+               goto again;
+       }
+out_unlock:
        if (pt_prev)
                pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
        rcu_read_unlock();
@@ -2549,7 +2576,7 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
        if (skb->encapsulation)
                features &= dev->hw_enc_features;
 
-       if (!vlan_tx_tag_present(skb)) {
+       if (!skb_vlan_tag_present(skb)) {
                if (unlikely(protocol == htons(ETH_P_8021Q) ||
                             protocol == htons(ETH_P_8021AD))) {
                        struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2588,7 +2615,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
        unsigned int len;
        int rc;
 
-       if (!list_empty(&ptype_all))
+       if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
                dev_queue_xmit_nit(skb, dev);
 
        len = skb->len;
@@ -2630,7 +2657,7 @@ out:
 static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
                                          netdev_features_t features)
 {
-       if (vlan_tx_tag_present(skb) &&
+       if (skb_vlan_tag_present(skb) &&
            !vlan_hw_offload_capable(features, skb->vlan_proto))
                skb = __vlan_hwaccel_push_inside(skb);
        return skb;
@@ -3003,6 +3030,8 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 /* One global table that all flow-based protocols share. */
 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
+u32 rps_cpu_mask __read_mostly;
+EXPORT_SYMBOL(rps_cpu_mask);
 
 struct static_key rps_needed __read_mostly;
 
@@ -3059,16 +3088,17 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                       struct rps_dev_flow **rflowp)
 {
-       struct netdev_rx_queue *rxqueue;
-       struct rps_map *map;
+       const struct rps_sock_flow_table *sock_flow_table;
+       struct netdev_rx_queue *rxqueue = dev->_rx;
        struct rps_dev_flow_table *flow_table;
-       struct rps_sock_flow_table *sock_flow_table;
+       struct rps_map *map;
        int cpu = -1;
-       u16 tcpu;
+       u32 tcpu;
        u32 hash;
 
        if (skb_rx_queue_recorded(skb)) {
                u16 index = skb_get_rx_queue(skb);
+
                if (unlikely(index >= dev->real_num_rx_queues)) {
                        WARN_ONCE(dev->real_num_rx_queues > 1,
                                  "%s received packet on queue %u, but number "
@@ -3076,39 +3106,40 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                                  dev->name, index, dev->real_num_rx_queues);
                        goto done;
                }
-               rxqueue = dev->_rx + index;
-       } else
-               rxqueue = dev->_rx;
+               rxqueue += index;
+       }
 
+       /* Avoid computing hash if RFS/RPS is not active for this rxqueue */
+
+       flow_table = rcu_dereference(rxqueue->rps_flow_table);
        map = rcu_dereference(rxqueue->rps_map);
-       if (map) {
-               if (map->len == 1 &&
-                   !rcu_access_pointer(rxqueue->rps_flow_table)) {
-                       tcpu = map->cpus[0];
-                       if (cpu_online(tcpu))
-                               cpu = tcpu;
-                       goto done;
-               }
-       } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
+       if (!flow_table && !map)
                goto done;
-       }
 
        skb_reset_network_header(skb);
        hash = skb_get_hash(skb);
        if (!hash)
                goto done;
 
-       flow_table = rcu_dereference(rxqueue->rps_flow_table);
        sock_flow_table = rcu_dereference(rps_sock_flow_table);
        if (flow_table && sock_flow_table) {
-               u16 next_cpu;
                struct rps_dev_flow *rflow;
+               u32 next_cpu;
+               u32 ident;
+
+               /* First check the global flow table for a match */
+               ident = sock_flow_table->ents[hash & sock_flow_table->mask];
+               if ((ident ^ hash) & ~rps_cpu_mask)
+                       goto try_rps;
+
+               next_cpu = ident & rps_cpu_mask;
 
+               /* OK, now we know there is a match,
+                * we can look at the local (per receive queue) flow table
+                */
                rflow = &flow_table->flows[hash & flow_table->mask];
                tcpu = rflow->cpu;
 
-               next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask];
-
                /*
                 * If the desired CPU (where last recvmsg was done) is
                 * different from current CPU (one in the rx-queue flow
@@ -3135,6 +3166,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                }
        }
 
+try_rps:
+
        if (map) {
                tcpu = map->cpus[reciprocal_scale(hash, map->len)];
                if (cpu_online(tcpu)) {
@@ -3586,7 +3619,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
        struct packet_type *ptype, *pt_prev;
        rx_handler_func_t *rx_handler;
        struct net_device *orig_dev;
-       struct net_device *null_or_dev;
        bool deliver_exact = false;
        int ret = NET_RX_DROP;
        __be16 type;
@@ -3629,11 +3661,15 @@ another_round:
                goto skip_taps;
 
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
-               if (!ptype->dev || ptype->dev == skb->dev) {
-                       if (pt_prev)
-                               ret = deliver_skb(skb, pt_prev, orig_dev);
-                       pt_prev = ptype;
-               }
+               if (pt_prev)
+                       ret = deliver_skb(skb, pt_prev, orig_dev);
+               pt_prev = ptype;
+       }
+
+       list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
+               if (pt_prev)
+                       ret = deliver_skb(skb, pt_prev, orig_dev);
+               pt_prev = ptype;
        }
 
 skip_taps:
@@ -3647,7 +3683,7 @@ ncls:
        if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
                goto drop;
 
-       if (vlan_tx_tag_present(skb)) {
+       if (skb_vlan_tag_present(skb)) {
                if (pt_prev) {
                        ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = NULL;
@@ -3679,8 +3715,8 @@ ncls:
                }
        }
 
-       if (unlikely(vlan_tx_tag_present(skb))) {
-               if (vlan_tx_tag_get_id(skb))
+       if (unlikely(skb_vlan_tag_present(skb))) {
+               if (skb_vlan_tag_get_id(skb))
                        skb->pkt_type = PACKET_OTHERHOST;
                /* Note: we might in the future use prio bits
                 * and set skb->priority like in vlan_do_receive()
@@ -3689,19 +3725,21 @@ ncls:
                skb->vlan_tci = 0;
        }
 
+       type = skb->protocol;
+
        /* deliver only exact match when indicated */
-       null_or_dev = deliver_exact ? skb->dev : NULL;
+       if (likely(!deliver_exact)) {
+               deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+                                      &ptype_base[ntohs(type) &
+                                                  PTYPE_HASH_MASK]);
+       }
 
-       type = skb->protocol;
-       list_for_each_entry_rcu(ptype,
-                       &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-               if (ptype->type == type &&
-                   (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
-                    ptype->dev == orig_dev)) {
-                       if (pt_prev)
-                               ret = deliver_skb(skb, pt_prev, orig_dev);
-                       pt_prev = ptype;
-               }
+       deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+                              &orig_dev->ptype_specific);
+
+       if (unlikely(skb->dev != orig_dev)) {
+               deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+                                      &skb->dev->ptype_specific);
        }
 
        if (pt_prev) {
@@ -5294,6 +5332,26 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
+/**
+ * netdev_bonding_info_change - Dispatch event about slave change
+ * @dev: device
+ * @bonding_info: info to dispatch
+ *
+ * Send NETDEV_BONDING_INFO to netdev notifiers with a copy of @bonding_info.
+ * The caller must hold the RTNL lock.
+ */
+void netdev_bonding_info_change(struct net_device *dev,
+                               struct netdev_bonding_info *bonding_info)
+{
+       struct netdev_notifier_bonding_info     info;
+
+       memcpy(&info.bonding_info, bonding_info,
+              sizeof(struct netdev_bonding_info));
+       call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+                                     &info.info);
+}
+EXPORT_SYMBOL(netdev_bonding_info_change);
+
 static void netdev_adjacent_add_links(struct net_device *dev)
 {
        struct netdev_adjacent *iter;
@@ -6143,13 +6201,16 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 {
        unsigned int i, count = dev->num_rx_queues;
        struct netdev_rx_queue *rx;
+       size_t sz = count * sizeof(*rx);
 
        BUG_ON(count < 1);
 
-       rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
-       if (!rx)
-               return -ENOMEM;
-
+       rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+       if (!rx) {
+               rx = vzalloc(sz);
+               if (!rx)
+                       return -ENOMEM;
+       }
        dev->_rx = rx;
 
        for (i = 0; i < count; i++)
@@ -6547,6 +6608,8 @@ void netdev_run_todo(void)
 
                /* paranoia */
                BUG_ON(netdev_refcnt_read(dev));
+               BUG_ON(!list_empty(&dev->ptype_all));
+               BUG_ON(!list_empty(&dev->ptype_specific));
                WARN_ON(rcu_access_pointer(dev->ip_ptr));
                WARN_ON(rcu_access_pointer(dev->ip6_ptr));
                WARN_ON(dev->dn_ptr);
@@ -6729,6 +6792,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        INIT_LIST_HEAD(&dev->adj_list.lower);
        INIT_LIST_HEAD(&dev->all_adj_list.upper);
        INIT_LIST_HEAD(&dev->all_adj_list.lower);
+       INIT_LIST_HEAD(&dev->ptype_all);
+       INIT_LIST_HEAD(&dev->ptype_specific);
        dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
        setup(dev);
 
@@ -6779,7 +6844,7 @@ void free_netdev(struct net_device *dev)
 
        netif_free_tx_queues(dev);
 #ifdef CONFIG_SYSFS
-       kfree(dev->_rx);
+       kvfree(dev->_rx);
 #endif
 
        kfree(rcu_dereference_protected(dev->ingress_queue, 1));
@@ -7064,11 +7129,11 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 
        /* Process offline CPU's input_pkt_queue */
        while ((skb = __skb_dequeue(&oldsd->process_queue))) {
-               netif_rx_internal(skb);
+               netif_rx_ni(skb);
                input_queue_head_incr(oldsd);
        }
        while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
-               netif_rx_internal(skb);
+               netif_rx_ni(skb);
                input_queue_head_incr(oldsd);
        }