Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

author David S. Miller <davem@davemloft.net>
Tue, 14 Apr 2015 19:44:14 +0000 (15:44 -0400)
committer David S. Miller <davem@davemloft.net>
Tue, 14 Apr 2015 19:44:14 +0000 (15:44 -0400)
diff --combined drivers/net/ethernet/emulex/benet/be.h

index 4b0494b9cc7cf034e8ebdc190d08e46a8a1e790e,204ec43438c4d8edc6ee2e9a0bdaa39023a9718e..1bf1cdce74ac3591d4a2011e6be9399c4a5cdf57
--- 1/drivers/net/ethernet/emulex/benet/be.h
--- 2/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@@ -30,12 -30,11 +30,12 @@@
   #include <linux/firmware.h>
   #include <linux/slab.h>
   #include <linux/u64_stats_sync.h>
+ +#include <linux/cpumask.h>
   
   #include "be_hw.h"
   #include "be_roce.h"
   
- -#define DRV_VER                       "10.4u"
+ +#define DRV_VER                       "10.6.0.1"
   #define DRV_NAME              "be2net"
   #define BE_NAME                       "Emulex BladeEngine2"
   #define BE3_NAME              "Emulex BladeEngine3"
@@@ -88,7 -87,6 +88,7 @@@
   #define BE3_MAX_EVT_QS                16
   #define BE3_SRIOV_MAX_EVT_QS  8
   
+ +#define MAX_RSS_IFACES                15
   #define MAX_RX_QS             32
   #define MAX_EVT_QS            32
   #define MAX_TX_QS             32
@@@ -99,6 -97,7 +99,7 @@@
   #define BE_NAPI_WEIGHT                64
   #define MAX_RX_POST           BE_NAPI_WEIGHT /* Frags posted at a time */
   #define RX_FRAGS_REFILL_WM    (RX_Q_LEN - MAX_RX_POST)
+ #define MAX_NUM_POST_ERX_DB   255u
   
   #define MAX_VFS                       30 /* Max VFs supported by BE3 FW */
   #define FW_VER_LEN            32
@@@ -184,7 -183,6 +185,7 @@@ struct be_eq_obj 
         u16 spurious_intr;
         struct napi_struct napi;
         struct be_adapter *adapter;
+ +      cpumask_var_t  affinity_mask;
   
   #ifdef CONFIG_NET_RX_BUSY_POLL
   #define BE_EQ_IDLE            0
@@@ -241,17 -239,10 +242,17 @@@ struct be_tx_stats 
         struct u64_stats_sync sync_compl;
   };
   
+ +/* Structure to hold some data of interest obtained from a TX CQE */
+ +struct be_tx_compl_info {
+ +      u8 status;              /* Completion status */
+ +      u16 end_index;          /* Completed TXQ Index */
+ +};
+ +
   struct be_tx_obj {
         u32 db_offset;
         struct be_queue_info q;
         struct be_queue_info cq;
+ +      struct be_tx_compl_info txcp;
         /* Remember the skbs that were transmitted */
         struct sk_buff *sent_skb_list[TX_Q_LEN];
         struct be_tx_stats stats;
@@@ -380,7 -371,6 +381,7 @@@ enum vf_state 
   #define BE_FLAGS_VXLAN_OFFLOADS                       BIT(8)
   #define BE_FLAGS_SETUP_DONE                   BIT(9)
   #define BE_FLAGS_EVT_INCOMPATIBLE_SFP         BIT(10)
+ +#define BE_FLAGS_ERR_DETECTION_SCHEDULED      BIT(11)
   
   #define BE_UC_PMAC_COUNT                      30
   #define BE_VF_UC_PMAC_COUNT                   2
@@@ -415,11 -405,8 +416,11 @@@ struct be_resources 
         u16 max_tx_qs;
         u16 max_rss_qs;
         u16 max_rx_qs;
+ +      u16 max_cq_count;
         u16 max_uc_mac;         /* Max UC MACs programmable */
         u16 max_vlans;          /* Number of vlans supported */
+ +      u16 max_iface_count;
+ +      u16 max_mcc_count;
         u16 max_evt_qs;
         u32 if_cap_flags;
         u32 vf_if_cap_flags;    /* VF if capability flags */
@@@ -432,39 -419,6 +433,39 @@@ struct rss_info 
         u8 rss_hkey[RSS_HASH_KEY_LEN];
   };
   
+ +/* Macros to read/write the 'features' word of be_wrb_params structure.
+ + */
+ +#define       BE_WRB_F_BIT(name)                      BE_WRB_F_##name##_BIT
+ +#define       BE_WRB_F_MASK(name)                     BIT_MASK(BE_WRB_F_##name##_BIT)
+ +
+ +#define       BE_WRB_F_GET(word, name)        \
+ +      (((word) & (BE_WRB_F_MASK(name))) >> BE_WRB_F_BIT(name))
+ +
+ +#define       BE_WRB_F_SET(word, name, val)   \
+ +      ((word) |= (((val) << BE_WRB_F_BIT(name)) & BE_WRB_F_MASK(name)))
+ +
+ +/* Feature/offload bits */
+ +enum {
+ +      BE_WRB_F_CRC_BIT,               /* Ethernet CRC */
+ +      BE_WRB_F_IPCS_BIT,              /* IP csum */
+ +      BE_WRB_F_TCPCS_BIT,             /* TCP csum */
+ +      BE_WRB_F_UDPCS_BIT,             /* UDP csum */
+ +      BE_WRB_F_LSO_BIT,               /* LSO */
+ +      BE_WRB_F_LSO6_BIT,              /* LSO6 */
+ +      BE_WRB_F_VLAN_BIT,              /* VLAN */
+ +      BE_WRB_F_VLAN_SKIP_HW_BIT       /* Skip VLAN tag (workaround) */
+ +};
+ +
+ +/* The structure below provides a HW-agnostic abstraction of WRB params
+ + * retrieved from a TX skb. This is in turn passed to chip specific routines
+ + * during transmit, to set the corresponding params in the WRB.
+ + */
+ +struct be_wrb_params {
+ +      u32 features;   /* Feature bits */
+ +      u16 vlan_tag;   /* VLAN tag */
+ +      u16 lso_mss;    /* MSS for LSO */
+ +};
+ +
   struct be_adapter {
         struct pci_dev *pdev;
         struct net_device *netdev;
@@@ -496,8 -450,6 +497,8 @@@
   
         /* Rx rings */
         u16 num_rx_qs;
+ +      u16 num_rss_qs;
+ +      u16 need_def_rxq;
         struct be_rx_obj rx_obj[MAX_RX_QS];
         u32 big_page_size;      /* Compounded page size shared by rx wrbs */
   
@@@ -512,7 -464,7 +513,7 @@@
         struct delayed_work work;
         u16 work_counter;
   
- -      struct delayed_work func_recovery_work;
+ +      struct delayed_work be_err_detection_work;
         u32 flags;
         u32 cmd_privileges;
         /* Ethtool knobs and info */
@@@ -645,8 -597,9 +646,8 @@@ extern const struct ethtool_ops be_etht
         for (i = 0, rxo = &adapter->rx_obj[i]; i < adapter->num_rx_qs;  \
                 i++, rxo++)
   
- -/* Skip the default non-rss queue (last one)*/
   #define for_all_rss_queues(adapter, rxo, i)                           \
- -      for (i = 0, rxo = &adapter->rx_obj[i]; i < (adapter->num_rx_qs - 1);\
+ +      for (i = 0, rxo = &adapter->rx_obj[i]; i < adapter->num_rss_qs; \
                 i++, rxo++)
   
   #define for_all_tx_queues(adapter, txo, i)                            \
diff --combined drivers/net/ethernet/emulex/benet/be_main.c

index 5ff7fba9b67c9d39043d1094193db714f7625a6b,ad2b5094a498f2b53be8700e52b330ffba5e364a..fb0bc3c3620e9cf87983b1c425e0f24d431bffc9
--- 1/drivers/net/ethernet/emulex/benet/be_main.c
--- 2/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@@ -30,9 -30,6 +30,9 @@@ MODULE_DESCRIPTION(DRV_DESC " " DRV_VER
   MODULE_AUTHOR("Emulex Corporation");
   MODULE_LICENSE("GPL");
   
+ +/* num_vfs module param is obsolete.
+ + * Use sysfs method to enable/disable VFs.
+ + */
   static unsigned int num_vfs;
   module_param(num_vfs, uint, S_IRUGO);
   MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
@@@ -730,86 -727,48 +730,86 @@@ static u16 skb_ip_proto(struct sk_buff 
                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
   }
   
- -static void wrb_fill_hdr(struct be_adapter *adapter, struct be_eth_hdr_wrb *hdr,
- -                       struct sk_buff *skb, u32 wrb_cnt, u32 len,
- -                       bool skip_hw_vlan)
+ +static inline bool be_is_txq_full(struct be_tx_obj *txo)
   {
- -      u16 vlan_tag, proto;
+ +      return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
+ +}
   
- -      memset(hdr, 0, sizeof(*hdr));
+ +static inline bool be_can_txq_wake(struct be_tx_obj *txo)
+ +{
+ +      return atomic_read(&txo->q.used) < txo->q.len / 2;
+ +}
+ +
+ +static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
+ +{
+ +      return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
+ +}
   
- -      SET_TX_WRB_HDR_BITS(crc, hdr, 1);
+ +static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
+ +                                     struct sk_buff *skb,
+ +                                     struct be_wrb_params *wrb_params)
+ +{
+ +      u16 proto;
   
         if (skb_is_gso(skb)) {
- -              SET_TX_WRB_HDR_BITS(lso, hdr, 1);
- -              SET_TX_WRB_HDR_BITS(lso_mss, hdr, skb_shinfo(skb)->gso_size);
+ +              BE_WRB_F_SET(wrb_params->features, LSO, 1);
+ +              wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
- -                      SET_TX_WRB_HDR_BITS(lso6, hdr, 1);
+ +                      BE_WRB_F_SET(wrb_params->features, LSO6, 1);
         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
                 if (skb->encapsulation) {
- -                      SET_TX_WRB_HDR_BITS(ipcs, hdr, 1);
+ +                      BE_WRB_F_SET(wrb_params->features, IPCS, 1);
                         proto = skb_inner_ip_proto(skb);
                 } else {
                         proto = skb_ip_proto(skb);
                 }
                 if (proto == IPPROTO_TCP)
- -                      SET_TX_WRB_HDR_BITS(tcpcs, hdr, 1);
+ +                      BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
                 else if (proto == IPPROTO_UDP)
- -                      SET_TX_WRB_HDR_BITS(udpcs, hdr, 1);
+ +                      BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
         }
   
         if (skb_vlan_tag_present(skb)) {
- -              SET_TX_WRB_HDR_BITS(vlan, hdr, 1);
- -              vlan_tag = be_get_tx_vlan_tag(adapter, skb);
- -              SET_TX_WRB_HDR_BITS(vlan_tag, hdr, vlan_tag);
+ +              BE_WRB_F_SET(wrb_params->features, VLAN, 1);
+ +              wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
         }
   
- -      SET_TX_WRB_HDR_BITS(num_wrb, hdr, wrb_cnt);
- -      SET_TX_WRB_HDR_BITS(len, hdr, len);
+ +      BE_WRB_F_SET(wrb_params->features, CRC, 1);
+ +}
+ +
+ +static void wrb_fill_hdr(struct be_adapter *adapter,
+ +                       struct be_eth_hdr_wrb *hdr,
+ +                       struct be_wrb_params *wrb_params,
+ +                       struct sk_buff *skb)
+ +{
+ +      memset(hdr, 0, sizeof(*hdr));
   
- -      /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0
- -       * When this hack is not needed, the evt bit is set while ringing DB
+ +      SET_TX_WRB_HDR_BITS(crc, hdr,
+ +                          BE_WRB_F_GET(wrb_params->features, CRC));
+ +      SET_TX_WRB_HDR_BITS(ipcs, hdr,
+ +                          BE_WRB_F_GET(wrb_params->features, IPCS));
+ +      SET_TX_WRB_HDR_BITS(tcpcs, hdr,
+ +                          BE_WRB_F_GET(wrb_params->features, TCPCS));
+ +      SET_TX_WRB_HDR_BITS(udpcs, hdr,
+ +                          BE_WRB_F_GET(wrb_params->features, UDPCS));
+ +
+ +      SET_TX_WRB_HDR_BITS(lso, hdr,
+ +                          BE_WRB_F_GET(wrb_params->features, LSO));
+ +      SET_TX_WRB_HDR_BITS(lso6, hdr,
+ +                          BE_WRB_F_GET(wrb_params->features, LSO6));
+ +      SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
+ +
+ +      /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
+ +       * hack is not needed, the evt bit is set while ringing DB.
          */
- -      if (skip_hw_vlan)
- -              SET_TX_WRB_HDR_BITS(event, hdr, 1);
+ +      SET_TX_WRB_HDR_BITS(event, hdr,
+ +                          BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
+ +      SET_TX_WRB_HDR_BITS(vlan, hdr,
+ +                          BE_WRB_F_GET(wrb_params->features, VLAN));
+ +      SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
+ +
+ +      SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
+ +      SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
   }
   
   static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
@@@ -829,124 -788,77 +829,124 @@@
         }
   }
   
- -/* Returns the number of WRBs used up by the skb */
+ +/* Grab a WRB header for xmit */
+ +static u16 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
+ +{
+ +      u16 head = txo->q.head;
+ +
+ +      queue_head_inc(&txo->q);
+ +      return head;
+ +}
+ +
+ +/* Set up the WRB header for xmit */
+ +static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
+ +                              struct be_tx_obj *txo,
+ +                              struct be_wrb_params *wrb_params,
+ +                              struct sk_buff *skb, u16 head)
+ +{
+ +      u32 num_frags = skb_wrb_cnt(skb);
+ +      struct be_queue_info *txq = &txo->q;
+ +      struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
+ +
+ +      wrb_fill_hdr(adapter, hdr, wrb_params, skb);
+ +      be_dws_cpu_to_le(hdr, sizeof(*hdr));
+ +
+ +      BUG_ON(txo->sent_skb_list[head]);
+ +      txo->sent_skb_list[head] = skb;
+ +      txo->last_req_hdr = head;
+ +      atomic_add(num_frags, &txq->used);
+ +      txo->last_req_wrb_cnt = num_frags;
+ +      txo->pend_wrb_cnt += num_frags;
+ +}
+ +
+ +/* Setup a WRB fragment (buffer descriptor) for xmit */
+ +static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
+ +                               int len)
+ +{
+ +      struct be_eth_wrb *wrb;
+ +      struct be_queue_info *txq = &txo->q;
+ +
+ +      wrb = queue_head_node(txq);
+ +      wrb_fill(wrb, busaddr, len);
+ +      queue_head_inc(txq);
+ +}
+ +
+ +/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
+ + * was invoked. The producer index is restored to the previous packet and the
+ + * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
+ + */
+ +static void be_xmit_restore(struct be_adapter *adapter,
+ +                          struct be_tx_obj *txo, u16 head, bool map_single,
+ +                          u32 copied)
+ +{
+ +      struct device *dev;
+ +      struct be_eth_wrb *wrb;
+ +      struct be_queue_info *txq = &txo->q;
+ +
+ +      dev = &adapter->pdev->dev;
+ +      txq->head = head;
+ +
+ +      /* skip the first wrb (hdr); it's not mapped */
+ +      queue_head_inc(txq);
+ +      while (copied) {
+ +              wrb = queue_head_node(txq);
+ +              unmap_tx_frag(dev, wrb, map_single);
+ +              map_single = false;
+ +              copied -= le32_to_cpu(wrb->frag_len);
+ +              queue_head_inc(txq);
+ +      }
+ +
+ +      txq->head = head;
+ +}
+ +
+ +/* Enqueue the given packet for transmit. This routine allocates WRBs for the
+ + * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
+ + * of WRBs used up by the packet.
+ + */
   static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
- -                         struct sk_buff *skb, bool skip_hw_vlan)
+ +                         struct sk_buff *skb,
+ +                         struct be_wrb_params *wrb_params)
   {
         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
         struct device *dev = &adapter->pdev->dev;
         struct be_queue_info *txq = &txo->q;
- -      struct be_eth_hdr_wrb *hdr;
         bool map_single = false;
- -      struct be_eth_wrb *wrb;
- -      dma_addr_t busaddr;
         u16 head = txq->head;
+ +      dma_addr_t busaddr;
+ +      int len;
   
- -      hdr = queue_head_node(txq);
- -      wrb_fill_hdr(adapter, hdr, skb, wrb_cnt, skb->len, skip_hw_vlan);
- -      be_dws_cpu_to_le(hdr, sizeof(*hdr));
- -
- -      queue_head_inc(txq);
+ +      head = be_tx_get_wrb_hdr(txo);
   
         if (skb->len > skb->data_len) {
- -              int len = skb_headlen(skb);
+ +              len = skb_headlen(skb);
   
                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
                 if (dma_mapping_error(dev, busaddr))
                         goto dma_err;
                 map_single = true;
- -              wrb = queue_head_node(txq);
- -              wrb_fill(wrb, busaddr, len);
- -              queue_head_inc(txq);
+ +              be_tx_setup_wrb_frag(txo, busaddr, len);
                 copied += len;
         }
   
         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+ +              len = skb_frag_size(frag);
   
- -              busaddr = skb_frag_dma_map(dev, frag, 0,
- -                                         skb_frag_size(frag), DMA_TO_DEVICE);
+ +              busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
                 if (dma_mapping_error(dev, busaddr))
                         goto dma_err;
- -              wrb = queue_head_node(txq);
- -              wrb_fill(wrb, busaddr, skb_frag_size(frag));
- -              queue_head_inc(txq);
- -              copied += skb_frag_size(frag);
+ +              be_tx_setup_wrb_frag(txo, busaddr, len);
+ +              copied += len;
         }
   
- -      BUG_ON(txo->sent_skb_list[head]);
- -      txo->sent_skb_list[head] = skb;
- -      txo->last_req_hdr = head;
- -      atomic_add(wrb_cnt, &txq->used);
- -      txo->last_req_wrb_cnt = wrb_cnt;
- -      txo->pend_wrb_cnt += wrb_cnt;
+ +      be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
   
         be_tx_stats_update(txo, skb);
         return wrb_cnt;
   
   dma_err:
- -      /* Bring the queue back to the state it was in before this
- -       * routine was invoked.
- -       */
- -      txq->head = head;
- -      /* skip the first wrb (hdr); it's not mapped */
- -      queue_head_inc(txq);
- -      while (copied) {
- -              wrb = queue_head_node(txq);
- -              unmap_tx_frag(dev, wrb, map_single);
- -              map_single = false;
- -              copied -= le32_to_cpu(wrb->frag_len);
- -              adapter->drv_stats.dma_map_errors++;
- -              queue_head_inc(txq);
- -      }
- -      txq->head = head;
+ +      adapter->drv_stats.dma_map_errors++;
+ +      be_xmit_restore(adapter, txo, head, map_single, copied);
         return 0;
   }
   
@@@ -957,8 -869,7 +957,8 @@@ static inline int qnq_async_evt_rcvd(st
   
   static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
                                              struct sk_buff *skb,
- -                                           bool *skip_hw_vlan)
+ +                                           struct be_wrb_params
+ +                                           *wrb_params)
   {
         u16 vlan_tag = 0;
   
@@@ -975,7 -886,8 +975,7 @@@
                 /* f/w workaround to set skip_hw_vlan = 1, informs the F/W to
                  * skip VLAN insertion
                  */
- -              if (skip_hw_vlan)
- -                      *skip_hw_vlan = true;
+ +              BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
         }
   
         if (vlan_tag) {
@@@ -993,7 -905,8 +993,7 @@@
                                                 vlan_tag);
                 if (unlikely(!skb))
                         return skb;
- -              if (skip_hw_vlan)
- -                      *skip_hw_vlan = true;
+ +              BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
         }
   
         return skb;
@@@ -1033,8 -946,7 +1033,8 @@@ static int be_ipv6_tx_stall_chk(struct 
   
   static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
                                                   struct sk_buff *skb,
- -                                                bool *skip_hw_vlan)
+ +                                                struct be_wrb_params
+ +                                                *wrb_params)
   {
         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
         unsigned int eth_hdr_len;
@@@ -1058,7 -970,7 +1058,7 @@@
          */
         if (be_pvid_tagging_enabled(adapter) &&
             veh->h_vlan_proto == htons(ETH_P_8021Q))
- -              *skip_hw_vlan = true;
+ +              BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
   
         /* HW has a bug wherein it will calculate CSUM for VLAN
          * pkts even though it is disabled.
@@@ -1066,7 -978,7 +1066,7 @@@
          */
         if (skb->ip_summed != CHECKSUM_PARTIAL &&
             skb_vlan_tag_present(skb)) {
- -              skb = be_insert_vlan_in_pkt(adapter, skb, skip_hw_vlan);
+ +              skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
                 if (unlikely(!skb))
                         goto err;
         }
@@@ -1088,7 -1000,7 +1088,7 @@@
          */
         if (be_ipv6_tx_stall_chk(adapter, skb) &&
             be_vlan_tag_tx_chk(adapter, skb)) {
- -              skb = be_insert_vlan_in_pkt(adapter, skb, skip_hw_vlan);
+ +              skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
                 if (unlikely(!skb))
                         goto err;
         }
@@@ -1102,7 -1014,7 +1102,7 @@@ err
   
   static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
                                            struct sk_buff *skb,
- -                                         bool *skip_hw_vlan)
+ +                                         struct be_wrb_params *wrb_params)
   {
         /* Lancer, SH-R ASICs have a bug wherein Packets that are 32 bytes or
          * less may cause a transmit stall on that port. So the work-around is
@@@ -1114,7 -1026,7 +1114,7 @@@
         }
   
         if (BEx_chip(adapter) || lancer_chip(adapter)) {
- -              skb = be_lancer_xmit_workarounds(adapter, skb, skip_hw_vlan);
+ +              skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
                 if (!skb)
                         return NULL;
         }
@@@ -1148,26 -1060,24 +1148,26 @@@ static void be_xmit_flush(struct be_ada
   
   static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
   {
- -      bool skip_hw_vlan = false, flush = !skb->xmit_more;
         struct be_adapter *adapter = netdev_priv(netdev);
         u16 q_idx = skb_get_queue_mapping(skb);
         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
- -      struct be_queue_info *txq = &txo->q;
+ +      struct be_wrb_params wrb_params = { 0 };
+ +      bool flush = !skb->xmit_more;
         u16 wrb_cnt;
   
- -      skb = be_xmit_workarounds(adapter, skb, &skip_hw_vlan);
+ +      skb = be_xmit_workarounds(adapter, skb, &wrb_params);
         if (unlikely(!skb))
                 goto drop;
   
- -      wrb_cnt = be_xmit_enqueue(adapter, txo, skb, skip_hw_vlan);
+ +      be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
+ +
+ +      wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
         if (unlikely(!wrb_cnt)) {
                 dev_kfree_skb_any(skb);
                 goto drop;
         }
   
- -      if ((atomic_read(&txq->used) + BE_MAX_TX_FRAG_COUNT) >= txq->len) {
+ +      if (be_is_txq_full(txo)) {
                 netif_stop_subqueue(netdev, q_idx);
                 tx_stats(txo)->tx_stops++;
         }
@@@ -2122,7 -2032,7 +2122,7 @@@ static void be_post_rx_frags(struct be_
                 if (rxo->rx_post_starved)
                         rxo->rx_post_starved = false;
                 do {
-                       notify = min(256u, posted);
+                       notify = min(MAX_NUM_POST_ERX_DB, posted);
                         be_rxq_notify(adapter, rxq->id, notify);
                         posted -= notify;
                 } while (posted);
@@@ -2132,23 -2042,18 +2132,23 @@@
         }
   }
   
- -static struct be_eth_tx_compl *be_tx_compl_get(struct be_queue_info *tx_cq)
+ +static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
   {
- -      struct be_eth_tx_compl *txcp = queue_tail_node(tx_cq);
+ +      struct be_queue_info *tx_cq = &txo->cq;
+ +      struct be_tx_compl_info *txcp = &txo->txcp;
+ +      struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
   
- -      if (txcp->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
+ +      if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
                 return NULL;
   
+ +      /* Ensure load ordering of valid bit dword and other dwords below */
         rmb();
- -      be_dws_le_to_cpu(txcp, sizeof(*txcp));
+ +      be_dws_le_to_cpu(compl, sizeof(*compl));
   
- -      txcp->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
+ +      txcp->status = GET_TX_COMPL_BITS(status, compl);
+ +      txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
   
+ +      compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
         queue_tail_inc(tx_cq);
         return txcp;
   }
@@@ -2269,9 -2174,9 +2269,9 @@@ static void be_tx_compl_clean(struct be
   {
         u16 end_idx, notified_idx, cmpl = 0, timeo = 0, num_wrbs = 0;
         struct device *dev = &adapter->pdev->dev;
- -      struct be_tx_obj *txo;
+ +      struct be_tx_compl_info *txcp;
         struct be_queue_info *txq;
- -      struct be_eth_tx_compl *txcp;
+ +      struct be_tx_obj *txo;
         int i, pending_txqs;
   
         /* Stop polling for compls when HW has been silent for 10ms */
@@@ -2282,10 -2187,10 +2282,10 @@@
                         cmpl = 0;
                         num_wrbs = 0;
                         txq = &txo->q;
- -                      while ((txcp = be_tx_compl_get(&txo->cq))) {
- -                              end_idx = GET_TX_COMPL_BITS(wrb_index, txcp);
- -                              num_wrbs += be_tx_compl_process(adapter, txo,
- -                                                              end_idx);
+ +                      while ((txcp = be_tx_compl_get(txo))) {
+ +                              num_wrbs +=
+ +                                      be_tx_compl_process(adapter, txo,
+ +                                                          txcp->end_index);
                                 cmpl++;
                         }
                         if (cmpl) {
@@@ -2293,7 -2198,7 +2293,7 @@@
                                 atomic_sub(num_wrbs, &txq->used);
                                 timeo = 0;
                         }
- -                      if (atomic_read(&txq->used) == txo->pend_wrb_cnt)
+ +                      if (!be_is_tx_compl_pending(txo))
                                 pending_txqs--;
                 }
   
@@@ -2342,7 -2247,6 +2342,7 @@@ static void be_evt_queues_destroy(struc
                         napi_hash_del(&eqo->napi);
                         netif_napi_del(&eqo->napi);
                 }
+ +              free_cpumask_var(eqo->affinity_mask);
                 be_queue_free(adapter, &eqo->q);
         }
   }
@@@ -2358,11 -2262,6 +2358,11 @@@ static int be_evt_queues_create(struct 
                                     adapter->cfg_num_qs);
   
         for_all_evt_queues(adapter, eqo, i) {
+ +              if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
+ +                      return -ENOMEM;
+ +              cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev),
+ +                                          eqo->affinity_mask);
+ +
                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
                                BE_NAPI_WEIGHT);
                 napi_hash_add(&eqo->napi);
@@@ -2454,9 -2353,8 +2454,9 @@@ static void be_tx_queues_destroy(struc
   
   static int be_tx_qs_create(struct be_adapter *adapter)
   {
- -      struct be_queue_info *cq, *eq;
+ +      struct be_queue_info *cq;
         struct be_tx_obj *txo;
+ +      struct be_eq_obj *eqo;
         int status, i;
   
         adapter->num_tx_qs = min(adapter->num_evt_qs, be_max_txqs(adapter));
@@@ -2474,8 -2372,8 +2474,8 @@@
                 /* If num_evt_qs is less than num_tx_qs, then more than
                  * one txq share an eq
                  */
- -              eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
- -              status = be_cmd_cq_create(adapter, cq, eq, false, 3);
+ +              eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
+ +              status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
                 if (status)
                         return status;
   
@@@ -2487,9 -2385,6 +2487,9 @@@
                 status = be_cmd_txq_create(adapter, txo);
                 if (status)
                         return status;
+ +
+ +              netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
+ +                                  eqo->idx);
         }
   
         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
@@@ -2518,19 -2413,13 +2518,19 @@@ static int be_rx_cqs_create(struct be_a
         int rc, i;
   
         /* We can create as many RSS rings as there are EQs. */
- -      adapter->num_rx_qs = adapter->num_evt_qs;
+ +      adapter->num_rss_qs = adapter->num_evt_qs;
+ +
+ +      /* We'll use RSS only if atleast 2 RSS rings are supported. */
+ +      if (adapter->num_rss_qs <= 1)
+ +              adapter->num_rss_qs = 0;
   
- -      /* We'll use RSS only if atleast 2 RSS rings are supported.
- -       * When RSS is used, we'll need a default RXQ for non-IP traffic.
+ +      adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
+ +
+ +      /* When the interface is not capable of RSS rings (and there is no
+ +       * need to create a default RXQ) we'll still need one RXQ
          */
- -      if (adapter->num_rx_qs > 1)
- -              adapter->num_rx_qs++;
+ +      if (adapter->num_rx_qs == 0)
+ +              adapter->num_rx_qs = 1;
   
         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
         for_all_rx_queues(adapter, rxo, i) {
@@@ -2549,7 -2438,8 +2549,7 @@@
         }
   
         dev_info(&adapter->pdev->dev,
- -               "created %d RSS queue(s) and 1 default RX queue\n",
- -               adapter->num_rx_qs - 1);
+ +               "created %d RX queue(s)\n", adapter->num_rx_qs);
         return 0;
   }
   
@@@ -2659,7 -2549,7 +2659,7 @@@ loop_continue
         return work_done;
   }
   
- -static inline void be_update_tx_err(struct be_tx_obj *txo, u32 status)
+ +static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
   {
         switch (status) {
         case BE_TX_COMP_HDR_PARSE_ERR:
@@@ -2674,7 -2564,7 +2674,7 @@@
         }
   }
   
- -static inline void lancer_update_tx_err(struct be_tx_obj *txo, u32 status)
+ +static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
   {
         switch (status) {
         case LANCER_TX_COMP_LSO_ERR:
@@@ -2699,18 -2589,22 +2699,18 @@@
   static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
                           int idx)
   {
- -      struct be_eth_tx_compl *txcp;
         int num_wrbs = 0, work_done = 0;
- -      u32 compl_status;
- -      u16 last_idx;
+ +      struct be_tx_compl_info *txcp;
   
- -      while ((txcp = be_tx_compl_get(&txo->cq))) {
- -              last_idx = GET_TX_COMPL_BITS(wrb_index, txcp);
- -              num_wrbs += be_tx_compl_process(adapter, txo, last_idx);
+ +      while ((txcp = be_tx_compl_get(txo))) {
+ +              num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
                 work_done++;
   
- -              compl_status = GET_TX_COMPL_BITS(status, txcp);
- -              if (compl_status) {
+ +              if (txcp->status) {
                         if (lancer_chip(adapter))
- -                              lancer_update_tx_err(txo, compl_status);
+ +                              lancer_update_tx_err(txo, txcp->status);
                         else
- -                              be_update_tx_err(txo, compl_status);
+ +                              be_update_tx_err(txo, txcp->status);
                 }
         }
   
@@@ -2721,7 -2615,7 +2721,7 @@@
                 /* As Tx wrbs have been freed up, wake up netdev queue
                  * if it was stopped due to lack of tx wrbs.  */
                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
- -                  atomic_read(&txo->q.used) < txo->q.len / 2) {
+ +                  be_can_txq_wake(txo)) {
                         netif_wake_subqueue(adapter->netdev, idx);
                 }
   
@@@ -2913,12 -2807,12 +2913,12 @@@ void be_detect_error(struct be_adapter 
                         sliport_err2 = ioread32(adapter->db +
                                                 SLIPORT_ERROR2_OFFSET);
                         adapter->hw_error = true;
+ +                      error_detected = true;
                         /* Do not log error messages if its a FW reset */
                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
                                 dev_info(dev, "Firmware update in progress\n");
                         } else {
- -                              error_detected = true;
                                 dev_err(dev, "Error detected in the card\n");
                                 dev_err(dev, "ERR: sliport status 0x%x\n",
                                         sliport_status);
@@@ -3038,8 -2932,6 +3038,8 @@@ static int be_msix_register(struct be_a
                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
                 if (status)
                         goto err_msix;
+ +
+ +              irq_set_affinity_hint(vec, eqo->affinity_mask);
         }
   
         return 0;
@@@ -3084,7 -2976,7 +3084,7 @@@ static void be_irq_unregister(struct be
   {
         struct net_device *netdev = adapter->netdev;
         struct be_eq_obj *eqo;
- -      int i;
+ +      int i, vec;
   
         if (!adapter->isr_registered)
                 return;
@@@ -3096,11 -2988,8 +3096,11 @@@
         }
   
         /* MSIx */
- -      for_all_evt_queues(adapter, eqo, i)
- -              free_irq(be_msix_vec_get(adapter, eqo), eqo);
+ +      for_all_evt_queues(adapter, eqo, i) {
+ +              vec = be_msix_vec_get(adapter, eqo);
+ +              irq_set_affinity_hint(vec, NULL);
+ +              free_irq(vec, eqo);
+ +      }
   
   done:
         adapter->isr_registered = false;
@@@ -3182,14 -3071,12 +3182,14 @@@ static int be_rx_qs_create(struct be_ad
                         return rc;
         }
   
- -      /* The FW would like the default RXQ to be created first */
- -      rxo = default_rxo(adapter);
- -      rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id, rx_frag_size,
- -                             adapter->if_handle, false, &rxo->rss_id);
- -      if (rc)
- -              return rc;
+ +      if (adapter->need_def_rxq || !adapter->num_rss_qs) {
+ +              rxo = default_rxo(adapter);
+ +              rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
+ +                                     rx_frag_size, adapter->if_handle,
+ +                                     false, &rxo->rss_id);
+ +              if (rc)
+ +                      return rc;
+ +      }
   
         for_all_rss_queues(adapter, rxo, i) {
                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
@@@ -3200,7 -3087,8 +3200,7 @@@
         }
   
         if (be_multi_rxq(adapter)) {
- -              for (j = 0; j < RSS_INDIR_TABLE_LEN;
- -                      j += adapter->num_rx_qs - 1) {
+ +              for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
                         for_all_rss_queues(adapter, rxo, i) {
                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
                                         break;
@@@ -3291,7 -3179,7 +3291,7 @@@ static int be_setup_wol(struct be_adapt
         int status = 0;
         u8 mac[ETH_ALEN];
   
- -      memset(mac, 0, ETH_ALEN);
+ +      eth_zero_addr(mac);
   
         cmd.size = sizeof(struct be_cmd_req_acpi_wol_magic_config);
         cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
@@@ -3436,14 -3324,6 +3436,14 @@@ static void be_cancel_worker(struct be_
         }
   }
   
+ +static void be_cancel_err_detection(struct be_adapter *adapter)
+ +{
+ +      if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
+ +              cancel_delayed_work_sync(&adapter->be_err_detection_work);
+ +              adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
+ +      }
+ +}
+ +
   static void be_mac_clear(struct be_adapter *adapter)
   {
         if (adapter->pmac_id) {
@@@ -3475,39 -3355,8 +3475,39 @@@ static void be_disable_vxlan_offloads(s
   }
   #endif
   
+ +static u16 be_calculate_vf_qs(struct be_adapter *adapter, u16 num_vfs)
+ +{
+ +      struct be_resources res = adapter->pool_res;
+ +      u16 num_vf_qs = 1;
+ +
+ +      /* Distribute the queue resources equally among the PF and its VFs.
+ +       * Do not distribute queue resources in multi-channel configuration.
+ +       */
+ +      if (num_vfs && !be_is_mc(adapter)) {
+ +              /* If the number of VFs requested is less than (max supported
+ +               * - 8), assign 8 queue pairs to the PF and divide the
+ +               * remaining resources evenly among the VFs.
+ +               */
+ +              if (num_vfs < (be_max_vfs(adapter) - 8))
+ +                      num_vf_qs = (res.max_rss_qs - 8) / num_vfs;
+ +              else
+ +                      num_vf_qs = res.max_rss_qs / num_vfs;
+ +
+ +              /* Skyhawk-R chip supports only MAX_RSS_IFACES RSS capable
+ +               * interfaces per port. Provide RSS on VFs, only if number
+ +               * of VFs requested is less than MAX_RSS_IFACES limit.
+ +               */
+ +              if (num_vfs >= MAX_RSS_IFACES)
+ +                      num_vf_qs = 1;
+ +      }
+ +      return num_vf_qs;
+ +}
+ +
   static int be_clear(struct be_adapter *adapter)
   {
+ +      struct pci_dev *pdev = adapter->pdev;
+ +      u16 num_vf_qs;
+ +
         be_cancel_worker(adapter);
   
         if (sriov_enabled(adapter))
@@@ -3516,14 -3365,9 +3516,14 @@@
         /* Re-configure FW to distribute resources evenly across max-supported
          * number of VFs, only when VFs are not already enabled.
          */
- -      if (be_physfn(adapter) && !pci_vfs_assigned(adapter->pdev))
+ +      if (skyhawk_chip(adapter) && be_physfn(adapter) &&
+ +          !pci_vfs_assigned(pdev)) {
+ +              num_vf_qs = be_calculate_vf_qs(adapter,
+ +                                             pci_sriov_get_totalvfs(pdev));
                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
- -                                      pci_sriov_get_totalvfs(adapter->pdev));
+ +                                      pci_sriov_get_totalvfs(pdev),
+ +                                      num_vf_qs);
+ +      }
   
   #ifdef CONFIG_BE2NET_VXLAN
         be_disable_vxlan_offloads(adapter);
@@@ -3547,7 -3391,7 +3547,7 @@@ static int be_if_create(struct be_adapt
   
         en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
                    BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS |
- -                 BE_IF_FLAGS_RSS;
+ +                 BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
   
         en_flags &= cap_flags;
   
@@@ -3568,7 -3412,6 +3568,7 @@@ static int be_vfs_if_create(struct be_a
         for_all_vfs(adapter, vf_cfg, vf) {
                 if (!BE3_chip(adapter)) {
                         status = be_cmd_get_profile_config(adapter, &res,
+ +                                                         RESOURCE_LIMITS,
                                                            vf + 1);
                         if (!status) {
                                 cap_flags = res.if_cap_flags;
@@@ -3742,8 -3585,7 +3742,8 @@@ static void BEx_get_resources(struct be
                 /* On a SuperNIC profile, the driver needs to use the
                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
                  */
- -              be_cmd_get_profile_config(adapter, &super_nic_res, 0);
+ +              be_cmd_get_profile_config(adapter, &super_nic_res,
+ +                                        RESOURCE_LIMITS, 0);
                 /* Some old versions of BE3 FW don't report max_tx_qs value */
                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
         } else {
@@@ -3763,7 -3605,6 +3763,7 @@@
                 res->max_evt_qs = 1;
   
         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
+ +      res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
   }
@@@ -3783,12 -3624,13 +3783,12 @@@ static void be_setup_init(struct be_ada
   
   static int be_get_sriov_config(struct be_adapter *adapter)
   {
- -      struct device *dev = &adapter->pdev->dev;
         struct be_resources res = {0};
         int max_vfs, old_vfs;
   
- -      /* Some old versions of BE3 FW don't report max_vfs value */
- -      be_cmd_get_profile_config(adapter, &res, 0);
+ +      be_cmd_get_profile_config(adapter, &res, RESOURCE_LIMITS, 0);
   
+ +      /* Some old versions of BE3 FW don't report max_vfs value */
         if (BE3_chip(adapter) && !res.max_vfs) {
                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
@@@ -3796,49 -3638,35 +3796,49 @@@
   
         adapter->pool_res = res;
   
- -      if (!be_max_vfs(adapter)) {
- -              if (num_vfs)
- -                      dev_warn(dev, "SRIOV is disabled. Ignoring num_vfs\n");
- -              adapter->num_vfs = 0;
- -              return 0;
- -      }
- -
- -      pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
- -
- -      /* validate num_vfs module param */
+ +      /* If during previous unload of the driver, the VFs were not disabled,
+ +       * then we cannot rely on the PF POOL limits for the TotalVFs value.
+ +       * Instead use the TotalVFs value stored in the pci-dev struct.
+ +       */
         old_vfs = pci_num_vf(adapter->pdev);
         if (old_vfs) {
- -              dev_info(dev, "%d VFs are already enabled\n", old_vfs);
- -              if (old_vfs != num_vfs)
- -                      dev_warn(dev, "Ignoring num_vfs=%d setting\n", num_vfs);
+ +              dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
+ +                       old_vfs);
+ +
+ +              adapter->pool_res.max_vfs =
+ +                      pci_sriov_get_totalvfs(adapter->pdev);
                 adapter->num_vfs = old_vfs;
- -      } else {
- -              if (num_vfs > be_max_vfs(adapter)) {
- -                      dev_info(dev, "Resources unavailable to init %d VFs\n",
- -                               num_vfs);
- -                      dev_info(dev, "Limiting to %d VFs\n",
- -                               be_max_vfs(adapter));
- -              }
- -              adapter->num_vfs = min_t(u16, num_vfs, be_max_vfs(adapter));
         }
   
         return 0;
   }
   
+ +static void be_alloc_sriov_res(struct be_adapter *adapter)
+ +{
+ +      int old_vfs = pci_num_vf(adapter->pdev);
+ +      u16 num_vf_qs;
+ +      int status;
+ +
+ +      be_get_sriov_config(adapter);
+ +
+ +      if (!old_vfs)
+ +              pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
+ +
+ +      /* When the HW is in SRIOV capable configuration, the PF-pool
+ +       * resources are given to PF during driver load, if there are no
+ +       * old VFs. This facility is not available in BE3 FW.
+ +       * Also, this is done by FW in Lancer chip.
+ +       */
+ +      if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
+ +              num_vf_qs = be_calculate_vf_qs(adapter, 0);
+ +              status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
+ +                                               num_vf_qs);
+ +              if (status)
+ +                      dev_err(&adapter->pdev->dev,
+ +                              "Failed to optimize SRIOV resources\n");
+ +      }
+ +}
+ +
   static int be_get_resources(struct be_adapter *adapter)
   {
         struct device *dev = &adapter->pdev->dev;
@@@ -3859,23 -3687,12 +3859,23 @@@
                 if (status)
                         return status;
   
+ +              /* If a default RXQ must be created, we'll use up one RSSQ */
+ +              if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
+ +                  !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
+ +                      res.max_rss_qs -= 1;
+ +
                 /* If RoCE may be enabled stash away half the EQs for RoCE */
                 if (be_roce_supported(adapter))
                         res.max_evt_qs /= 2;
                 adapter->res = res;
         }
   
+ +      /* If FW supports RSS default queue, then skip creating non-RSS
+ +       * queue for non-IP traffic.
+ +       */
+ +      adapter->need_def_rxq = (be_if_cap_flags(adapter) &
+ +                               BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
+ +
         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
                  be_max_txqs(adapter), be_max_rxqs(adapter),
                  be_max_rss(adapter), be_max_eqs(adapter),
@@@ -3884,33 -3701,47 +3884,33 @@@
                  be_max_uc(adapter), be_max_mc(adapter),
                  be_max_vlans(adapter));
   
+ +      /* Sanitize cfg_num_qs based on HW and platform limits */
+ +      adapter->cfg_num_qs = min_t(u16, netif_get_num_default_rss_queues(),
+ +                                  be_max_qs(adapter));
         return 0;
   }
   
- -static void be_sriov_config(struct be_adapter *adapter)
- -{
- -      struct device *dev = &adapter->pdev->dev;
- -      int status;
- -
- -      status = be_get_sriov_config(adapter);
- -      if (status) {
- -              dev_err(dev, "Failed to query SR-IOV configuration\n");
- -              dev_err(dev, "SR-IOV cannot be enabled\n");
- -              return;
- -      }
- -
- -      /* When the HW is in SRIOV capable configuration, the PF-pool
- -       * resources are equally distributed across the max-number of
- -       * VFs. The user may request only a subset of the max-vfs to be
- -       * enabled. Based on num_vfs, redistribute the resources across
- -       * num_vfs so that each VF will have access to more number of
- -       * resources. This facility is not available in BE3 FW.
- -       * Also, this is done by FW in Lancer chip.
- -       */
- -      if (be_max_vfs(adapter) && !pci_num_vf(adapter->pdev)) {
- -              status = be_cmd_set_sriov_config(adapter,
- -                                               adapter->pool_res,
- -                                               adapter->num_vfs);
- -              if (status)
- -                      dev_err(dev, "Failed to optimize SR-IOV resources\n");
- -      }
- -}
- -
   static int be_get_config(struct be_adapter *adapter)
   {
+ +      int status, level;
         u16 profile_id;
- -      int status;
+ +
+ +      status = be_cmd_get_cntl_attributes(adapter);
+ +      if (status)
+ +              return status;
   
         status = be_cmd_query_fw_cfg(adapter);
         if (status)
                 return status;
   
+ +      if (BEx_chip(adapter)) {
+ +              level = be_cmd_get_fw_log_level(adapter);
+ +              adapter->msg_enable =
+ +                      level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
+ +      }
+ +
+ +      be_cmd_get_acpi_wol_cap(adapter);
+ +
         be_cmd_query_port_name(adapter);
   
         if (be_physfn(adapter)) {
@@@ -3920,6 -3751,9 +3920,6 @@@
                                  "Using profile 0x%x\n", profile_id);
         }
   
- -      if (!BE2_chip(adapter) && be_physfn(adapter))
- -              be_sriov_config(adapter);
- -
         status = be_get_resources(adapter);
         if (status)
                 return status;
@@@ -3929,6 -3763,9 +3929,6 @@@
         if (!adapter->pmac_id)
                 return -ENOMEM;
   
- -      /* Sanitize cfg_num_qs based on HW and platform limits */
- -      adapter->cfg_num_qs = min(adapter->cfg_num_qs, be_max_qs(adapter));
- -
         return 0;
   }
   
@@@ -3962,13 -3799,6 +3962,13 @@@ static void be_schedule_worker(struct b
         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
   }
   
+ +static void be_schedule_err_detection(struct be_adapter *adapter)
+ +{
+ +      schedule_delayed_work(&adapter->be_err_detection_work,
+ +                            msecs_to_jiffies(1000));
+ +      adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
+ +}
+ +
   static int be_setup_queues(struct be_adapter *adapter)
   {
         struct net_device *netdev = adapter->netdev;
@@@ -4051,61 -3881,16 +4051,61 @@@ static inline int fw_major_num(const ch
         return fw_major;
   }
   
+ +/* If any VFs are already enabled don't FLR the PF */
+ +static bool be_reset_required(struct be_adapter *adapter)
+ +{
+ +      return pci_num_vf(adapter->pdev) ? false : true;
+ +}
+ +
+ +/* Wait for the FW to be ready and perform the required initialization */
+ +static int be_func_init(struct be_adapter *adapter)
+ +{
+ +      int status;
+ +
+ +      status = be_fw_wait_ready(adapter);
+ +      if (status)
+ +              return status;
+ +
+ +      if (be_reset_required(adapter)) {
+ +              status = be_cmd_reset_function(adapter);
+ +              if (status)
+ +                      return status;
+ +
+ +              /* Wait for interrupts to quiesce after an FLR */
+ +              msleep(100);
+ +
+ +              /* We can clear all errors when function reset succeeds */
+ +              be_clear_all_error(adapter);
+ +      }
+ +
+ +      /* Tell FW we're ready to fire cmds */
+ +      status = be_cmd_fw_init(adapter);
+ +      if (status)
+ +              return status;
+ +
+ +      /* Allow interrupts for other ULPs running on NIC function */
+ +      be_intr_set(adapter, true);
+ +
+ +      return 0;
+ +}
+ +
   static int be_setup(struct be_adapter *adapter)
   {
         struct device *dev = &adapter->pdev->dev;
         int status;
   
+ +      status = be_func_init(adapter);
+ +      if (status)
+ +              return status;
+ +
         be_setup_init(adapter);
   
         if (!lancer_chip(adapter))
                 be_cmd_req_native_mode(adapter);
   
+ +      if (!BE2_chip(adapter) && be_physfn(adapter))
+ +              be_alloc_sriov_res(adapter);
+ +
         status = be_get_config(adapter);
         if (status)
                 goto err;
@@@ -4146,6 -3931,8 +4146,6 @@@
   
         be_set_rx_mode(adapter->netdev);
   
- -      be_cmd_get_acpi_wol_cap(adapter);
- -
         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
                                          adapter->rx_fc);
         if (status)
@@@ -5055,165 -4842,29 +5055,165 @@@ static void be_netdev_init(struct net_d
         netdev->ethtool_ops = &be_ethtool_ops;
   }
   
- -static void be_unmap_pci_bars(struct be_adapter *adapter)
+ +static void be_cleanup(struct be_adapter *adapter)
   {
- -      if (adapter->csr)
- -              pci_iounmap(adapter->pdev, adapter->csr);
- -      if (adapter->db)
- -              pci_iounmap(adapter->pdev, adapter->db);
- -}
+ +      struct net_device *netdev = adapter->netdev;
   
- -static int db_bar(struct be_adapter *adapter)
- -{
- -      if (lancer_chip(adapter) || !be_physfn(adapter))
- -              return 0;
- -      else
- -              return 4;
+ +      rtnl_lock();
+ +      netif_device_detach(netdev);
+ +      if (netif_running(netdev))
+ +              be_close(netdev);
+ +      rtnl_unlock();
+ +
+ +      be_clear(adapter);
   }
   
- -static int be_roce_map_pci_bars(struct be_adapter *adapter)
+ +static int be_resume(struct be_adapter *adapter)
   {
- -      if (skyhawk_chip(adapter)) {
- -              adapter->roce_db.size = 4096;
- -              adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
- -                                                            db_bar(adapter));
- -              adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
+ +      struct net_device *netdev = adapter->netdev;
+ +      int status;
+ +
+ +      status = be_setup(adapter);
+ +      if (status)
+ +              return status;
+ +
+ +      if (netif_running(netdev)) {
+ +              status = be_open(netdev);
+ +              if (status)
+ +                      return status;
+ +      }
+ +
+ +      netif_device_attach(netdev);
+ +
+ +      return 0;
+ +}
+ +
+ +static int be_err_recover(struct be_adapter *adapter)
+ +{
+ +      struct device *dev = &adapter->pdev->dev;
+ +      int status;
+ +
+ +      status = be_resume(adapter);
+ +      if (status)
+ +              goto err;
+ +
+ +      dev_info(dev, "Adapter recovery successful\n");
+ +      return 0;
+ +err:
+ +      if (be_physfn(adapter))
+ +              dev_err(dev, "Adapter recovery failed\n");
+ +      else
+ +              dev_err(dev, "Re-trying adapter recovery\n");
+ +
+ +      return status;
+ +}
+ +
+ +static void be_err_detection_task(struct work_struct *work)
+ +{
+ +      struct be_adapter *adapter =
+ +                              container_of(work, struct be_adapter,
+ +                                           be_err_detection_work.work);
+ +      int status = 0;
+ +
+ +      be_detect_error(adapter);
+ +
+ +      if (adapter->hw_error) {
+ +              be_cleanup(adapter);
+ +
+ +              /* As of now error recovery support is in Lancer only */
+ +              if (lancer_chip(adapter))
+ +                      status = be_err_recover(adapter);
+ +      }
+ +
+ +      /* Always attempt recovery on VFs */
+ +      if (!status || be_virtfn(adapter))
+ +              be_schedule_err_detection(adapter);
+ +}
+ +
+ +static void be_log_sfp_info(struct be_adapter *adapter)
+ +{
+ +      int status;
+ +
+ +      status = be_cmd_query_sfp_info(adapter);
+ +      if (!status) {
+ +              dev_err(&adapter->pdev->dev,
+ +                      "Unqualified SFP+ detected on %c from %s part no: %s",
+ +                      adapter->port_name, adapter->phy.vendor_name,
+ +                      adapter->phy.vendor_pn);
+ +      }
+ +      adapter->flags &= ~BE_FLAGS_EVT_INCOMPATIBLE_SFP;
+ +}
+ +
+ +static void be_worker(struct work_struct *work)
+ +{
+ +      struct be_adapter *adapter =
+ +              container_of(work, struct be_adapter, work.work);
+ +      struct be_rx_obj *rxo;
+ +      int i;
+ +
+ +      /* when interrupts are not yet enabled, just reap any pending
+ +       * mcc completions
+ +       */
+ +      if (!netif_running(adapter->netdev)) {
+ +              local_bh_disable();
+ +              be_process_mcc(adapter);
+ +              local_bh_enable();
+ +              goto reschedule;
+ +      }
+ +
+ +      if (!adapter->stats_cmd_sent) {
+ +              if (lancer_chip(adapter))
+ +                      lancer_cmd_get_pport_stats(adapter,
+ +                                                 &adapter->stats_cmd);
+ +              else
+ +                      be_cmd_get_stats(adapter, &adapter->stats_cmd);
+ +      }
+ +
+ +      if (be_physfn(adapter) &&
+ +          MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
+ +              be_cmd_get_die_temperature(adapter);
+ +
+ +      for_all_rx_queues(adapter, rxo, i) {
+ +              /* Replenish RX-queues starved due to memory
+ +               * allocation failures.
+ +               */
+ +              if (rxo->rx_post_starved)
+ +                      be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
+ +      }
+ +
+ +      be_eqd_update(adapter);
+ +
+ +      if (adapter->flags & BE_FLAGS_EVT_INCOMPATIBLE_SFP)
+ +              be_log_sfp_info(adapter);
+ +
+ +reschedule:
+ +      adapter->work_counter++;
+ +      schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
+ +}
+ +
+ +static void be_unmap_pci_bars(struct be_adapter *adapter)
+ +{
+ +      if (adapter->csr)
+ +              pci_iounmap(adapter->pdev, adapter->csr);
+ +      if (adapter->db)
+ +              pci_iounmap(adapter->pdev, adapter->db);
+ +}
+ +
+ +static int db_bar(struct be_adapter *adapter)
+ +{
+ +      if (lancer_chip(adapter) || !be_physfn(adapter))
+ +              return 0;
+ +      else
+ +              return 4;
+ +}
+ +
+ +static int be_roce_map_pci_bars(struct be_adapter *adapter)
+ +{
+ +      if (skyhawk_chip(adapter)) {
+ +              adapter->roce_db.size = 4096;
+ +              adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
+ +                                                            db_bar(adapter));
+ +              adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
                                                                db_bar(adapter));
         }
         return 0;
@@@ -5223,12 -4874,6 +5223,12 @@@ static int be_map_pci_bars(struct be_ad
   {
         struct pci_dev *pdev = adapter->pdev;
         u8 __iomem *addr;
+ +      u32 sli_intf;
+ +
+ +      pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
+ +      adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
+ +                              SLI_INTF_FAMILY_SHIFT;
+ +      adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
   
         if (BEx_chip(adapter) && be_physfn(adapter)) {
                 adapter->csr = pci_iomap(pdev, 2, 0);
@@@ -5262,93 -4907,109 +5262,93 @@@ pci_map_err
         return -ENOMEM;
   }
   
- -static void be_ctrl_cleanup(struct be_adapter *adapter)
+ +static void be_drv_cleanup(struct be_adapter *adapter)
   {
         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
- -
- -      be_unmap_pci_bars(adapter);
+ +      struct device *dev = &adapter->pdev->dev;
   
         if (mem->va)
- -              dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
- -                                mem->dma);
+ +              dma_free_coherent(dev, mem->size, mem->va, mem->dma);
   
         mem = &adapter->rx_filter;
         if (mem->va)
- -              dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
- -                                mem->dma);
+ +              dma_free_coherent(dev, mem->size, mem->va, mem->dma);
+ +
+ +      mem = &adapter->stats_cmd;
+ +      if (mem->va)
+ +              dma_free_coherent(dev, mem->size, mem->va, mem->dma);
   }
   
- -static int be_ctrl_init(struct be_adapter *adapter)
+ +/* Allocate and initialize various fields in be_adapter struct */
+ +static int be_drv_init(struct be_adapter *adapter)
   {
         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
         struct be_dma_mem *rx_filter = &adapter->rx_filter;
- -      u32 sli_intf;
- -      int status;
- -
- -      pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
- -      adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
- -                               SLI_INTF_FAMILY_SHIFT;
- -      adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
- -
- -      status = be_map_pci_bars(adapter);
- -      if (status)
- -              goto done;
+ +      struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
+ +      struct device *dev = &adapter->pdev->dev;
+ +      int status = 0;
   
         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
- -      mbox_mem_alloc->va = dma_alloc_coherent(&adapter->pdev->dev,
- -                                              mbox_mem_alloc->size,
+ +      mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
                                                 &mbox_mem_alloc->dma,
                                                 GFP_KERNEL);
- -      if (!mbox_mem_alloc->va) {
- -              status = -ENOMEM;
- -              goto unmap_pci_bars;
- -      }
+ +      if (!mbox_mem_alloc->va)
+ +              return -ENOMEM;
+ +
         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
         memset(mbox_mem_align->va, 0, sizeof(struct be_mcc_mailbox));
   
         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
- -      rx_filter->va = dma_zalloc_coherent(&adapter->pdev->dev,
- -                                          rx_filter->size, &rx_filter->dma,
- -                                          GFP_KERNEL);
+ +      rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
+ +                                          &rx_filter->dma, GFP_KERNEL);
         if (!rx_filter->va) {
                 status = -ENOMEM;
                 goto free_mbox;
         }
   
+ +      if (lancer_chip(adapter))
+ +              stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
+ +      else if (BE2_chip(adapter))
+ +              stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
+ +      else if (BE3_chip(adapter))
+ +              stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
+ +      else
+ +              stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
+ +      stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
+ +                                          &stats_cmd->dma, GFP_KERNEL);
+ +      if (!stats_cmd->va) {
+ +              status = -ENOMEM;
+ +              goto free_rx_filter;
+ +      }
+ +
         mutex_init(&adapter->mbox_lock);
         spin_lock_init(&adapter->mcc_lock);
         spin_lock_init(&adapter->mcc_cq_lock);
- -
         init_completion(&adapter->et_cmd_compl);
- -      pci_save_state(adapter->pdev);
- -      return 0;
- -
- -free_mbox:
- -      dma_free_coherent(&adapter->pdev->dev, mbox_mem_alloc->size,
- -                        mbox_mem_alloc->va, mbox_mem_alloc->dma);
- -
- -unmap_pci_bars:
- -      be_unmap_pci_bars(adapter);
- -
- -done:
- -      return status;
- -}
   
- -static void be_stats_cleanup(struct be_adapter *adapter)
- -{
- -      struct be_dma_mem *cmd = &adapter->stats_cmd;
+ +      pci_save_state(adapter->pdev);
   
- -      if (cmd->va)
- -              dma_free_coherent(&adapter->pdev->dev, cmd->size,
- -                                cmd->va, cmd->dma);
- -}
+ +      INIT_DELAYED_WORK(&adapter->work, be_worker);
+ +      INIT_DELAYED_WORK(&adapter->be_err_detection_work,
+ +                        be_err_detection_task);
   
- -static int be_stats_init(struct be_adapter *adapter)
- -{
- -      struct be_dma_mem *cmd = &adapter->stats_cmd;
+ +      adapter->rx_fc = true;
+ +      adapter->tx_fc = true;
   
- -      if (lancer_chip(adapter))
- -              cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
- -      else if (BE2_chip(adapter))
- -              cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
- -      else if (BE3_chip(adapter))
- -              cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
- -      else
- -              /* ALL non-BE ASICs */
- -              cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
+ +      /* Must be a power of 2 or else MODULO will BUG_ON */
+ +      adapter->be_get_temp_freq = 64;
   
- -      cmd->va = dma_zalloc_coherent(&adapter->pdev->dev, cmd->size, &cmd->dma,
- -                                    GFP_KERNEL);
- -      if (!cmd->va)
- -              return -ENOMEM;
         return 0;
+ +
+ +free_rx_filter:
+ +      dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
+ +free_mbox:
+ +      dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
+ +                        mbox_mem_alloc->dma);
+ +      return status;
   }
   
   static void be_remove(struct pci_dev *pdev)
@@@ -5361,7 -5022,7 +5361,7 @@@
         be_roce_dev_remove(adapter);
         be_intr_set(adapter, false);
   
- -      cancel_delayed_work_sync(&adapter->func_recovery_work);
+ +      be_cancel_err_detection(adapter);
   
         unregister_netdev(adapter->netdev);
   
@@@ -5370,8 -5031,9 +5370,8 @@@
         /* tell fw we're done with firing cmds */
         be_cmd_fw_clean(adapter);
   
- -      be_stats_cleanup(adapter);
- -
- -      be_ctrl_cleanup(adapter);
+ +      be_unmap_pci_bars(adapter);
+ +      be_drv_cleanup(adapter);
   
         pci_disable_pcie_error_reporting(pdev);
   
@@@ -5381,6 -5043,156 +5381,6 @@@
         free_netdev(adapter->netdev);
   }
   
- -static int be_get_initial_config(struct be_adapter *adapter)
- -{
- -      int status, level;
- -
- -      status = be_cmd_get_cntl_attributes(adapter);
- -      if (status)
- -              return status;
- -
- -      /* Must be a power of 2 or else MODULO will BUG_ON */
- -      adapter->be_get_temp_freq = 64;
- -
- -      if (BEx_chip(adapter)) {
- -              level = be_cmd_get_fw_log_level(adapter);
- -              adapter->msg_enable =
- -                      level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
- -      }
- -
- -      adapter->cfg_num_qs = netif_get_num_default_rss_queues();
- -      return 0;
- -}
- -
- -static int lancer_recover_func(struct be_adapter *adapter)
- -{
- -      struct device *dev = &adapter->pdev->dev;
- -      int status;
- -
- -      status = lancer_test_and_set_rdy_state(adapter);
- -      if (status)
- -              goto err;
- -
- -      if (netif_running(adapter->netdev))
- -              be_close(adapter->netdev);
- -
- -      be_clear(adapter);
- -
- -      be_clear_all_error(adapter);
- -
- -      status = be_setup(adapter);
- -      if (status)
- -              goto err;
- -
- -      if (netif_running(adapter->netdev)) {
- -              status = be_open(adapter->netdev);
- -              if (status)
- -                      goto err;
- -      }
- -
- -      dev_err(dev, "Adapter recovery successful\n");
- -      return 0;
- -err:
- -      if (status == -EAGAIN)
- -              dev_err(dev, "Waiting for resource provisioning\n");
- -      else
- -              dev_err(dev, "Adapter recovery failed\n");
- -
- -      return status;
- -}
- -
- -static void be_func_recovery_task(struct work_struct *work)
- -{
- -      struct be_adapter *adapter =
- -              container_of(work, struct be_adapter,  func_recovery_work.work);
- -      int status = 0;
- -
- -      be_detect_error(adapter);
- -
- -      if (adapter->hw_error && lancer_chip(adapter)) {
- -              rtnl_lock();
- -              netif_device_detach(adapter->netdev);
- -              rtnl_unlock();
- -
- -              status = lancer_recover_func(adapter);
- -              if (!status)
- -                      netif_device_attach(adapter->netdev);
- -      }
- -
- -      /* In Lancer, for all errors other than provisioning error (-EAGAIN),
- -       * no need to attempt further recovery.
- -       */
- -      if (!status || status == -EAGAIN)
- -              schedule_delayed_work(&adapter->func_recovery_work,
- -                                    msecs_to_jiffies(1000));
- -}
- -
- -static void be_log_sfp_info(struct be_adapter *adapter)
- -{
- -      int status;
- -
- -      status = be_cmd_query_sfp_info(adapter);
- -      if (!status) {
- -              dev_err(&adapter->pdev->dev,
- -                      "Unqualified SFP+ detected on %c from %s part no: %s",
- -                      adapter->port_name, adapter->phy.vendor_name,
- -                      adapter->phy.vendor_pn);
- -      }
- -      adapter->flags &= ~BE_FLAGS_EVT_INCOMPATIBLE_SFP;
- -}
- -
- -static void be_worker(struct work_struct *work)
- -{
- -      struct be_adapter *adapter =
- -              container_of(work, struct be_adapter, work.work);
- -      struct be_rx_obj *rxo;
- -      int i;
- -
- -      /* when interrupts are not yet enabled, just reap any pending
- -      * mcc completions */
- -      if (!netif_running(adapter->netdev)) {
- -              local_bh_disable();
- -              be_process_mcc(adapter);
- -              local_bh_enable();
- -              goto reschedule;
- -      }
- -
- -      if (!adapter->stats_cmd_sent) {
- -              if (lancer_chip(adapter))
- -                      lancer_cmd_get_pport_stats(adapter,
- -                                                 &adapter->stats_cmd);
- -              else
- -                      be_cmd_get_stats(adapter, &adapter->stats_cmd);
- -      }
- -
- -      if (be_physfn(adapter) &&
- -          MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
- -              be_cmd_get_die_temperature(adapter);
- -
- -      for_all_rx_queues(adapter, rxo, i) {
- -              /* Replenish RX-queues starved due to memory
- -               * allocation failures.
- -               */
- -              if (rxo->rx_post_starved)
- -                      be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
- -      }
- -
- -      be_eqd_update(adapter);
- -
- -      if (adapter->flags & BE_FLAGS_EVT_INCOMPATIBLE_SFP)
- -              be_log_sfp_info(adapter);
- -
- -reschedule:
- -      adapter->work_counter++;
- -      schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
- -}
- -
- -/* If any VFs are already enabled don't FLR the PF */
- -static bool be_reset_required(struct be_adapter *adapter)
- -{
- -      return pci_num_vf(adapter->pdev) ? false : true;
- -}
- -
   static char *mc_name(struct be_adapter *adapter)
   {
         char *str = ""; /* default */
@@@ -5479,17 -5291,50 +5479,17 @@@ static int be_probe(struct pci_dev *pde
         if (!status)
                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
   
- -      status = be_ctrl_init(adapter);
+ +      status = be_map_pci_bars(adapter);
         if (status)
                 goto free_netdev;
   
- -      /* sync up with fw's ready state */
- -      if (be_physfn(adapter)) {
- -              status = be_fw_wait_ready(adapter);
- -              if (status)
- -                      goto ctrl_clean;
- -      }
- -
- -      if (be_reset_required(adapter)) {
- -              status = be_cmd_reset_function(adapter);
- -              if (status)
- -                      goto ctrl_clean;
- -
- -              /* Wait for interrupts to quiesce after an FLR */
- -              msleep(100);
- -      }
- -
- -      /* Allow interrupts for other ULPs running on NIC function */
- -      be_intr_set(adapter, true);
- -
- -      /* tell fw we're ready to fire cmds */
- -      status = be_cmd_fw_init(adapter);
- -      if (status)
- -              goto ctrl_clean;
- -
- -      status = be_stats_init(adapter);
- -      if (status)
- -              goto ctrl_clean;
- -
- -      status = be_get_initial_config(adapter);
+ +      status = be_drv_init(adapter);
         if (status)
- -              goto stats_clean;
- -
- -      INIT_DELAYED_WORK(&adapter->work, be_worker);
- -      INIT_DELAYED_WORK(&adapter->func_recovery_work, be_func_recovery_task);
- -      adapter->rx_fc = true;
- -      adapter->tx_fc = true;
+ +              goto unmap_bars;
   
         status = be_setup(adapter);
         if (status)
- -              goto stats_clean;
+ +              goto drv_cleanup;
   
         be_netdev_init(netdev);
         status = register_netdev(netdev);
@@@ -5498,7 -5343,8 +5498,7 @@@
   
         be_roce_dev_add(adapter);
   
- -      schedule_delayed_work(&adapter->func_recovery_work,
- -                            msecs_to_jiffies(1000));
+ +      be_schedule_err_detection(adapter);
   
         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
                  func_name(adapter), mc_name(adapter), adapter->port_name);
@@@ -5507,10 -5353,10 +5507,10 @@@
   
   unsetup:
         be_clear(adapter);
- -stats_clean:
- -      be_stats_cleanup(adapter);
- -ctrl_clean:
- -      be_ctrl_cleanup(adapter);
+ +drv_cleanup:
+ +      be_drv_cleanup(adapter);
+ +unmap_bars:
+ +      be_unmap_pci_bars(adapter);
   free_netdev:
         free_netdev(netdev);
   rel_reg:
@@@ -5525,14 -5371,21 +5525,14 @@@ do_none
   static int be_suspend(struct pci_dev *pdev, pm_message_t state)
   {
         struct be_adapter *adapter = pci_get_drvdata(pdev);
- -      struct net_device *netdev =  adapter->netdev;
   
         if (adapter->wol_en)
                 be_setup_wol(adapter, true);
   
         be_intr_set(adapter, false);
- -      cancel_delayed_work_sync(&adapter->func_recovery_work);
+ +      be_cancel_err_detection(adapter);
   
- -      netif_device_detach(netdev);
- -      if (netif_running(netdev)) {
- -              rtnl_lock();
- -              be_close(netdev);
- -              rtnl_unlock();
- -      }
- -      be_clear(adapter);
+ +      be_cleanup(adapter);
   
         pci_save_state(pdev);
         pci_disable_device(pdev);
@@@ -5540,10 -5393,13 +5540,10 @@@
         return 0;
   }
   
- -static int be_resume(struct pci_dev *pdev)
+ +static int be_pci_resume(struct pci_dev *pdev)
   {
- -      int status = 0;
         struct be_adapter *adapter = pci_get_drvdata(pdev);
- -      struct net_device *netdev =  adapter->netdev;
- -
- -      netif_device_detach(netdev);
+ +      int status = 0;
   
         status = pci_enable_device(pdev);
         if (status)
@@@ -5552,11 -5408,30 +5552,11 @@@
         pci_set_power_state(pdev, PCI_D0);
         pci_restore_state(pdev);
   
- -      status = be_fw_wait_ready(adapter);
- -      if (status)
- -              return status;
- -
- -      status = be_cmd_reset_function(adapter);
- -      if (status)
- -              return status;
- -
- -      be_intr_set(adapter, true);
- -      /* tell fw we're ready to fire cmds */
- -      status = be_cmd_fw_init(adapter);
+ +      status = be_resume(adapter);
         if (status)
                 return status;
   
- -      be_setup(adapter);
- -      if (netif_running(netdev)) {
- -              rtnl_lock();
- -              be_open(netdev);
- -              rtnl_unlock();
- -      }
- -
- -      schedule_delayed_work(&adapter->func_recovery_work,
- -                            msecs_to_jiffies(1000));
- -      netif_device_attach(netdev);
+ +      be_schedule_err_detection(adapter);
   
         if (adapter->wol_en)
                 be_setup_wol(adapter, false);
@@@ -5576,7 -5451,7 +5576,7 @@@ static void be_shutdown(struct pci_dev 
   
         be_roce_dev_shutdown(adapter);
         cancel_delayed_work_sync(&adapter->work);
- -      cancel_delayed_work_sync(&adapter->func_recovery_work);
+ +      be_cancel_err_detection(adapter);
   
         netif_device_detach(adapter->netdev);
   
@@@ -5589,15 -5464,22 +5589,15 @@@ static pci_ers_result_t be_eeh_err_dete
                                             pci_channel_state_t state)
   {
         struct be_adapter *adapter = pci_get_drvdata(pdev);
- -      struct net_device *netdev =  adapter->netdev;
   
         dev_err(&adapter->pdev->dev, "EEH error detected\n");
   
         if (!adapter->eeh_error) {
                 adapter->eeh_error = true;
   
- -              cancel_delayed_work_sync(&adapter->func_recovery_work);
+ +              be_cancel_err_detection(adapter);
   
- -              rtnl_lock();
- -              netif_device_detach(netdev);
- -              if (netif_running(netdev))
- -                      be_close(netdev);
- -              rtnl_unlock();
- -
- -              be_clear(adapter);
+ +              be_cleanup(adapter);
         }
   
         if (state == pci_channel_io_perm_failure)
@@@ -5648,73 -5530,43 +5648,73 @@@ static void be_eeh_resume(struct pci_de
   {
         int status = 0;
         struct be_adapter *adapter = pci_get_drvdata(pdev);
- -      struct net_device *netdev =  adapter->netdev;
   
         dev_info(&adapter->pdev->dev, "EEH resume\n");
   
         pci_save_state(pdev);
   
- -      status = be_cmd_reset_function(adapter);
+ +      status = be_resume(adapter);
         if (status)
                 goto err;
   
- -      /* On some BE3 FW versions, after a HW reset,
- -       * interrupts will remain disabled for each function.
- -       * So, explicitly enable interrupts
+ +      be_schedule_err_detection(adapter);
+ +      return;
+ +err:
+ +      dev_err(&adapter->pdev->dev, "EEH resume failed\n");
+ +}
+ +
+ +static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
+ +{
+ +      struct be_adapter *adapter = pci_get_drvdata(pdev);
+ +      u16 num_vf_qs;
+ +      int status;
+ +
+ +      if (!num_vfs)
+ +              be_vf_clear(adapter);
+ +
+ +      adapter->num_vfs = num_vfs;
+ +
+ +      if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
+ +              dev_warn(&pdev->dev,
+ +                       "Cannot disable VFs while they are assigned\n");
+ +              return -EBUSY;
+ +      }
+ +
+ +      /* When the HW is in SRIOV capable configuration, the PF-pool resources
+ +       * are equally distributed across the max-number of VFs. The user may
+ +       * request only a subset of the max-vfs to be enabled.
+ +       * Based on num_vfs, redistribute the resources across num_vfs so that
+ +       * each VF will have access to a larger number of resources.
+ +       * This facility is not available in BE3 FW.
+ +       * Also, this is done by FW in Lancer chip.
          */
- -      be_intr_set(adapter, true);
+ +      if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
+ +              num_vf_qs = be_calculate_vf_qs(adapter, adapter->num_vfs);
+ +              status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
+ +                                               adapter->num_vfs, num_vf_qs);
+ +              if (status)
+ +                      dev_err(&pdev->dev,
+ +                              "Failed to optimize SR-IOV resources\n");
+ +      }
   
- -      /* tell fw we're ready to fire cmds */
- -      status = be_cmd_fw_init(adapter);
+ +      status = be_get_resources(adapter);
         if (status)
- -              goto err;
+ +              return be_cmd_status(status);
   
- -      status = be_setup(adapter);
+ +      /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
+ +      rtnl_lock();
+ +      status = be_update_queues(adapter);
+ +      rtnl_unlock();
         if (status)
- -              goto err;
+ +              return be_cmd_status(status);
   
- -      if (netif_running(netdev)) {
- -              status = be_open(netdev);
- -              if (status)
- -                      goto err;
- -      }
+ +      if (adapter->num_vfs)
+ +              status = be_vf_setup(adapter);
   
- -      schedule_delayed_work(&adapter->func_recovery_work,
- -                            msecs_to_jiffies(1000));
- -      netif_device_attach(netdev);
- -      return;
- -err:
- -      dev_err(&adapter->pdev->dev, "EEH resume failed\n");
+ +      if (!status)
+ +              return adapter->num_vfs;
+ +
+ +      return 0;
   }
   
   static const struct pci_error_handlers be_eeh_handlers = {
@@@ -5729,9 -5581,8 +5729,9 @@@ static struct pci_driver be_driver = 
         .probe = be_probe,
         .remove = be_remove,
         .suspend = be_suspend,
- -      .resume = be_resume,
+ +      .resume = be_pci_resume,
         .shutdown = be_shutdown,
+ +      .sriov_configure = be_pci_sriov_configure,
         .err_handler = &be_eeh_handlers
   };
   
@@@ -5745,11 -5596,6 +5745,11 @@@ static int __init be_init_module(void
                 rx_frag_size = 2048;
         }
   
+ +      if (num_vfs > 0) {
+ +              pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
+ +              pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
+ +      }
+ +
         return pci_register_driver(&be_driver);
   }
   module_init(be_init_module);
diff --combined drivers/net/vxlan.c

index 577c9b071ad9e8568d955a39ce00eb185e52e186,fceb637efd6b1246a836a244600e00d68a504aeb..154116aafd0d8c5cb6caab9056a2245cbc3c783b
--- 1/drivers/net/vxlan.c
--- 2/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@@ -127,6 -127,10 +127,6 @@@ struct vxlan_dev 
         __u8              ttl;
         u32               flags;        /* VXLAN_F_* in vxlan.h */
   
- -      struct work_struct sock_work;
- -      struct work_struct igmp_join;
- -      struct work_struct igmp_leave;
- -
         unsigned long     age_interval;
         struct timer_list age_timer;
         spinlock_t        hash_lock;
@@@ -140,56 -144,58 +140,56 @@@
   static u32 vxlan_salt __read_mostly;
   static struct workqueue_struct *vxlan_wq;
   
- -static void vxlan_sock_work(struct work_struct *work);
- -
   #if IS_ENABLED(CONFIG_IPV6)
   static inline
   bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
   {
- -       if (a->sa.sa_family != b->sa.sa_family)
- -               return false;
- -       if (a->sa.sa_family == AF_INET6)
- -               return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr);
- -       else
- -               return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
+ +      if (a->sa.sa_family != b->sa.sa_family)
+ +              return false;
+ +      if (a->sa.sa_family == AF_INET6)
+ +              return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr);
+ +      else
+ +              return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
   }
   
   static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
   {
- -       if (ipa->sa.sa_family == AF_INET6)
- -               return ipv6_addr_any(&ipa->sin6.sin6_addr);
- -       else
- -               return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
+ +      if (ipa->sa.sa_family == AF_INET6)
+ +              return ipv6_addr_any(&ipa->sin6.sin6_addr);
+ +      else
+ +              return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
   }
   
   static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
   {
- -       if (ipa->sa.sa_family == AF_INET6)
- -               return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
- -       else
- -               return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
+ +      if (ipa->sa.sa_family == AF_INET6)
+ +              return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
+ +      else
+ +              return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
   }
   
   static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
   {
- -       if (nla_len(nla) >= sizeof(struct in6_addr)) {
- -               nla_memcpy(&ip->sin6.sin6_addr, nla, sizeof(struct in6_addr));
- -               ip->sa.sa_family = AF_INET6;
- -               return 0;
- -       } else if (nla_len(nla) >= sizeof(__be32)) {
- -               ip->sin.sin_addr.s_addr = nla_get_be32(nla);
- -               ip->sa.sa_family = AF_INET;
- -               return 0;
- -       } else {
- -               return -EAFNOSUPPORT;
- -       }
+ +      if (nla_len(nla) >= sizeof(struct in6_addr)) {
+ +              ip->sin6.sin6_addr = nla_get_in6_addr(nla);
+ +              ip->sa.sa_family = AF_INET6;
+ +              return 0;
+ +      } else if (nla_len(nla) >= sizeof(__be32)) {
+ +              ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
+ +              ip->sa.sa_family = AF_INET;
+ +              return 0;
+ +      } else {
+ +              return -EAFNOSUPPORT;
+ +      }
   }
   
   static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
- -                             const union vxlan_addr *ip)
+ +                            const union vxlan_addr *ip)
   {
- -       if (ip->sa.sa_family == AF_INET6)
- -               return nla_put(skb, attr, sizeof(struct in6_addr), &ip->sin6.sin6_addr);
- -       else
- -               return nla_put_be32(skb, attr, ip->sin.sin_addr.s_addr);
+ +      if (ip->sa.sa_family == AF_INET6)
+ +              return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
+ +      else
+ +              return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
   }
   
   #else /* !CONFIG_IPV6 */
@@@ -197,36 -203,36 +197,36 @@@
   static inline
   bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
   {
- -       return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
+ +      return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
   }
   
   static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
   {
- -       return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
+ +      return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
   }
   
   static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
   {
- -       return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
+ +      return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
   }
   
   static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
   {
- -       if (nla_len(nla) >= sizeof(struct in6_addr)) {
- -               return -EAFNOSUPPORT;
- -       } else if (nla_len(nla) >= sizeof(__be32)) {
- -               ip->sin.sin_addr.s_addr = nla_get_be32(nla);
- -               ip->sa.sa_family = AF_INET;
- -               return 0;
- -       } else {
- -               return -EAFNOSUPPORT;
- -       }
+ +      if (nla_len(nla) >= sizeof(struct in6_addr)) {
+ +              return -EAFNOSUPPORT;
+ +      } else if (nla_len(nla) >= sizeof(__be32)) {
+ +              ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
+ +              ip->sa.sa_family = AF_INET;
+ +              return 0;
+ +      } else {
+ +              return -EAFNOSUPPORT;
+ +      }
   }
   
   static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
- -                             const union vxlan_addr *ip)
+ +                            const union vxlan_addr *ip)
   {
- -       return nla_put_be32(skb, attr, ip->sin.sin_addr.s_addr);
+ +      return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
   }
   #endif
   
@@@ -989,7 -995,7 +989,7 @@@ out
   
   /* Watch incoming packets to learn mapping between Ethernet address
    * and Tunnel endpoint.
- - * Return true if packet is bogus and should be droppped.
+ + * Return true if packet is bogus and should be dropped.
    */
   static bool vxlan_snoop(struct net_device *dev,
                         union vxlan_addr *src_ip, const u8 *src_mac)
@@@ -1066,6 -1072,11 +1066,6 @@@ static bool vxlan_group_used(struct vxl
         return false;
   }
   
- -static void vxlan_sock_hold(struct vxlan_sock *vs)
- -{
- -      atomic_inc(&vs->refcnt);
- -}
- -
   void vxlan_sock_release(struct vxlan_sock *vs)
   {
         struct sock *sk = vs->sock->sk;
@@@ -1084,16 -1095,17 +1084,16 @@@
   }
   EXPORT_SYMBOL_GPL(vxlan_sock_release);
   
- -/* Callback to update multicast group membership when first VNI on
- - * multicast asddress is brought up
- - * Done as workqueue because ip_mc_join_group acquires RTNL.
+ +/* Update multicast group membership when first VNI on
+ + * multicast address is brought up
    */
- -static void vxlan_igmp_join(struct work_struct *work)
+ +static int vxlan_igmp_join(struct vxlan_dev *vxlan)
   {
- -      struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join);
         struct vxlan_sock *vs = vxlan->vn_sock;
         struct sock *sk = vs->sock->sk;
         union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
         int ifindex = vxlan->default_dst.remote_ifindex;
+ +      int ret = -EINVAL;
   
         lock_sock(sk);
         if (ip->sa.sa_family == AF_INET) {
@@@ -1102,26 -1114,27 +1102,26 @@@
                         .imr_ifindex            = ifindex,
                 };
   
- -              ip_mc_join_group(sk, &mreq);
+ +              ret = ip_mc_join_group(sk, &mreq);
   #if IS_ENABLED(CONFIG_IPV6)
         } else {
- -              ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
- -                                           &ip->sin6.sin6_addr);
+ +              ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
+ +                                                 &ip->sin6.sin6_addr);
   #endif
         }
         release_sock(sk);
   
- -      vxlan_sock_release(vs);
- -      dev_put(vxlan->dev);
+ +      return ret;
   }
   
   /* Inverse of vxlan_igmp_join when last VNI is brought down */
- -static void vxlan_igmp_leave(struct work_struct *work)
+ +static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
   {
- -      struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave);
         struct vxlan_sock *vs = vxlan->vn_sock;
         struct sock *sk = vs->sock->sk;
         union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
         int ifindex = vxlan->default_dst.remote_ifindex;
+ +      int ret = -EINVAL;
   
         lock_sock(sk);
         if (ip->sa.sa_family == AF_INET) {
@@@ -1130,16 -1143,18 +1130,16 @@@
                         .imr_ifindex            = ifindex,
                 };
   
- -              ip_mc_leave_group(sk, &mreq);
+ +              ret = ip_mc_leave_group(sk, &mreq);
   #if IS_ENABLED(CONFIG_IPV6)
         } else {
- -              ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
- -                                           &ip->sin6.sin6_addr);
+ +              ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
+ +                                                 &ip->sin6.sin6_addr);
   #endif
         }
- -
         release_sock(sk);
   
- -      vxlan_sock_release(vs);
- -      dev_put(vxlan->dev);
+ +      return ret;
   }
   
   static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
@@@ -1229,7 -1244,7 +1229,7 @@@ static int vxlan_udp_encap_recv(struct 
                  * this as a malformed packet. This behavior diverges from
                  * VXLAN RFC (RFC7348) which stipulates that bits
                  * in reserved fields are to be ignored. The approach here
- -               * maintains compatbility with previous stack code, and also
+ +               * maintains compatibility with previous stack code, and also
                  * is more robust and provides a little more security in
                  * adding extensions to VXLAN.
                  */
@@@ -1672,8 -1687,7 +1672,8 @@@ static void vxlan_build_gbp_hdr(struct 
   }
   
   #if IS_ENABLED(CONFIG_IPV6)
- -static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb,
+ +static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
+ +                         struct sk_buff *skb,
                            struct net_device *dev, struct in6_addr *saddr,
                            struct in6_addr *daddr, __u8 prio, __u8 ttl,
                            __be16 src_port, __be16 dst_port,
@@@ -1699,12 -1713,6 +1699,6 @@@
                 }
         }
   
-       skb = iptunnel_handle_offloads(skb, udp_sum, type);
-       if (IS_ERR(skb)) {
-               err = -EINVAL;
-               goto err;
-       }
- 
         skb_scrub_packet(skb, xnet);
   
         min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
@@@ -1724,6 -1732,12 +1718,12 @@@
                 goto err;
         }
   
+       skb = iptunnel_handle_offloads(skb, udp_sum, type);
+       if (IS_ERR(skb)) {
+               err = -EINVAL;
+               goto err;
+       }
+ 
         vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
         vxh->vx_flags = htonl(VXLAN_HF_VNI);
         vxh->vx_vni = md->vni;
@@@ -1749,7 -1763,7 +1749,7 @@@
   
         skb_set_inner_protocol(skb, htons(ETH_P_TEB));
   
- -      udp_tunnel6_xmit_skb(dst, skb, dev, saddr, daddr, prio,
+ +      udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio,
                              ttl, src_port, dst_port,
                              !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX));
         return 0;
@@@ -1759,7 -1773,7 +1759,7 @@@ err
   }
   #endif
   
- -int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb,
+ +int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
                    __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
                    __be16 src_port, __be16 dst_port,
                    struct vxlan_metadata *md, bool xnet, u32 vxflags)
@@@ -1784,10 -1798,6 +1784,6 @@@
                 }
         }
   
-       skb = iptunnel_handle_offloads(skb, udp_sum, type);
-       if (IS_ERR(skb))
-               return PTR_ERR(skb);
- 
         min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
                         + VXLAN_HLEN + sizeof(struct iphdr)
                         + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
@@@ -1803,6 -1813,10 +1799,10 @@@
         if (WARN_ON(!skb))
                 return -ENOMEM;
   
+       skb = iptunnel_handle_offloads(skb, udp_sum, type);
+       if (IS_ERR(skb))
+               return PTR_ERR(skb);
+ 
         vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
         vxh->vx_flags = htonl(VXLAN_HF_VNI);
         vxh->vx_vni = md->vni;
@@@ -1828,7 -1842,7 +1828,7 @@@
   
         skb_set_inner_protocol(skb, htons(ETH_P_TEB));
   
- -      return udp_tunnel_xmit_skb(rt, skb, src, dst, tos,
+ +      return udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos,
                                    ttl, df, src_port, dst_port, xnet,
                                    !(vxflags & VXLAN_F_UDP_CSUM));
   }
@@@ -1883,7 -1897,6 +1883,7 @@@ static void vxlan_xmit_one(struct sk_bu
                            struct vxlan_rdst *rdst, bool did_rsc)
   {
         struct vxlan_dev *vxlan = netdev_priv(dev);
+ +      struct sock *sk = vxlan->vn_sock->sock->sk;
         struct rtable *rt = NULL;
         const struct iphdr *old_iph;
         struct flowi4 fl4;
@@@ -1963,7 -1976,7 +1963,7 @@@
                 md.vni = htonl(vni << 8);
                 md.gbp = skb->mark;
   
- -              err = vxlan_xmit_skb(rt, skb, fl4.saddr,
+ +              err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr,
                                      dst->sin.sin_addr.s_addr, tos, ttl, df,
                                      src_port, dst_port, &md,
                                      !net_eq(vxlan->net, dev_net(vxlan->dev)),
@@@ -1977,6 -1990,7 +1977,6 @@@
                 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
   #if IS_ENABLED(CONFIG_IPV6)
         } else {
- -              struct sock *sk = vxlan->vn_sock->sock->sk;
                 struct dst_entry *ndst;
                 struct flowi6 fl6;
                 u32 flags;
@@@ -2022,7 -2036,7 +2022,7 @@@
                 md.vni = htonl(vni << 8);
                 md.gbp = skb->mark;
   
- -              err = vxlan6_xmit_skb(ndst, skb, dev, &fl6.saddr, &fl6.daddr,
+ +              err = vxlan6_xmit_skb(ndst, sk, skb, dev, &fl6.saddr, &fl6.daddr,
                                       0, ttl, src_port, dst_port, &md,
                                       !net_eq(vxlan->net, dev_net(vxlan->dev)),
                                       vxlan->flags);
@@@ -2161,22 -2175,37 +2161,22 @@@ static void vxlan_cleanup(unsigned lon
   
   static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
   {
+ +      struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
         __u32 vni = vxlan->default_dst.remote_vni;
   
         vxlan->vn_sock = vs;
+ +      spin_lock(&vn->sock_lock);
         hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
+ +      spin_unlock(&vn->sock_lock);
   }
   
   /* Setup stats when device is created */
   static int vxlan_init(struct net_device *dev)
   {
- -      struct vxlan_dev *vxlan = netdev_priv(dev);
- -      struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
- -      struct vxlan_sock *vs;
- -      bool ipv6 = vxlan->flags & VXLAN_F_IPV6;
- -
         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
         if (!dev->tstats)
                 return -ENOMEM;
   
- -      spin_lock(&vn->sock_lock);
- -      vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
- -                           vxlan->dst_port, vxlan->flags);
- -      if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
- -              /* If we have a socket with same port already, reuse it */
- -              vxlan_vs_add_dev(vs, vxlan);
- -      } else {
- -              /* otherwise make new socket outside of RTNL */
- -              dev_hold(dev);
- -              queue_work(vxlan_wq, &vxlan->sock_work);
- -      }
- -      spin_unlock(&vn->sock_lock);
- -
         return 0;
   }
   
@@@ -2194,9 -2223,12 +2194,9 @@@ static void vxlan_fdb_delete_default(st
   static void vxlan_uninit(struct net_device *dev)
   {
         struct vxlan_dev *vxlan = netdev_priv(dev);
- -      struct vxlan_sock *vs = vxlan->vn_sock;
   
         vxlan_fdb_delete_default(vxlan);
   
- -      if (vs)
- -              vxlan_sock_release(vs);
         free_percpu(dev->tstats);
   }
   
@@@ -2204,28 -2236,22 +2204,28 @@@
   static int vxlan_open(struct net_device *dev)
   {
         struct vxlan_dev *vxlan = netdev_priv(dev);
- -      struct vxlan_sock *vs = vxlan->vn_sock;
+ +      struct vxlan_sock *vs;
+ +      int ret = 0;
   
- -      /* socket hasn't been created */
- -      if (!vs)
- -              return -ENOTCONN;
+ +      vs = vxlan_sock_add(vxlan->net, vxlan->dst_port, vxlan_rcv, NULL,
+ +                          false, vxlan->flags);
+ +      if (IS_ERR(vs))
+ +              return PTR_ERR(vs);
+ +
+ +      vxlan_vs_add_dev(vs, vxlan);
   
         if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
- -              vxlan_sock_hold(vs);
- -              dev_hold(dev);
- -              queue_work(vxlan_wq, &vxlan->igmp_join);
+ +              ret = vxlan_igmp_join(vxlan);
+ +              if (ret) {
+ +                      vxlan_sock_release(vs);
+ +                      return ret;
+ +              }
         }
   
         if (vxlan->age_interval)
                 mod_timer(&vxlan->age_timer, jiffies + FDB_AGE_INTERVAL);
   
- -      return 0;
+ +      return ret;
   }
   
   /* Purge the forwarding table */
@@@ -2253,18 -2279,19 +2253,18 @@@ static int vxlan_stop(struct net_devic
         struct vxlan_dev *vxlan = netdev_priv(dev);
         struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
         struct vxlan_sock *vs = vxlan->vn_sock;
+ +      int ret = 0;
   
- -      if (vs && vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
- -          !vxlan_group_used(vn, vxlan)) {
- -              vxlan_sock_hold(vs);
- -              dev_hold(dev);
- -              queue_work(vxlan_wq, &vxlan->igmp_leave);
- -      }
+ +      if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
+ +          !vxlan_group_used(vn, vxlan))
+ +              ret = vxlan_igmp_leave(vxlan);
   
         del_timer_sync(&vxlan->age_timer);
   
         vxlan_flush(vxlan);
+ +      vxlan_sock_release(vs);
   
- -      return 0;
+ +      return ret;
   }
   
   /* Stub, nothing needs to be done. */
@@@ -2375,6 -2402,9 +2375,6 @@@ static void vxlan_setup(struct net_devi
   
         INIT_LIST_HEAD(&vxlan->next);
         spin_lock_init(&vxlan->hash_lock);
- -      INIT_WORK(&vxlan->igmp_join, vxlan_igmp_join);
- -      INIT_WORK(&vxlan->igmp_leave, vxlan_igmp_leave);
- -      INIT_WORK(&vxlan->sock_work, vxlan_sock_work);
   
         init_timer_deferrable(&vxlan->age_timer);
         vxlan->age_timer.function = vxlan_cleanup;
@@@ -2486,6 -2516,7 +2486,6 @@@ static struct socket *vxlan_create_sock
                     !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
         } else {
                 udp_conf.family = AF_INET;
- -              udp_conf.local_ip.s_addr = INADDR_ANY;
         }
   
         udp_conf.local_udp_port = port;
@@@ -2521,8 -2552,6 +2521,8 @@@ static struct vxlan_sock *vxlan_socket_
   
         sock = vxlan_create_sock(net, ipv6, port, flags);
         if (IS_ERR(sock)) {
+ +              pr_info("Cannot bind port %d, err=%ld\n", ntohs(port),
+ +                      PTR_ERR(sock));
                 kfree(vs);
                 return ERR_CAST(sock);
         }
@@@ -2562,23 -2591,45 +2562,23 @@@ struct vxlan_sock *vxlan_sock_add(struc
         struct vxlan_sock *vs;
         bool ipv6 = flags & VXLAN_F_IPV6;
   
- -      vs = vxlan_socket_create(net, port, rcv, data, flags);
- -      if (!IS_ERR(vs))
- -              return vs;
- -
- -      if (no_share)   /* Return error if sharing is not allowed. */
- -              return vs;
- -
- -      spin_lock(&vn->sock_lock);
- -      vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags);
- -      if (vs && ((vs->rcv != rcv) ||
- -                 !atomic_add_unless(&vs->refcnt, 1, 0)))
- -                      vs = ERR_PTR(-EBUSY);
- -      spin_unlock(&vn->sock_lock);
- -
- -      if (!vs)
- -              vs = ERR_PTR(-EINVAL);
+ +      if (!no_share) {
+ +              spin_lock(&vn->sock_lock);
+ +              vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port,
+ +                                   flags);
+ +              if (vs && vs->rcv == rcv) {
+ +                      if (!atomic_add_unless(&vs->refcnt, 1, 0))
+ +                              vs = ERR_PTR(-EBUSY);
+ +                      spin_unlock(&vn->sock_lock);
+ +                      return vs;
+ +              }
+ +              spin_unlock(&vn->sock_lock);
+ +      }
   
- -      return vs;
+ +      return vxlan_socket_create(net, port, rcv, data, flags);
   }
   EXPORT_SYMBOL_GPL(vxlan_sock_add);
   
- -/* Scheduled at device creation to bind to a socket */
- -static void vxlan_sock_work(struct work_struct *work)
- -{
- -      struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, sock_work);
- -      struct net *net = vxlan->net;
- -      struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- -      __be16 port = vxlan->dst_port;
- -      struct vxlan_sock *nvs;
- -
- -      nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false, vxlan->flags);
- -      spin_lock(&vn->sock_lock);
- -      if (!IS_ERR(nvs))
- -              vxlan_vs_add_dev(nvs, vxlan);
- -      spin_unlock(&vn->sock_lock);
- -
- -      dev_put(vxlan->dev);
- -}
- -
   static int vxlan_newlink(struct net *src_net, struct net_device *dev,
                          struct nlattr *tb[], struct nlattr *data[])
   {
@@@ -2600,25 -2651,27 +2600,25 @@@
         /* Unless IPv6 is explicitly requested, assume IPv4 */
         dst->remote_ip.sa.sa_family = AF_INET;
         if (data[IFLA_VXLAN_GROUP]) {
- -              dst->remote_ip.sin.sin_addr.s_addr = nla_get_be32(data[IFLA_VXLAN_GROUP]);
+ +              dst->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
         } else if (data[IFLA_VXLAN_GROUP6]) {
                 if (!IS_ENABLED(CONFIG_IPV6))
                         return -EPFNOSUPPORT;
   
- -              nla_memcpy(&dst->remote_ip.sin6.sin6_addr, data[IFLA_VXLAN_GROUP6],
- -                         sizeof(struct in6_addr));
+ +              dst->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
                 dst->remote_ip.sa.sa_family = AF_INET6;
                 use_ipv6 = true;
         }
   
         if (data[IFLA_VXLAN_LOCAL]) {
- -              vxlan->saddr.sin.sin_addr.s_addr = nla_get_be32(data[IFLA_VXLAN_LOCAL]);
+ +              vxlan->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
                 vxlan->saddr.sa.sa_family = AF_INET;
         } else if (data[IFLA_VXLAN_LOCAL6]) {
                 if (!IS_ENABLED(CONFIG_IPV6))
                         return -EPFNOSUPPORT;
   
                 /* TODO: respect scope id */
- -              nla_memcpy(&vxlan->saddr.sin6.sin6_addr, data[IFLA_VXLAN_LOCAL6],
- -                         sizeof(struct in6_addr));
+ +              vxlan->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
                 vxlan->saddr.sa.sa_family = AF_INET6;
                 use_ipv6 = true;
         }
@@@ -2803,13 -2856,13 +2803,13 @@@ static int vxlan_fill_info(struct sk_bu
   
         if (!vxlan_addr_any(&dst->remote_ip)) {
                 if (dst->remote_ip.sa.sa_family == AF_INET) {
- -                      if (nla_put_be32(skb, IFLA_VXLAN_GROUP,
- -                                       dst->remote_ip.sin.sin_addr.s_addr))
+ +                      if (nla_put_in_addr(skb, IFLA_VXLAN_GROUP,
+ +                                          dst->remote_ip.sin.sin_addr.s_addr))
                                 goto nla_put_failure;
   #if IS_ENABLED(CONFIG_IPV6)
                 } else {
- -                      if (nla_put(skb, IFLA_VXLAN_GROUP6, sizeof(struct in6_addr),
- -                                  &dst->remote_ip.sin6.sin6_addr))
+ +                      if (nla_put_in6_addr(skb, IFLA_VXLAN_GROUP6,
+ +                                           &dst->remote_ip.sin6.sin6_addr))
                                 goto nla_put_failure;
   #endif
                 }
@@@ -2820,13 -2873,13 +2820,13 @@@
   
         if (!vxlan_addr_any(&vxlan->saddr)) {
                 if (vxlan->saddr.sa.sa_family == AF_INET) {
- -                      if (nla_put_be32(skb, IFLA_VXLAN_LOCAL,
- -                                       vxlan->saddr.sin.sin_addr.s_addr))
+ +                      if (nla_put_in_addr(skb, IFLA_VXLAN_LOCAL,
+ +                                          vxlan->saddr.sin.sin_addr.s_addr))
                                 goto nla_put_failure;
   #if IS_ENABLED(CONFIG_IPV6)
                 } else {
- -                      if (nla_put(skb, IFLA_VXLAN_LOCAL6, sizeof(struct in6_addr),
- -                                  &vxlan->saddr.sin6.sin6_addr))
+ +                      if (nla_put_in6_addr(skb, IFLA_VXLAN_LOCAL6,
+ +                                           &vxlan->saddr.sin6.sin6_addr))
                                 goto nla_put_failure;
   #endif
                 }
diff --combined include/linux/brcmphy.h

index cab60661752237f736c817588d1d0e1a01469cdf,1c9920b38fa1b15f8dd91b2e47c653e9f7cebe0b..ae2982c0f7a60ed93339e767feaf1fc89aa02134
--- 1/include/linux/brcmphy.h
--- 2/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@@ -11,6 -11,7 +11,7 @@@
   #define PHY_ID_BCM5421                        0x002060e0
   #define PHY_ID_BCM5464                        0x002060b0
   #define PHY_ID_BCM5461                        0x002060c0
+ #define PHY_ID_BCM54616S              0x03625d10
   #define PHY_ID_BCM57780                       0x03625d90
   
   #define PHY_ID_BCM7250                        0xae025280
@@@ -19,7 -20,6 +20,7 @@@
   #define PHY_ID_BCM7425                        0x03625e60
   #define PHY_ID_BCM7429                        0x600d8730
   #define PHY_ID_BCM7439                        0x600d8480
+ +#define PHY_ID_BCM7439_2              0xae025080
   #define PHY_ID_BCM7445                        0x600d8510
   
   #define PHY_BCM_OUI_MASK              0xfffffc00
diff --combined net/ipv4/fou.c

index 263710259774151e40fa67ba3aa9652d4a1e2955,335e75207284e13eab64b748188d82764818b46d..af150b43b214123b052c43ec7e40449af3d7ecd2
--- 1/net/ipv4/fou.c
--- 2/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@@ -16,12 -16,14 +16,12 @@@
   #include <uapi/linux/fou.h>
   #include <uapi/linux/genetlink.h>
   
- -static DEFINE_SPINLOCK(fou_lock);
- -static LIST_HEAD(fou_list);
- -
   struct fou {
         struct socket *sock;
         u8 protocol;
         u8 flags;
- -      u16 port;
+ +      __be16 port;
+ +      u16 type;
         struct udp_offload udp_offloads;
         struct list_head list;
   };
@@@ -35,13 -37,6 +35,13 @@@ struct fou_cfg 
         struct udp_port_cfg udp_config;
   };
   
+ +static unsigned int fou_net_id;
+ +
+ +struct fou_net {
+ +      struct list_head fou_list;
+ +      struct mutex fou_lock;
+ +};
+ +
   static inline struct fou *fou_from_sock(struct sock *sk)
   {
         return sk->sk_user_data;
@@@ -392,21 -387,20 +392,21 @@@ out_unlock
         return err;
   }
   
- -static int fou_add_to_port_list(struct fou *fou)
+ +static int fou_add_to_port_list(struct net *net, struct fou *fou)
   {
+ +      struct fou_net *fn = net_generic(net, fou_net_id);
         struct fou *fout;
   
- -      spin_lock(&fou_lock);
- -      list_for_each_entry(fout, &fou_list, list) {
+ +      mutex_lock(&fn->fou_lock);
+ +      list_for_each_entry(fout, &fn->fou_list, list) {
                 if (fou->port == fout->port) {
- -                      spin_unlock(&fou_lock);
+ +                      mutex_unlock(&fn->fou_lock);
                         return -EALREADY;
                 }
         }
   
- -      list_add(&fou->list, &fou_list);
- -      spin_unlock(&fou_lock);
+ +      list_add(&fou->list, &fn->fou_list);
+ +      mutex_unlock(&fn->fou_lock);
   
         return 0;
   }
@@@ -416,10 -410,14 +416,10 @@@ static void fou_release(struct fou *fou
         struct socket *sock = fou->sock;
         struct sock *sk = sock->sk;
   
- -      udp_del_offload(&fou->udp_offloads);
- -
+ +      if (sk->sk_family == AF_INET)
+ +              udp_del_offload(&fou->udp_offloads);
         list_del(&fou->list);
- -
- -      /* Remove hooks into tunnel socket */
- -      sk->sk_user_data = NULL;
- -
- -      sock_release(sock);
+ +      udp_tunnel_sock_release(sock);
   
         kfree(fou);
   }
@@@ -449,10 -447,10 +449,10 @@@ static int gue_encap_init(struct sock *
   static int fou_create(struct net *net, struct fou_cfg *cfg,
                       struct socket **sockp)
   {
- -      struct fou *fou = NULL;
- -      int err;
         struct socket *sock = NULL;
+ +      struct fou *fou = NULL;
         struct sock *sk;
+ +      int err;
   
         /* Open UDP socket */
         err = udp_sock_create(net, &cfg->udp_config, &sock);
@@@ -488,8 -486,6 +488,8 @@@
                 goto error;
         }
   
+ +      fou->type = cfg->type;
+ +
         udp_sk(sk)->encap_type = 1;
         udp_encap_enable();
   
@@@ -506,7 -502,7 +506,7 @@@
                         goto error;
         }
   
- -      err = fou_add_to_port_list(fou);
+ +      err = fou_add_to_port_list(net, fou);
         if (err)
                 goto error;
   
@@@ -518,27 -514,27 +518,27 @@@
   error:
         kfree(fou);
         if (sock)
- -              sock_release(sock);
+ +              udp_tunnel_sock_release(sock);
   
         return err;
   }
   
   static int fou_destroy(struct net *net, struct fou_cfg *cfg)
   {
- -      struct fou *fou;
- -      u16 port = cfg->udp_config.local_udp_port;
+ +      struct fou_net *fn = net_generic(net, fou_net_id);
+ +      __be16 port = cfg->udp_config.local_udp_port;
         int err = -EINVAL;
+ +      struct fou *fou;
   
- -      spin_lock(&fou_lock);
- -      list_for_each_entry(fou, &fou_list, list) {
+ +      mutex_lock(&fn->fou_lock);
+ +      list_for_each_entry(fou, &fn->fou_list, list) {
                 if (fou->port == port) {
- -                      udp_del_offload(&fou->udp_offloads);
                         fou_release(fou);
                         err = 0;
                         break;
                 }
         }
- -      spin_unlock(&fou_lock);
+ +      mutex_unlock(&fn->fou_lock);
   
         return err;
   }
@@@ -577,7 -573,7 +577,7 @@@ static int parse_nl_config(struct genl_
         }
   
         if (info->attrs[FOU_ATTR_PORT]) {
- -              u16 port = nla_get_u16(info->attrs[FOU_ATTR_PORT]);
+ +              __be16 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]);
   
                 cfg->udp_config.local_udp_port = port;
         }
@@@ -596,7 -592,6 +596,7 @@@
   
   static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
   {
+ +      struct net *net = genl_info_net(info);
         struct fou_cfg cfg;
         int err;
   
@@@ -604,120 -599,16 +604,120 @@@
         if (err)
                 return err;
   
- -      return fou_create(&init_net, &cfg, NULL);
+ +      return fou_create(net, &cfg, NULL);
   }
   
   static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
   {
+ +      struct net *net = genl_info_net(info);
+ +      struct fou_cfg cfg;
+ +      int err;
+ +
+ +      err = parse_nl_config(info, &cfg);
+ +      if (err)
+ +              return err;
+ +
+ +      return fou_destroy(net, &cfg);
+ +}
+ +
+ +static int fou_fill_info(struct fou *fou, struct sk_buff *msg)
+ +{
+ +      if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) ||
+ +          nla_put_be16(msg, FOU_ATTR_PORT, fou->port) ||
+ +          nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) ||
+ +          nla_put_u8(msg, FOU_ATTR_TYPE, fou->type))
+ +              return -1;
+ +
+ +      if (fou->flags & FOU_F_REMCSUM_NOPARTIAL)
+ +              if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL))
+ +                      return -1;
+ +      return 0;
+ +}
+ +
+ +static int fou_dump_info(struct fou *fou, u32 portid, u32 seq,
+ +                       u32 flags, struct sk_buff *skb, u8 cmd)
+ +{
+ +      void *hdr;
+ +
+ +      hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd);
+ +      if (!hdr)
+ +              return -ENOMEM;
+ +
+ +      if (fou_fill_info(fou, skb) < 0)
+ +              goto nla_put_failure;
+ +
+ +      genlmsg_end(skb, hdr);
+ +      return 0;
+ +
+ +nla_put_failure:
+ +      genlmsg_cancel(skb, hdr);
+ +      return -EMSGSIZE;
+ +}
+ +
+ +static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info)
+ +{
+ +      struct net *net = genl_info_net(info);
+ +      struct fou_net *fn = net_generic(net, fou_net_id);
+ +      struct sk_buff *msg;
         struct fou_cfg cfg;
+ +      struct fou *fout;
+ +      __be16 port;
+ +      int ret;
+ +
+ +      ret = parse_nl_config(info, &cfg);
+ +      if (ret)
+ +              return ret;
+ +      port = cfg.udp_config.local_udp_port;
+ +      if (port == 0)
+ +              return -EINVAL;
+ +
+ +      msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ +      if (!msg)
+ +              return -ENOMEM;
+ +
+ +      ret = -ESRCH;
+ +      mutex_lock(&fn->fou_lock);
+ +      list_for_each_entry(fout, &fn->fou_list, list) {
+ +              if (port == fout->port) {
+ +                      ret = fou_dump_info(fout, info->snd_portid,
+ +                                          info->snd_seq, 0, msg,
+ +                                          info->genlhdr->cmd);
+ +                      break;
+ +              }
+ +      }
+ +      mutex_unlock(&fn->fou_lock);
+ +      if (ret < 0)
+ +              goto out_free;
   
- -      parse_nl_config(info, &cfg);
+ +      return genlmsg_reply(msg, info);
   
- -      return fou_destroy(&init_net, &cfg);
+ +out_free:
+ +      nlmsg_free(msg);
+ +      return ret;
+ +}
+ +
+ +static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
+ +{
+ +      struct net *net = sock_net(skb->sk);
+ +      struct fou_net *fn = net_generic(net, fou_net_id);
+ +      struct fou *fout;
+ +      int idx = 0, ret;
+ +
+ +      mutex_lock(&fn->fou_lock);
+ +      list_for_each_entry(fout, &fn->fou_list, list) {
+ +              if (idx++ < cb->args[0])
+ +                      continue;
+ +              ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid,
+ +                                  cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ +                                  skb, FOU_CMD_GET);
+ +              if (ret)
+ +                      goto done;
+ +      }
+ +      mutex_unlock(&fn->fou_lock);
+ +
+ +done:
+ +      cb->args[0] = idx;
+ +      return skb->len;
   }
   
   static const struct genl_ops fou_nl_ops[] = {
@@@ -733,12 -624,6 +733,12 @@@
                 .policy = fou_nl_policy,
                 .flags = GENL_ADMIN_PERM,
         },
+ +      {
+ +              .cmd = FOU_CMD_GET,
+ +              .doit = fou_nl_cmd_get_port,
+ +              .dumpit = fou_nl_dump,
+ +              .policy = fou_nl_policy,
+ +      },
   };
   
   size_t fou_encap_hlen(struct ip_tunnel_encap *e)
@@@ -886,12 -771,12 +886,12 @@@ EXPORT_SYMBOL(gue_build_header)
   
   #ifdef CONFIG_NET_FOU_IP_TUNNELS
   
- static const struct ip_tunnel_encap_ops __read_mostly fou_iptun_ops = {
+ static const struct ip_tunnel_encap_ops fou_iptun_ops = {
         .encap_hlen = fou_encap_hlen,
         .build_header = fou_build_header,
   };
   
- static const struct ip_tunnel_encap_ops __read_mostly gue_iptun_ops = {
+ static const struct ip_tunnel_encap_ops gue_iptun_ops = {
         .encap_hlen = gue_encap_hlen,
         .build_header = gue_build_header,
   };
@@@ -935,63 -820,38 +935,63 @@@ static void ip_tunnel_encap_del_fou_ops
   
   #endif
   
+ +static __net_init int fou_init_net(struct net *net)
+ +{
+ +      struct fou_net *fn = net_generic(net, fou_net_id);
+ +
+ +      INIT_LIST_HEAD(&fn->fou_list);
+ +      mutex_init(&fn->fou_lock);
+ +      return 0;
+ +}
+ +
+ +static __net_exit void fou_exit_net(struct net *net)
+ +{
+ +      struct fou_net *fn = net_generic(net, fou_net_id);
+ +      struct fou *fou, *next;
+ +
+ +      /* Close all the FOU sockets */
+ +      mutex_lock(&fn->fou_lock);
+ +      list_for_each_entry_safe(fou, next, &fn->fou_list, list)
+ +              fou_release(fou);
+ +      mutex_unlock(&fn->fou_lock);
+ +}
+ +
+ +static struct pernet_operations fou_net_ops = {
+ +      .init = fou_init_net,
+ +      .exit = fou_exit_net,
+ +      .id   = &fou_net_id,
+ +      .size = sizeof(struct fou_net),
+ +};
+ +
   static int __init fou_init(void)
   {
         int ret;
   
+ +      ret = register_pernet_device(&fou_net_ops);
+ +      if (ret)
+ +              goto exit;
+ +
         ret = genl_register_family_with_ops(&fou_nl_family,
                                             fou_nl_ops);
- -
         if (ret < 0)
- -              goto exit;
+ +              goto unregister;
   
         ret = ip_tunnel_encap_add_fou_ops();
- -      if (ret < 0)
- -              genl_unregister_family(&fou_nl_family);
+ +      if (ret == 0)
+ +              return 0;
   
+ +      genl_unregister_family(&fou_nl_family);
+ +unregister:
+ +      unregister_pernet_device(&fou_net_ops);
   exit:
         return ret;
   }
   
   static void __exit fou_fini(void)
   {
- -      struct fou *fou, *next;
- -
         ip_tunnel_encap_del_fou_ops();
- -
         genl_unregister_family(&fou_nl_family);
- -
- -      /* Close all the FOU sockets */
- -
- -      spin_lock(&fou_lock);
- -      list_for_each_entry_safe(fou, next, &fou_list, list)
- -              fou_release(fou);
- -      spin_unlock(&fou_lock);
+ +      unregister_pernet_device(&fou_net_ops);
   }
   
   module_init(fou_init);
diff --combined net/ipv4/geneve.c

index b77f5e84c623f055fe277ea2178a29589fadaf1b,a566a2e4715b33d4b4a60e9756afa71f98f00774..8986e63f3bda61a6c8ba980c050b96ec90625107
--- 1/net/ipv4/geneve.c
--- 2/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@@ -113,10 -113,6 +113,6 @@@ int geneve_xmit_skb(struct geneve_sock 
         int min_headroom;
         int err;
   
-       skb = udp_tunnel_handle_offloads(skb, csum);
-       if (IS_ERR(skb))
-               return PTR_ERR(skb);
- 
         min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
                         + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
                         + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
@@@ -131,12 -127,16 +127,16 @@@
         if (unlikely(!skb))
                 return -ENOMEM;
   
+       skb = udp_tunnel_handle_offloads(skb, csum);
+       if (IS_ERR(skb))
+               return PTR_ERR(skb);
+ 
         gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
         geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
   
         skb_set_inner_protocol(skb, htons(ETH_P_TEB));
   
- -      return udp_tunnel_xmit_skb(rt, skb, src, dst,
+ +      return udp_tunnel_xmit_skb(rt, gs->sock->sk, skb, src, dst,
                                    tos, ttl, df, src_port, dst_port, xnet,
                                    !csum);
   }
@@@ -196,7 -196,7 +196,7 @@@ static struct sk_buff **geneve_gro_rece
   
         rcu_read_lock();
         ptype = gro_find_receive_by_type(type);
- -      if (ptype == NULL) {
+ +      if (!ptype) {
                 flush = 1;
                 goto out_unlock;
         }
@@@ -230,7 -230,7 +230,7 @@@ static int geneve_gro_complete(struct s
   
         rcu_read_lock();
         ptype = gro_find_complete_by_type(type);
- -      if (ptype != NULL)
+ +      if (ptype)
                 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
   
         rcu_read_unlock();
diff --combined net/ipv4/tcp_output.c

index e662d85d1635d0269b669bb0f726760be3bae0d2,d520492ba698944620fe6207dffff2786eec7dfd..8c8d7e06b72fc1e5c4a50ca55136757f0501f8c0
--- 1/net/ipv4/tcp_output.c
--- 2/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@@ -518,26 -518,17 +518,26 @@@ static void tcp_options_write(__be32 *p
   
         if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
                 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
+ +              u8 *p = (u8 *)ptr;
+ +              u32 len; /* Fast Open option length */
+ +
+ +              if (foc->exp) {
+ +                      len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
+ +                      *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
+ +                                   TCPOPT_FASTOPEN_MAGIC);
+ +                      p += TCPOLEN_EXP_FASTOPEN_BASE;
+ +              } else {
+ +                      len = TCPOLEN_FASTOPEN_BASE + foc->len;
+ +                      *p++ = TCPOPT_FASTOPEN;
+ +                      *p++ = len;
+ +              }
   
- -              *ptr++ = htonl((TCPOPT_EXP << 24) |
- -                             ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) |
- -                             TCPOPT_FASTOPEN_MAGIC);
- -
- -              memcpy(ptr, foc->val, foc->len);
- -              if ((foc->len & 3) == 2) {
- -                      u8 *align = ((u8 *)ptr) + foc->len;
- -                      align[0] = align[1] = TCPOPT_NOP;
+ +              memcpy(p, foc->val, foc->len);
+ +              if ((len & 3) == 2) {
+ +                      p[foc->len] = TCPOPT_NOP;
+ +                      p[foc->len + 1] = TCPOPT_NOP;
                 }
- -              ptr += (foc->len + 3) >> 2;
+ +              ptr += (len + 3) >> 2;
         }
   }
   
@@@ -574,7 -565,7 +574,7 @@@ static unsigned int tcp_syn_options(str
         opts->mss = tcp_advertise_mss(sk);
         remaining -= TCPOLEN_MSS_ALIGNED;
   
- -      if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
+ +      if (likely(sysctl_tcp_timestamps && !*md5)) {
                 opts->options |= OPTION_TS;
                 opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
                 opts->tsecr = tp->rx_opt.ts_recent;
@@@ -592,17 -583,13 +592,17 @@@
         }
   
         if (fastopen && fastopen->cookie.len >= 0) {
- -              u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
+ +              u32 need = fastopen->cookie.len;
+ +
+ +              need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
+ +                                             TCPOLEN_FASTOPEN_BASE;
                 need = (need + 3) & ~3U;  /* Align to 32 bits */
                 if (remaining >= need) {
                         opts->options |= OPTION_FAST_OPEN_COOKIE;
                         opts->fastopen_cookie = &fastopen->cookie;
                         remaining -= need;
                         tp->syn_fastopen = 1;
+ +                      tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
                 }
         }
   
@@@ -614,14 -601,15 +614,14 @@@ static unsigned int tcp_synack_options(
                                    struct request_sock *req,
                                    unsigned int mss, struct sk_buff *skb,
                                    struct tcp_out_options *opts,
- -                                 struct tcp_md5sig_key **md5,
+ +                                 const struct tcp_md5sig_key *md5,
                                    struct tcp_fastopen_cookie *foc)
   {
         struct inet_request_sock *ireq = inet_rsk(req);
         unsigned int remaining = MAX_TCP_OPTION_SPACE;
   
   #ifdef CONFIG_TCP_MD5SIG
- -      *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
- -      if (*md5) {
+ +      if (md5) {
                 opts->options |= OPTION_MD5;
                 remaining -= TCPOLEN_MD5SIG_ALIGNED;
   
@@@ -632,6 -620,8 +632,6 @@@
                  */
                 ireq->tstamp_ok &= !ireq->sack_ok;
         }
- -#else
- -      *md5 = NULL;
   #endif
   
         /* We always send an MSS option. */
@@@ -655,10 -645,7 +655,10 @@@
                         remaining -= TCPOLEN_SACKPERM_ALIGNED;
         }
         if (foc != NULL && foc->len >= 0) {
- -              u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
+ +              u32 need = foc->len;
+ +
+ +              need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
+ +                                 TCPOLEN_FASTOPEN_BASE;
                 need = (need + 3) & ~3U;  /* Align to 32 bits */
                 if (remaining >= need) {
                         opts->options |= OPTION_FAST_OPEN_COOKIE;
@@@ -1002,7 -989,7 +1002,7 @@@ static int tcp_transmit_skb(struct soc
         if (md5) {
                 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
                 tp->af_specific->calc_md5_hash(opts.hash_location,
- -                                             md5, sk, NULL, skb);
+ +                                             md5, sk, skb);
         }
   #endif
   
@@@ -1164,7 -1151,7 +1164,7 @@@ int tcp_fragment(struct sock *sk, struc
   
         /* Get a new skb... force flag on. */
         buff = sk_stream_alloc_skb(sk, nsize, gfp);
- -      if (buff == NULL)
+ +      if (!buff)
                 return -ENOMEM; /* We'll just try again later. */
   
         sk->sk_wmem_queued += buff->truesize;
@@@ -1367,8 -1354,6 +1367,8 @@@ void tcp_mtup_init(struct sock *sk
                                icsk->icsk_af_ops->net_header_len;
         icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
         icsk->icsk_mtup.probe_size = 0;
+ +      if (icsk->icsk_mtup.enabled)
+ +              icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
   }
   EXPORT_SYMBOL(tcp_mtup_init);
   
@@@ -1723,7 -1708,7 +1723,7 @@@ static int tso_fragment(struct sock *sk
                 return tcp_fragment(sk, skb, len, mss_now, gfp);
   
         buff = sk_stream_alloc_skb(sk, 0, gfp);
- -      if (unlikely(buff == NULL))
+ +      if (unlikely(!buff))
                 return -ENOMEM;
   
         sk->sk_wmem_queued += buff->truesize;
@@@ -1767,23 -1752,20 +1767,23 @@@
   static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
                                  bool *is_cwnd_limited, u32 max_segs)
   {
- -      struct tcp_sock *tp = tcp_sk(sk);
         const struct inet_connection_sock *icsk = inet_csk(sk);
- -      u32 send_win, cong_win, limit, in_flight;
+ +      u32 age, send_win, cong_win, limit, in_flight;
+ +      struct tcp_sock *tp = tcp_sk(sk);
+ +      struct skb_mstamp now;
+ +      struct sk_buff *head;
         int win_divisor;
   
         if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
                 goto send_now;
   
- -      if (icsk->icsk_ca_state != TCP_CA_Open)
+ +      if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_CWR)))
                 goto send_now;
   
- -      /* Defer for less than two clock ticks. */
- -      if (tp->tso_deferred &&
- -          (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
+ +      /* Avoid bursty behavior by allowing defer
+ +       * only if the last write was recent.
+ +       */
+ +      if ((s32)(tcp_time_stamp - tp->lsndtime) > 0)
                 goto send_now;
   
         in_flight = tcp_packets_in_flight(tp);
@@@ -1825,14 -1807,11 +1825,14 @@@
                         goto send_now;
         }
   
- -      /* Ok, it looks like it is advisable to defer.
- -       * Do not rearm the timer if already set to not break TCP ACK clocking.
- -       */
- -      if (!tp->tso_deferred)
- -              tp->tso_deferred = 1 | (jiffies << 1);
+ +      head = tcp_write_queue_head(sk);
+ +      skb_mstamp_get(&now);
+ +      age = skb_mstamp_us_delta(&now, &head->skb_mstamp);
+ +      /* If next ACK is likely to come too late (half srtt), do not defer */
+ +      if (age < (tp->srtt_us >> 4))
+ +              goto send_now;
+ +
+ +      /* Ok, it looks like it is advisable to defer. */
   
         if (cong_win < send_win && cong_win < skb->len)
                 *is_cwnd_limited = true;
@@@ -1840,34 -1819,10 +1840,34 @@@
         return true;
   
   send_now:
- -      tp->tso_deferred = 0;
         return false;
   }
   
+ +static inline void tcp_mtu_check_reprobe(struct sock *sk)
+ +{
+ +      struct inet_connection_sock *icsk = inet_csk(sk);
+ +      struct tcp_sock *tp = tcp_sk(sk);
+ +      struct net *net = sock_net(sk);
+ +      u32 interval;
+ +      s32 delta;
+ +
+ +      interval = net->ipv4.sysctl_tcp_probe_interval;
+ +      delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
+ +      if (unlikely(delta >= interval * HZ)) {
+ +              int mss = tcp_current_mss(sk);
+ +
+ +              /* Update current search range */
+ +              icsk->icsk_mtup.probe_size = 0;
+ +              icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
+ +                      sizeof(struct tcphdr) +
+ +                      icsk->icsk_af_ops->net_header_len;
+ +              icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+ +
+ +              /* Update probe time stamp */
+ +              icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+ +      }
+ +}
+ +
   /* Create a new MTU probe if we are ready.
    * MTU probe is regularly attempting to increase the path MTU by
    * deliberately sending larger packets.  This discovers routing
@@@ -1882,13 -1837,11 +1882,13 @@@ static int tcp_mtu_probe(struct sock *s
         struct tcp_sock *tp = tcp_sk(sk);
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct sk_buff *skb, *nskb, *next;
+ +      struct net *net = sock_net(sk);
         int len;
         int probe_size;
         int size_needed;
         int copy;
         int mss_now;
+ +      int interval;
   
         /* Not currently probing/verifying,
          * not in recovery,
@@@ -1901,25 -1854,12 +1901,25 @@@
             tp->rx_opt.num_sacks || tp->rx_opt.dsack)
                 return -1;
   
- -      /* Very simple search strategy: just double the MSS. */
+ +      /* Use binary search for probe_size between tcp_mss_base
+ +       * and current mss_clamp. If (search_high - search_low) is
+ +       * smaller than a threshold, back off from probing.
+ +       */
         mss_now = tcp_current_mss(sk);
- -      probe_size = 2 * tp->mss_cache;
+ +      probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
+ +                                  icsk->icsk_mtup.search_low) >> 1);
         size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
- -      if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
- -              /* TODO: set timer for probe_converge_event */
+ +      interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
+ +      /* When misfortune happens, we are reprobing actively,
+ +       * and then reprobe timer has expired. We stick with current
+ +       * probing process by not resetting search range to its original.
+ +       */
+ +      if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
+ +              interval < net->ipv4.sysctl_tcp_probe_threshold) {
+ +              /* Check whether enough time has elapsed for
+ +               * another round of probing.
+ +               */
+ +              tcp_mtu_check_reprobe(sk);
                 return -1;
         }
   
@@@ -1941,8 -1881,7 +1941,8 @@@
         }
   
         /* We're allowed to probe.  Build it now. */
- -      if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
+ +      nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC);
+ +      if (!nskb)
                 return -1;
         sk->sk_wmem_queued += nskb->truesize;
         sk_mem_charge(sk, nskb->truesize);
@@@ -2240,7 -2179,7 +2240,7 @@@ void tcp_send_loss_probe(struct sock *s
         int mss = tcp_current_mss(sk);
         int err = -1;
   
- -      if (tcp_send_head(sk) != NULL) {
+ +      if (tcp_send_head(sk)) {
                 err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
                 goto rearm_timer;
         }
@@@ -2750,7 -2689,7 +2750,7 @@@ void tcp_xmit_retransmit_queue(struct s
                 if (skb == tcp_send_head(sk))
                         break;
                 /* we could do better than to assign each time */
- -              if (hole == NULL)
+ +              if (!hole)
                         tp->retransmit_skb_hint = skb;
   
                 /* Assume this retransmit will generate
@@@ -2774,7 -2713,7 +2774,7 @@@ begin_fwd
                         if (!tcp_can_forward_retransmit(sk))
                                 break;
                         /* Backtrack if necessary to non-L'ed skb */
- -                      if (hole != NULL) {
+ +                      if (hole) {
                                 skb = hole;
                                 hole = NULL;
                         }
@@@ -2782,7 -2721,7 +2782,7 @@@
                         goto begin_fwd;
   
                 } else if (!(sacked & TCPCB_LOST)) {
- -                      if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
+ +                      if (!hole && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
                                 hole = skb;
                         continue;
   
@@@ -2827,7 -2766,7 +2827,7 @@@ void tcp_send_fin(struct sock *sk
          */
         mss_now = tcp_current_mss(sk);
   
- -      if (tcp_send_head(sk) != NULL) {
+ +      if (tcp_send_head(sk)) {
                 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
                 TCP_SKB_CB(skb)->end_seq++;
                 tp->write_seq++;
@@@ -2885,14 -2824,14 +2885,14 @@@ int tcp_send_synack(struct sock *sk
         struct sk_buff *skb;
   
         skb = tcp_write_queue_head(sk);
- -      if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
+ +      if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
                 pr_debug("%s: wrong queue state\n", __func__);
                 return -EFAULT;
         }
         if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
                 if (skb_cloned(skb)) {
                         struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
- -                      if (nskb == NULL)
+ +                      if (!nskb)
                                 return -ENOMEM;
                         tcp_unlink_write_queue(skb, sk);
                         __skb_header_release(nskb);
@@@ -2927,7 -2866,7 +2927,7 @@@ struct sk_buff *tcp_make_synack(struct 
         struct tcp_sock *tp = tcp_sk(sk);
         struct tcphdr *th;
         struct sk_buff *skb;
- -      struct tcp_md5sig_key *md5;
+ +      struct tcp_md5sig_key *md5 = NULL;
         int tcp_header_size;
         int mss;
   
@@@ -2940,6 -2879,7 +2940,6 @@@
         skb_reserve(skb, MAX_TCP_HEADER);
   
         skb_dst_set(skb, dst);
- -      security_skb_owned_by(skb, sk);
   
         mss = dst_metric_advmss(dst);
         if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
@@@ -2952,12 -2892,7 +2952,12 @@@
         else
   #endif
         skb_mstamp_get(&skb->skb_mstamp);
- -      tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
+ +
+ +#ifdef CONFIG_TCP_MD5SIG
+ +      rcu_read_lock();
+ +      md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
+ +#endif
+ +      tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
                                              foc) + sizeof(*th);
   
         skb_push(skb, tcp_header_size);
@@@ -2988,12 -2923,14 +2988,14 @@@
   
   #ifdef CONFIG_TCP_MD5SIG
         /* Okay, we have all we need - do the md5 hash if needed */
- -      if (md5) {
+ +      if (md5)
                 tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
- -                                             md5, NULL, req, skb);
- -      }
+ +                                             md5, req_to_sk(req), skb);
+ +      rcu_read_unlock();
   #endif
   
+       /* Do not fool tcpdump (if any), clean our debris */
+       skb->tstamp.tv64 = 0;
         return skb;
   }
   EXPORT_SYMBOL(tcp_make_synack);
@@@ -3031,7 -2968,7 +3033,7 @@@ static void tcp_connect_init(struct soc
                 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
   
   #ifdef CONFIG_TCP_MD5SIG
- -      if (tp->af_specific->md5_lookup(sk, sk) != NULL)
+ +      if (tp->af_specific->md5_lookup(sk, sk))
                 tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
   #endif
   
@@@ -3317,7 -3254,7 +3319,7 @@@ void tcp_send_ack(struct sock *sk
          * sock.
          */
         buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
- -      if (buff == NULL) {
+ +      if (!buff) {
                 inet_csk_schedule_ack(sk);
                 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
@@@ -3361,7 -3298,7 +3363,7 @@@ static int tcp_xmit_probe_skb(struct so
   
         /* We don't queue it, tcp_transmit_skb() sets ownership. */
         skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
- -      if (skb == NULL)
+ +      if (!skb)
                 return -1;
   
         /* Reserve space for headers and set control bits. */
@@@ -3392,8 -3329,8 +3394,8 @@@ int tcp_write_wakeup(struct sock *sk
         if (sk->sk_state == TCP_CLOSE)
                 return -1;
   
- -      if ((skb = tcp_send_head(sk)) != NULL &&
- -          before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
+ +      skb = tcp_send_head(sk);
+ +      if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
                 int err;
                 unsigned int mss = tcp_current_mss(sk);
                 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
diff --combined net/ipv6/ip6_vti.c

index b53148444e157f821c86b467b166fc9ce7bd5ccb,a4ac85052e44e9ce2091ecd4ab5df2f9cbde2512..ed9d681207fa340881fd100db0ea1cb3eb9a2ffb
--- 1/net/ipv6/ip6_vti.c
--- 2/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@@ -218,7 -218,7 +218,7 @@@ static struct ip6_tnl *vti6_tnl_create(
                 sprintf(name, "ip6_vti%%d");
   
         dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup);
- -      if (dev == NULL)
+ +      if (!dev)
                 goto failed;
   
         dev_net_set(dev, net);
@@@ -288,8 -288,7 +288,7 @@@ static struct ip6_tnl *vti6_locate(stru
   static void vti6_dev_uninit(struct net_device *dev)
   {
         struct ip6_tnl *t = netdev_priv(dev);
-       struct net *net = dev_net(dev);
-       struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+       struct vti6_net *ip6n = net_generic(t->net, vti6_net_id);
   
         if (dev == ip6n->fb_tnl_dev)
                 RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
@@@ -305,7 -304,7 +304,7 @@@ static int vti6_rcv(struct sk_buff *skb
   
         rcu_read_lock();
         t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
- -      if (t != NULL) {
+ +      if (t) {
                 if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) {
                         rcu_read_unlock();
                         goto discard;
@@@ -601,6 -600,8 +600,6 @@@ static void vti6_link_config(struct ip6
                 dev->flags |= IFF_POINTOPOINT;
         else
                 dev->flags &= ~IFF_POINTOPOINT;
- -
- -      dev->iflink = p->link;
   }
   
   /**
@@@ -714,7 -715,7 +713,7 @@@ vti6_ioctl(struct net_device *dev, stru
                 } else {
                         memset(&p, 0, sizeof(p));
                 }
- -              if (t == NULL)
+ +              if (!t)
                         t = netdev_priv(dev);
                 vti6_parm_to_user(&p, &t->parms);
                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
@@@ -734,7 -735,7 +733,7 @@@
                 vti6_parm_from_user(&p1, &p);
                 t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL);
                 if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
- -                      if (t != NULL) {
+ +                      if (t) {
                                 if (t->dev != dev) {
                                         err = -EEXIST;
                                         break;
@@@ -765,7 -766,7 +764,7 @@@
                         err = -ENOENT;
                         vti6_parm_from_user(&p1, &p);
                         t = vti6_locate(net, &p1, 0);
- -                      if (t == NULL)
+ +                      if (!t)
                                 break;
                         err = -EPERM;
                         if (t->dev == ip6n->fb_tnl_dev)
@@@ -806,7 -807,6 +805,7 @@@ static const struct net_device_ops vti6
         .ndo_do_ioctl   = vti6_ioctl,
         .ndo_change_mtu = vti6_change_mtu,
         .ndo_get_stats64 = ip_tunnel_get_stats64,
+ +      .ndo_get_iflink = ip6_tnl_get_iflink,
   };
   
   /**
@@@ -896,10 -896,12 +895,10 @@@ static void vti6_netlink_parms(struct n
                 parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
   
         if (data[IFLA_VTI_LOCAL])
- -              nla_memcpy(&parms->laddr, data[IFLA_VTI_LOCAL],
- -                         sizeof(struct in6_addr));
+ +              parms->laddr = nla_get_in6_addr(data[IFLA_VTI_LOCAL]);
   
         if (data[IFLA_VTI_REMOTE])
- -              nla_memcpy(&parms->raddr, data[IFLA_VTI_REMOTE],
- -                         sizeof(struct in6_addr));
+ +              parms->raddr = nla_get_in6_addr(data[IFLA_VTI_REMOTE]);
   
         if (data[IFLA_VTI_IKEY])
                 parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
@@@ -980,8 -982,10 +979,8 @@@ static int vti6_fill_info(struct sk_buf
         struct __ip6_tnl_parm *parm = &tunnel->parms;
   
         if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) ||
- -          nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr),
- -                  &parm->laddr) ||
- -          nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr),
- -                  &parm->raddr) ||
+ +          nla_put_in6_addr(skb, IFLA_VTI_LOCAL, &parm->laddr) ||
+ +          nla_put_in6_addr(skb, IFLA_VTI_REMOTE, &parm->raddr) ||
             nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) ||
             nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key))
                 goto nla_put_failure;
@@@ -1022,7 -1026,7 +1021,7 @@@ static void __net_exit vti6_destroy_tun
   
         for (h = 0; h < HASH_SIZE; h++) {
                 t = rtnl_dereference(ip6n->tnls_r_l[h]);
- -              while (t != NULL) {
+ +              while (t) {
                         unregister_netdevice_queue(t->dev, &list);
                         t = rtnl_dereference(t->next);
                 }
diff --combined net/rds/rds.h

index c3f2855c3d8432272f7899608513a499558d9ad8,02d8fd5b40c08336bfd115ccdbc782357ca262c1..0d41155a2258cbbd16e19171c3daa376e3a83877
--- 1/net/rds/rds.h
--- 2/net/rds/rds.h
+++ b/net/rds/rds.h
@@@ -110,6 -110,7 +110,7 @@@ struct rds_connection 
         void                    *c_transport_data;
   
         atomic_t                c_state;
+       unsigned long           c_send_gen;
         unsigned long           c_flags;
         unsigned long           c_reconnect_jiffies;
         struct delayed_work     c_send_w;
@@@ -702,8 -703,8 +703,8 @@@ void rds_inc_init(struct rds_incoming *
   void rds_inc_put(struct rds_incoming *inc);
   void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
                        struct rds_incoming *inc, gfp_t gfp);
- -int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
- -              size_t size, int msg_flags);
+ +int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ +              int msg_flags);
   void rds_clear_recv_queue(struct rds_sock *rs);
   int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg);
   void rds_inc_info_copy(struct rds_incoming *inc,
@@@ -711,7 -712,8 +712,7 @@@
                        __be32 saddr, __be32 daddr, int flip);
   
   /* send.c */
- -int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
- -              size_t payload_len);
+ +int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len);
   void rds_send_reset(struct rds_connection *conn);
   int rds_send_xmit(struct rds_connection *conn);
   struct sockaddr_in;
diff --combined net/rds/send.c

index 44672befc0ee29a3e04ca01768c087fd0abd2f36,49f77efd82b9783260cda91a58b5b62ea9e98d1c..e9430f537f9c2bb23bbaeeb66933e1e85058bd34
--- 1/net/rds/send.c
--- 2/net/rds/send.c
+++ b/net/rds/send.c
@@@ -140,8 -140,11 +140,11 @@@ int rds_send_xmit(struct rds_connectio
         struct scatterlist *sg;
         int ret = 0;
         LIST_HEAD(to_be_dropped);
+       int batch_count;
+       unsigned long send_gen = 0;
   
   restart:
+       batch_count = 0;
   
         /*
          * sendmsg calls here after having queued its message on the send
@@@ -156,6 -159,17 +159,17 @@@
                 goto out;
         }
   
+       /*
+        * we record the send generation after doing the xmit acquire.
+        * if someone else manages to jump in and do some work, we'll use
+        * this to avoid a goto restart farther down.
+        *
+        * The acquire_in_xmit() check above ensures that only one
+        * caller can increment c_send_gen at any time.
+        */
+       conn->c_send_gen++;
+       send_gen = conn->c_send_gen;
+ 
         /*
          * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT,
          * we do the opposite to avoid races.
@@@ -202,6 -216,16 +216,16 @@@
                 if (!rm) {
                         unsigned int len;
   
+                       batch_count++;
+ 
+                       /* we want to process as big a batch as we can, but
+                        * we also want to avoid softlockups.  If we've been
+                        * through a lot of messages, lets back off and see
+                        * if anyone else jumps in
+                        */
+                       if (batch_count >= 1024)
+                               goto over_batch;
+ 
                         spin_lock_irqsave(&conn->c_lock, flags);
   
                         if (!list_empty(&conn->c_send_queue)) {
@@@ -357,9 -381,9 +381,9 @@@
                 }
         }
   
+ over_batch:
         if (conn->c_trans->xmit_complete)
                 conn->c_trans->xmit_complete(conn);
- 
         release_in_xmit(conn);
   
         /* Nuke any messages we decided not to retransmit. */
@@@ -380,10 -404,15 +404,15 @@@
          * If the transport cannot continue (i.e ret != 0), then it must
          * call us when more room is available, such as from the tx
          * completion handler.
+        *
+        * We have an extra generation check here so that if someone manages
+        * to jump in after our release_in_xmit, we'll see that they have done
+        * some work and we will skip our goto
          */
         if (ret == 0) {
                 smp_mb();
-               if (!list_empty(&conn->c_send_queue)) {
+               if (!list_empty(&conn->c_send_queue) &&
+                   send_gen == conn->c_send_gen) {
                         rds_stats_inc(s_send_lock_queue_raced);
                         goto restart;
                 }
@@@ -920,7 -949,8 +949,7 @@@ static int rds_cmsg_send(struct rds_soc
         return ret;
   }
   
- -int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
- -              size_t payload_len)
+ +int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
   {
         struct sock *sk = sock->sk;
         struct rds_sock *rs = rds_sk_to_rs(sk);
author	David S. Miller <davem@davemloft.net>
	Tue, 14 Apr 2015 19:44:14 +0000 (15:44 -0400)
committer	David S. Miller <davem@davemloft.net>
	Tue, 14 Apr 2015 19:44:14 +0000 (15:44 -0400)
		1	2
drivers/net/ethernet/emulex/benet/be.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/emulex/benet/be_main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/vxlan.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/brcmphy.h	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/fou.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/geneve.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/tcp_output.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv6/ip6_vti.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/rds/rds.h	patch \|	diff1 \|	diff2 \|	blob \| history
net/rds/send.c	patch \|	diff1 \|	diff2 \|	blob \| history