ixgbevf: fix possible crashes in probe and remove
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 030a219c85e363802644f1302dbf56c0ac709c8b..3b0ddf757fb666f4ccc8c2825c7a4187244df642 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -66,6 +66,8 @@ static char ixgbevf_copyright[] =
 static const struct ixgbevf_info *ixgbevf_info_tbl[] = {
        [board_82599_vf] = &ixgbevf_82599_vf_info,
        [board_X540_vf]  = &ixgbevf_X540_vf_info,
+       [board_X550_vf]  = &ixgbevf_X550_vf_info,
+       [board_X550EM_x_vf] = &ixgbevf_X550EM_x_vf_info,
 };
 
 /* ixgbevf_pci_tbl - PCI Device ID Table
@@ -79,6 +81,8 @@ static const struct ixgbevf_info *ixgbevf_info_tbl[] = {
 static const struct pci_device_id ixgbevf_pci_tbl[] = {
        {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_VF), board_82599_vf },
        {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540_VF), board_X540_vf },
+       {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550_VF), board_X550_vf },
+       {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF), board_X550EM_x_vf },
        /* required last entry */
        {0, }
 };
@@ -143,21 +147,6 @@ u32 ixgbevf_read_reg(struct ixgbe_hw *hw, u32 reg)
        return value;
 }
 
-static inline void ixgbevf_release_rx_desc(struct ixgbevf_ring *rx_ring,
-                                          u32 val)
-{
-       rx_ring->next_to_use = val;
-
-       /*
-        * Force memory writes to complete before letting h/w
-        * know there are new descriptors to fetch.  (Only
-        * applicable for weak-ordered memory model archs,
-        * such as IA-64).
-        */
-       wmb();
-       ixgbevf_write_tail(rx_ring, val);
-}
-
 /**
  * ixgbevf_set_ivar - set IVAR registers - maps interrupt causes to vectors
  * @adapter: pointer to adapter struct
@@ -342,40 +331,13 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
        return !!budget;
 }
 
-/**
- * ixgbevf_receive_skb - Send a completed packet up the stack
- * @q_vector: structure containing interrupt and ring information
- * @skb: packet to send up
- * @status: hardware indication of status of receive
- * @rx_desc: rx descriptor
- **/
-static void ixgbevf_receive_skb(struct ixgbevf_q_vector *q_vector,
-                               struct sk_buff *skb, u8 status,
-                               union ixgbe_adv_rx_desc *rx_desc)
-{
-       struct ixgbevf_adapter *adapter = q_vector->adapter;
-       bool is_vlan = (status & IXGBE_RXD_STAT_VP);
-       u16 tag = le16_to_cpu(rx_desc->wb.upper.vlan);
-
-       if (is_vlan && test_bit(tag & VLAN_VID_MASK, adapter->active_vlans))
-               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag);
-
-       if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL))
-               napi_gro_receive(&q_vector->napi, skb);
-       else
-               netif_rx(skb);
-}
-
 /**
  * ixgbevf_rx_skb - Helper function to determine proper Rx method
  * @q_vector: structure containing interrupt and ring information
  * @skb: packet to send up
- * @status: hardware indication of status of receive
- * @rx_desc: rx descriptor
  **/
 static void ixgbevf_rx_skb(struct ixgbevf_q_vector *q_vector,
-                          struct sk_buff *skb, u8 status,
-                          union ixgbe_adv_rx_desc *rx_desc)
+                          struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_RX_BUSY_POLL
        skb_mark_napi_id(skb, &q_vector->napi);
@@ -387,17 +349,17 @@ static void ixgbevf_rx_skb(struct ixgbevf_q_vector *q_vector,
        }
 #endif /* CONFIG_NET_RX_BUSY_POLL */
 
-       ixgbevf_receive_skb(q_vector, skb, status, rx_desc);
+       napi_gro_receive(&q_vector->napi, skb);
 }
 
-/**
- * ixgbevf_rx_checksum - indicate in skb if hw indicated a good cksum
- * @ring: pointer to Rx descriptor ring structure
- * @status_err: hardware indication of status of receive
+/* ixgbevf_rx_checksum - indicate in skb if hw indicated a good cksum
+ * @ring: structure containing ring specific data
+ * @rx_desc: current Rx descriptor being processed
  * @skb: skb currently being received and modified
- **/
+ */
 static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring,
-                                      u32 status_err, struct sk_buff *skb)
+                                      union ixgbe_adv_rx_desc *rx_desc,
+                                      struct sk_buff *skb)
 {
        skb_checksum_none_assert(skb);
 
@@ -406,16 +368,16 @@ static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring,
                return;
 
        /* if IP and error */
-       if ((status_err & IXGBE_RXD_STAT_IPCS) &&
-           (status_err & IXGBE_RXDADV_ERR_IPE)) {
+       if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) &&
+           ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) {
                ring->rx_stats.csum_err++;
                return;
        }
 
-       if (!(status_err & IXGBE_RXD_STAT_L4CS))
+       if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS))
                return;
 
-       if (status_err & IXGBE_RXDADV_ERR_TCPE) {
+       if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) {
                ring->rx_stats.csum_err++;
                return;
        }
@@ -424,52 +386,413 @@ static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring,
        skb->ip_summed = CHECKSUM_UNNECESSARY;
 }
 
+/* ixgbevf_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the checksum, VLAN, protocol, and other fields within
+ * the skb.
+ */
+static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring,
+                                      union ixgbe_adv_rx_desc *rx_desc,
+                                      struct sk_buff *skb)
+{
+       ixgbevf_rx_checksum(rx_ring, rx_desc, skb);
+
+       if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) {
+               u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
+               unsigned long *active_vlans = netdev_priv(rx_ring->netdev);
+
+               if (test_bit(vid & VLAN_VID_MASK, active_vlans))
+                       __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+       }
+
+       skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+}
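
The active_vlans lookup above keys on the 12-bit VLAN ID rather than the full tag written back by hardware. A standalone userspace sketch of that masking; the TCI value and the _EX name are illustrative, not taken from the driver:

#include <stdint.h>
#include <stdio.h>

#define VLAN_VID_MASK_EX 0x0fff	/* low 12 bits of the TCI carry the VLAN ID */

int main(void)
{
	uint16_t tci = 0xa064;	/* priority 5, DEI 0, VID 100 */

	printf("VID = %u\n", (unsigned)(tci & VLAN_VID_MASK_EX));	/* prints 100 */
	return 0;
}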
+
+/**
+ * ixgbevf_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * This function updates next to clean.  If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it returns true
+ * indicating that this is in fact a non-EOP buffer.
+ **/
+static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring,
+                              union ixgbe_adv_rx_desc *rx_desc)
+{
+       u32 ntc = rx_ring->next_to_clean + 1;
+
+       /* fetch, update, and store next to clean */
+       ntc = (ntc < rx_ring->count) ? ntc : 0;
+       rx_ring->next_to_clean = ntc;
+
+       prefetch(IXGBEVF_RX_DESC(rx_ring, ntc));
+
+       if (likely(ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)))
+               return false;
+
+       return true;
+}
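
A minimal userspace model of the next_to_clean advance in ixgbevf_is_non_eop() above, showing the branch-based wrap that avoids a modulo; the ring size is an illustrative assumption:

#include <stdio.h>

#define RING_COUNT 8	/* assumed ring size for the example */

/* advance a ring index and wrap to 0 at RING_COUNT, as the driver does */
static unsigned int advance_ntc(unsigned int ntc)
{
	ntc++;
	return (ntc < RING_COUNT) ? ntc : 0;
}

int main(void)
{
	unsigned int ntc = RING_COUNT - 2;

	for (int i = 0; i < 4; i++) {
		ntc = advance_ntc(ntc);
		printf("next_to_clean = %u\n", ntc);	/* 7, 0, 1, 2 */
	}
	return 0;
}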
+
+static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
+                                     struct ixgbevf_rx_buffer *bi)
+{
+       struct page *page = bi->page;
+       dma_addr_t dma = bi->dma;
+
+       /* since we are recycling buffers we should seldom need to alloc */
+       if (likely(page))
+               return true;
+
+       /* alloc new page for storage */
+       page = dev_alloc_page();
+       if (unlikely(!page)) {
+               rx_ring->rx_stats.alloc_rx_page_failed++;
+               return false;
+       }
+
+       /* map page for use */
+       dma = dma_map_page(rx_ring->dev, page, 0,
+                          PAGE_SIZE, DMA_FROM_DEVICE);
+
+       /* if mapping failed free memory back to system since
+        * there isn't much point in holding memory we can't use
+        */
+       if (dma_mapping_error(rx_ring->dev, dma)) {
+               __free_page(page);
+
+               rx_ring->rx_stats.alloc_rx_buff_failed++;
+               return false;
+       }
+
+       bi->dma = dma;
+       bi->page = page;
+       bi->page_offset = 0;
+
+       return true;
+}
+
 /**
  * ixgbevf_alloc_rx_buffers - Replace used receive buffers; packet split
  * @rx_ring: rx descriptor ring (for a specific queue) to setup buffers on
+ * @cleaned_count: number of buffers to replace
  **/
 static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
-                                    int cleaned_count)
+                                    u16 cleaned_count)
 {
        union ixgbe_adv_rx_desc *rx_desc;
        struct ixgbevf_rx_buffer *bi;
        unsigned int i = rx_ring->next_to_use;
 
-       while (cleaned_count--) {
-               rx_desc = IXGBEVF_RX_DESC(rx_ring, i);
-               bi = &rx_ring->rx_buffer_info[i];
+       /* nothing to do or no valid netdev defined */
+       if (!cleaned_count || !rx_ring->netdev)
+               return;
 
-               if (!bi->skb) {
-                       struct sk_buff *skb;
+       rx_desc = IXGBEVF_RX_DESC(rx_ring, i);
+       bi = &rx_ring->rx_buffer_info[i];
+       i -= rx_ring->count;
 
-                       skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-                                                       rx_ring->rx_buf_len);
-                       if (!skb)
-                               goto no_buffers;
+       do {
+               if (!ixgbevf_alloc_mapped_page(rx_ring, bi))
+                       break;
 
-                       bi->skb = skb;
+               /* Refresh the desc even if pkt_addr didn't change
+                * because each write-back erases this info.
+                */
+               rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
 
-                       bi->dma = dma_map_single(rx_ring->dev, skb->data,
-                                                rx_ring->rx_buf_len,
-                                                DMA_FROM_DEVICE);
-                       if (dma_mapping_error(rx_ring->dev, bi->dma)) {
-                               dev_kfree_skb(skb);
-                               bi->skb = NULL;
-                               dev_err(rx_ring->dev, "Rx DMA map failed\n");
-                               break;
-                       }
+               rx_desc++;
+               bi++;
+               i++;
+               if (unlikely(!i)) {
+                       rx_desc = IXGBEVF_RX_DESC(rx_ring, 0);
+                       bi = rx_ring->rx_buffer_info;
+                       i -= rx_ring->count;
                }
-               rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
 
-               i++;
-               if (i == rx_ring->count)
-                       i = 0;
+               /* clear the hdr_addr for the next_to_use descriptor */
+               rx_desc->read.hdr_addr = 0;
+
+               cleaned_count--;
+       } while (cleaned_count);
+
+       i += rx_ring->count;
+
+       if (rx_ring->next_to_use != i) {
+               /* record the next descriptor to use */
+               rx_ring->next_to_use = i;
+
+               /* update next to alloc since we have filled the ring */
+               rx_ring->next_to_alloc = i;
+
+               /* Force memory writes to complete before letting h/w
+                * know there are new descriptors to fetch.  (Only
+                * applicable for weak-ordered memory model archs,
+                * such as IA-64).
+                */
+               wmb();
+               ixgbevf_write_tail(rx_ring, i);
+       }
+}
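
The refill loop above biases the index by -count so the wrap test is a cheap !i rather than a compare against the ring size, then adds count back before writing next_to_use. A standalone sketch of just that index arithmetic, with illustrative values (the real loop also walks the rx_desc and buffer_info pointers):

#include <stdio.h>

#define RING_COUNT 8	/* assumed ring size */

int main(void)
{
	unsigned int start = 5;		/* pretend next_to_use on entry */
	unsigned int to_fill = 6;	/* pretend cleaned_count */
	unsigned int i = start;

	i -= RING_COUNT;		/* bias; unsigned wrap-around is well defined */
	do {
		/* ...fill the descriptor at real index (i + RING_COUNT)... */
		i++;
		if (!i)			/* real index just passed RING_COUNT - 1 */
			i -= RING_COUNT;
	} while (--to_fill);

	i += RING_COUNT;		/* undo the bias */
	printf("new next_to_use = %u\n", i);	/* (5 + 6) mod 8 = 3 */
	return 0;
}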
+
+/* ixgbevf_pull_tail - ixgbevf specific version of skb_pull_tail
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an ixgbevf specific version of __pskb_pull_tail.  The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void ixgbevf_pull_tail(struct ixgbevf_ring *rx_ring,
+                             struct sk_buff *skb)
+{
+       struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+       unsigned char *va;
+       unsigned int pull_len;
+
+       /* it is valid to use page_address instead of kmap since we are
+        * working with pages allocated out of the lowmem pool per
+        * alloc_page(GFP_ATOMIC)
+        */
+       va = skb_frag_address(frag);
+
+       /* we need the header to contain the greater of either ETH_HLEN or
+        * 60 bytes if the skb->len is less than 60 for skb_pad.
+        */
+       pull_len = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
+
+       /* align pull length to size of long to optimize memcpy performance */
+       skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+       /* update all of the pointers */
+       skb_frag_size_sub(frag, pull_len);
+       frag->page_offset += pull_len;
+       skb->data_len -= pull_len;
+       skb->tail += pull_len;
+}
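
The copy above rounds the pull length up to sizeof(long) so the memcpy stays word-sized. A small standalone sketch of that rounding; the ALIGN_UP macro and values are illustrative stand-ins for the kernel's ALIGN():

#include <stdio.h>

/* round x up to the next multiple of a, for power-of-two a */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	unsigned int pull_len = 54;	/* e.g. Ethernet + IPv4 + TCP headers */
	unsigned long copy_len = ALIGN_UP(pull_len, sizeof(long));

	/* on an LP64 build: 54 rounded up to a multiple of 8 is 56 */
	printf("pull_len=%u copy_len=%lu\n", pull_len, copy_len);
	return 0;
}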
+
+/* ixgbevf_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Check for corrupted packet headers caused by senders on the local L2
+ * embedded NIC switch not setting up their Tx Descriptors right.  These
+ * should be very rare.
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ */
+static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring,
+                                   union ixgbe_adv_rx_desc *rx_desc,
+                                   struct sk_buff *skb)
+{
+       /* verify that the packet does not have any known errors */
+       if (unlikely(ixgbevf_test_staterr(rx_desc,
+                                         IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) {
+               struct net_device *netdev = rx_ring->netdev;
+
+               if (!(netdev->features & NETIF_F_RXALL)) {
+                       dev_kfree_skb_any(skb);
+                       return true;
+               }
+       }
+
+       /* place header in linear portion of buffer */
+       if (skb_is_nonlinear(skb))
+               ixgbevf_pull_tail(rx_ring, skb);
+
+       /* if skb_pad returns an error the skb was freed */
+       if (unlikely(skb->len < 60)) {
+               int pad_len = 60 - skb->len;
+
+               if (skb_pad(skb, pad_len))
+                       return true;
+               __skb_put(skb, pad_len);
+       }
+
+       return false;
+}
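
The 60-byte check above corresponds to the minimum Ethernet frame length excluding the 4-byte FCS (64-byte minimum frame on the wire). A standalone sketch of the pad computation, with an illustrative runt frame:

#include <stdio.h>

#define MIN_FRAME_LEN 60	/* minimum Ethernet frame length excluding FCS */

int main(void)
{
	unsigned int skb_len = 42;	/* e.g. an unpadded ARP request */
	unsigned int pad_len = skb_len < MIN_FRAME_LEN ? MIN_FRAME_LEN - skb_len : 0;

	printf("len=%u pad=%u padded=%u\n", skb_len, pad_len, skb_len + pad_len);
	return 0;
}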
+
+/* ixgbevf_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ */
+static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
+                                 struct ixgbevf_rx_buffer *old_buff)
+{
+       struct ixgbevf_rx_buffer *new_buff;
+       u16 nta = rx_ring->next_to_alloc;
+
+       new_buff = &rx_ring->rx_buffer_info[nta];
+
+       /* update, and store next to alloc */
+       nta++;
+       rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+       /* transfer page from old buffer to new buffer */
+       new_buff->page = old_buff->page;
+       new_buff->dma = old_buff->dma;
+       new_buff->page_offset = old_buff->page_offset;
+
+       /* sync the buffer for use by the device */
+       dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma,
+                                        new_buff->page_offset,
+                                        IXGBEVF_RX_BUFSZ,
+                                        DMA_FROM_DEVICE);
+}
+
+static inline bool ixgbevf_page_is_reserved(struct page *page)
+{
+       return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc;
+}
+
+/* ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buff to place the data into
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the adapter.
+ */
+static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
+                               struct ixgbevf_rx_buffer *rx_buffer,
+                               union ixgbe_adv_rx_desc *rx_desc,
+                               struct sk_buff *skb)
+{
+       struct page *page = rx_buffer->page;
+       unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = IXGBEVF_RX_BUFSZ;
+#else
+       unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+#endif
+
+       if ((size <= IXGBEVF_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) {
+               unsigned char *va = page_address(page) + rx_buffer->page_offset;
+
+               memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+               /* page is not reserved, we can reuse buffer as is */
+               if (likely(!ixgbevf_page_is_reserved(page)))
+                       return true;
+
+               /* this page cannot be reused so discard it */
+               put_page(page);
+               return false;
        }
 
-no_buffers:
-       rx_ring->rx_stats.alloc_rx_buff_failed++;
-       if (rx_ring->next_to_use != i)
-               ixgbevf_release_rx_desc(rx_ring, i);
+       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+                       rx_buffer->page_offset, size, truesize);
+
+       /* avoid re-using remote pages */
+       if (unlikely(ixgbevf_page_is_reserved(page)))
+               return false;
+
+#if (PAGE_SIZE < 8192)
+       /* if we are only owner of page we can reuse it */
+       if (unlikely(page_count(page) != 1))
+               return false;
+
+       /* flip page offset to other buffer */
+       rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
+
+#else
+       /* move offset up to the next cache line */
+       rx_buffer->page_offset += truesize;
+
+       if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
+               return false;
+
+#endif
+       /* Even if we own the page, we are not allowed to use atomic_set()
+        * This would break get_page_unless_zero() users.
+        */
+       atomic_inc(&page->_count);
+
+       return true;
+}
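
With 4 KiB pages the driver splits each page into two half-page Rx buffers and alternates between them, so reuse is a single XOR of the page offset (the PAGE_SIZE < 8192 branch above). A standalone sketch with an assumed buffer size:

#include <stdio.h>

#define RX_BUFSZ_EX 2048u	/* assumed IXGBEVF_RX_BUFSZ for a 4 KiB page */

int main(void)
{
	unsigned int page_offset = 0;

	for (int i = 0; i < 4; i++) {
		printf("fill %d uses page offset %u\n", i, page_offset);
		page_offset ^= RX_BUFSZ_EX;	/* toggles 0 <-> 2048 */
	}
	return 0;
}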
+
+static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
+                                              union ixgbe_adv_rx_desc *rx_desc,
+                                              struct sk_buff *skb)
+{
+       struct ixgbevf_rx_buffer *rx_buffer;
+       struct page *page;
+
+       rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+       page = rx_buffer->page;
+       prefetchw(page);
+
+       if (likely(!skb)) {
+               void *page_addr = page_address(page) +
+                                 rx_buffer->page_offset;
+
+               /* prefetch first cache line of first page */
+               prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+               prefetch(page_addr + L1_CACHE_BYTES);
+#endif
+
+               /* allocate a skb to store the frags */
+               skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+                                               IXGBEVF_RX_HDR_SIZE);
+               if (unlikely(!skb)) {
+                       rx_ring->rx_stats.alloc_rx_buff_failed++;
+                       return NULL;
+               }
+
+               /* we will be copying header into skb->data in
+                * pskb_may_pull so it is in our interest to prefetch
+                * it now to avoid a possible cache miss
+                */
+               prefetchw(skb->data);
+       }
+
+       /* we are reusing so sync this buffer for CPU use */
+       dma_sync_single_range_for_cpu(rx_ring->dev,
+                                     rx_buffer->dma,
+                                     rx_buffer->page_offset,
+                                     IXGBEVF_RX_BUFSZ,
+                                     DMA_FROM_DEVICE);
+
+       /* pull page into skb */
+       if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+               /* hand second half of page back to the ring */
+               ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+       } else {
+               /* we are not reusing the buffer so unmap it */
+               dma_unmap_page(rx_ring->dev, rx_buffer->dma,
+                              PAGE_SIZE, DMA_FROM_DEVICE);
+       }
+
+       /* clear contents of buffer_info */
+       rx_buffer->dma = 0;
+       rx_buffer->page = NULL;
+
+       return skb;
 }
 
 static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
@@ -484,78 +807,51 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
                                struct ixgbevf_ring *rx_ring,
                                int budget)
 {
-       union ixgbe_adv_rx_desc *rx_desc, *next_rxd;
-       struct ixgbevf_rx_buffer *rx_buffer_info, *next_buffer;
-       struct sk_buff *skb;
-       unsigned int i;
-       u32 len, staterr;
-       int cleaned_count = 0;
        unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+       u16 cleaned_count = ixgbevf_desc_unused(rx_ring);
+       struct sk_buff *skb = rx_ring->skb;
 
-       i = rx_ring->next_to_clean;
-       rx_desc = IXGBEVF_RX_DESC(rx_ring, i);
-       staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
-       rx_buffer_info = &rx_ring->rx_buffer_info[i];
+       while (likely(total_rx_packets < budget)) {
+               union ixgbe_adv_rx_desc *rx_desc;
 
-       while (staterr & IXGBE_RXD_STAT_DD) {
-               if (!budget)
-                       break;
-               budget--;
+               /* return some buffers to hardware, one at a time is too slow */
+               if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) {
+                       ixgbevf_alloc_rx_buffers(rx_ring, cleaned_count);
+                       cleaned_count = 0;
+               }
 
-               rmb(); /* read descriptor and rx_buffer_info after status DD */
-               len = le16_to_cpu(rx_desc->wb.upper.length);
-               skb = rx_buffer_info->skb;
-               prefetch(skb->data - NET_IP_ALIGN);
-               rx_buffer_info->skb = NULL;
+               rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
 
-               if (rx_buffer_info->dma) {
-                       dma_unmap_single(rx_ring->dev, rx_buffer_info->dma,
-                                        rx_ring->rx_buf_len,
-                                        DMA_FROM_DEVICE);
-                       rx_buffer_info->dma = 0;
-                       skb_put(skb, len);
-               }
+               if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_DD))
+                       break;
 
-               i++;
-               if (i == rx_ring->count)
-                       i = 0;
+               /* This memory barrier is needed to keep us from reading
+                * any other fields out of the rx_desc until we know the
+                * RXD_STAT_DD bit is set
+                */
+               rmb();
 
-               next_rxd = IXGBEVF_RX_DESC(rx_ring, i);
-               prefetch(next_rxd);
-               cleaned_count++;
+               /* retrieve a buffer from the ring */
+               skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb);
 
-               next_buffer = &rx_ring->rx_buffer_info[i];
+               /* exit if we failed to retrieve a buffer */
+               if (!skb)
+                       break;
 
-               if (!(staterr & IXGBE_RXD_STAT_EOP)) {
-                       skb->next = next_buffer->skb;
-                       IXGBE_CB(skb->next)->prev = skb;
-                       rx_ring->rx_stats.non_eop_descs++;
-                       goto next_desc;
-               }
+               cleaned_count++;
 
-               /* we should not be chaining buffers, if we did drop the skb */
-               if (IXGBE_CB(skb)->prev) {
-                       do {
-                               struct sk_buff *this = skb;
-                               skb = IXGBE_CB(skb)->prev;
-                               dev_kfree_skb(this);
-                       } while (skb);
-                       goto next_desc;
-               }
+               /* fetch next buffer in frame if non-eop */
+               if (ixgbevf_is_non_eop(rx_ring, rx_desc))
+                       continue;
 
-               /* ERR_MASK will only have valid bits if EOP set */
-               if (unlikely(staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK)) {
-                       dev_kfree_skb_irq(skb);
-                       goto next_desc;
+               /* verify the packet layout is correct */
+               if (ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) {
+                       skb = NULL;
+                       continue;
                }
 
-               ixgbevf_rx_checksum(rx_ring, staterr, skb);
-
                /* probably a little skewed due to removing CRC */
                total_rx_bytes += skb->len;
-               total_rx_packets++;
-
-               skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 
                /* Workaround hardware that can't do proper VEPA multicast
                 * source pruning.
@@ -565,32 +861,23 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
                    ether_addr_equal(rx_ring->netdev->dev_addr,
                                     eth_hdr(skb)->h_source)) {
                        dev_kfree_skb_irq(skb);
-                       goto next_desc;
+                       continue;
                }
 
-               ixgbevf_rx_skb(q_vector, skb, staterr, rx_desc);
+               /* populate checksum, VLAN, and protocol */
+               ixgbevf_process_skb_fields(rx_ring, rx_desc, skb);
 
-next_desc:
-               rx_desc->wb.upper.status_error = 0;
+               ixgbevf_rx_skb(q_vector, skb);
 
-               /* return some buffers to hardware, one at a time is too slow */
-               if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) {
-                       ixgbevf_alloc_rx_buffers(rx_ring, cleaned_count);
-                       cleaned_count = 0;
-               }
-
-               /* use prefetched values */
-               rx_desc = next_rxd;
-               rx_buffer_info = &rx_ring->rx_buffer_info[i];
+               /* reset skb pointer */
+               skb = NULL;
 
-               staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+               /* update budget accounting */
+               total_rx_packets++;
        }
 
-       rx_ring->next_to_clean = i;
-       cleaned_count = ixgbevf_desc_unused(rx_ring);
-
-       if (cleaned_count)
-               ixgbevf_alloc_rx_buffers(rx_ring, cleaned_count);
+       /* place incomplete frames back on ring for completion */
+       rx_ring->skb = skb;
 
        u64_stats_update_begin(&rx_ring->syncp);
        rx_ring->stats.packets += total_rx_packets;
@@ -634,12 +921,10 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget)
        else
                per_ring_budget = budget;
 
-       adapter->flags |= IXGBE_FLAG_IN_NETPOLL;
        ixgbevf_for_each_ring(ring, q_vector->rx)
                clean_complete &= (ixgbevf_clean_rx_irq(q_vector, ring,
                                                        per_ring_budget)
                                   < per_ring_budget);
-       adapter->flags &= ~IXGBE_FLAG_IN_NETPOLL;
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
        ixgbevf_qv_unlock_napi(q_vector);
@@ -1229,19 +1514,15 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter)
 
 static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
 {
-       struct ixgbevf_ring *rx_ring;
        struct ixgbe_hw *hw = &adapter->hw;
        u32 srrctl;
 
-       rx_ring = adapter->rx_ring[index];
-
        srrctl = IXGBE_SRRCTL_DROP_EN;
 
+       srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
+       srrctl |= IXGBEVF_RX_BUFSZ >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
        srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
 
-       srrctl |= ALIGN(rx_ring->rx_buf_len, 1024) >>
-                 IXGBE_SRRCTL_BSIZEPKT_SHIFT;
-
        IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl);
 }
 
@@ -1260,40 +1541,6 @@ static void ixgbevf_setup_psrtype(struct ixgbevf_adapter *adapter)
        IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
 }
 
-static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter)
-{
-       struct ixgbe_hw *hw = &adapter->hw;
-       struct net_device *netdev = adapter->netdev;
-       int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
-       int i;
-       u16 rx_buf_len;
-
-       /* notify the PF of our intent to use this size of frame */
-       ixgbevf_rlpml_set_vf(hw, max_frame);
-
-       /* PF will allow an extra 4 bytes past for vlan tagged frames */
-       max_frame += VLAN_HLEN;
-
-       /*
-        * Allocate buffer sizes that fit well into 32K and
-        * take into account max frame size of 9.5K
-        */
-       if ((hw->mac.type == ixgbe_mac_X540_vf) &&
-           (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE))
-               rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE;
-       else if (max_frame <= IXGBEVF_RXBUFFER_2K)
-               rx_buf_len = IXGBEVF_RXBUFFER_2K;
-       else if (max_frame <= IXGBEVF_RXBUFFER_4K)
-               rx_buf_len = IXGBEVF_RXBUFFER_4K;
-       else if (max_frame <= IXGBEVF_RXBUFFER_8K)
-               rx_buf_len = IXGBEVF_RXBUFFER_8K;
-       else
-               rx_buf_len = IXGBEVF_RXBUFFER_10K;
-
-       for (i = 0; i < adapter->num_rx_queues; i++)
-               adapter->rx_ring[i]->rx_buf_len = rx_buf_len;
-}
-
 #define IXGBEVF_MAX_RX_DESC_POLL 10
 static void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter,
                                     struct ixgbevf_ring *ring)
@@ -1371,12 +1618,13 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
        /* reset ntu and ntc to place SW in sync with hardware */
        ring->next_to_clean = 0;
        ring->next_to_use = 0;
+       ring->next_to_alloc = 0;
 
        ixgbevf_configure_srrctl(adapter, reg_idx);
 
-       /* prevent DMA from exceeding buffer space available */
-       rxdctl &= ~IXGBE_RXDCTL_RLPMLMASK;
-       rxdctl |= ring->rx_buf_len | IXGBE_RXDCTL_RLPML_EN;
+       /* allow any size packet since we can handle overflow */
+       rxdctl &= ~IXGBE_RXDCTL_RLPML_EN;
+
        rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME;
        IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl);
 
@@ -1393,11 +1641,13 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
 static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
 {
        int i;
+       struct ixgbe_hw *hw = &adapter->hw;
+       struct net_device *netdev = adapter->netdev;
 
        ixgbevf_setup_psrtype(adapter);
 
-       /* set_rx_buffer_len must be called before ring initialization */
-       ixgbevf_set_rx_buffer_len(adapter);
+       /* notify the PF of our intent to use this size of frame */
+       ixgbevf_rlpml_set_vf(hw, netdev->mtu + ETH_HLEN + ETH_FCS_LEN);
 
        /* Setup the HW Rx Head and Tail Descriptor Pointers and
         * the Base and Length of the Rx Descriptor Ring */
@@ -1702,32 +1952,32 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter)
  **/
 static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
 {
+       struct device *dev = rx_ring->dev;
        unsigned long size;
        unsigned int i;
 
+       /* Free Rx ring sk_buff */
+       if (rx_ring->skb) {
+               dev_kfree_skb(rx_ring->skb);
+               rx_ring->skb = NULL;
+       }
+
+       /* ring already cleared, nothing to do */
        if (!rx_ring->rx_buffer_info)
                return;
 
-       /* Free all the Rx ring sk_buffs */
+       /* Free all the Rx ring pages */
        for (i = 0; i < rx_ring->count; i++) {
-               struct ixgbevf_rx_buffer *rx_buffer_info;
-
-               rx_buffer_info = &rx_ring->rx_buffer_info[i];
-               if (rx_buffer_info->dma) {
-                       dma_unmap_single(rx_ring->dev, rx_buffer_info->dma,
-                                        rx_ring->rx_buf_len,
-                                        DMA_FROM_DEVICE);
-                       rx_buffer_info->dma = 0;
-               }
-               if (rx_buffer_info->skb) {
-                       struct sk_buff *skb = rx_buffer_info->skb;
-                       rx_buffer_info->skb = NULL;
-                       do {
-                               struct sk_buff *this = skb;
-                               skb = IXGBE_CB(skb)->prev;
-                               dev_kfree_skb(this);
-                       } while (skb);
-               }
+               struct ixgbevf_rx_buffer *rx_buffer;
+
+               rx_buffer = &rx_ring->rx_buffer_info[i];
+               if (rx_buffer->dma)
+                       dma_unmap_page(dev, rx_buffer->dma,
+                                      PAGE_SIZE, DMA_FROM_DEVICE);
+               rx_buffer->dma = 0;
+               if (rx_buffer->page)
+                       __free_page(rx_buffer->page);
+               rx_buffer->page = NULL;
        }
 
        size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count;
@@ -3274,6 +3524,7 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p)
 static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
 {
        struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+       struct ixgbe_hw *hw = &adapter->hw;
        int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
        int max_possible_frame = MAXIMUM_ETHERNET_VLAN_SIZE;
 
@@ -3282,7 +3533,7 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
                max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE;
                break;
        default:
-               if (adapter->hw.mac.type == ixgbe_mac_X540_vf)
+               if (adapter->hw.mac.type != ixgbe_mac_82599_vf)
                        max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE;
                break;
        }
@@ -3291,17 +3542,35 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
        if ((new_mtu < 68) || (max_frame > max_possible_frame))
                return -EINVAL;
 
-       hw_dbg(&adapter->hw, "changing MTU from %d to %d\n",
+       hw_dbg(hw, "changing MTU from %d to %d\n",
               netdev->mtu, new_mtu);
        /* must set new MTU before calling down or up */
        netdev->mtu = new_mtu;
 
-       if (netif_running(netdev))
-               ixgbevf_reinit_locked(adapter);
+       /* notify the PF of our intent to use this size of frame */
+       ixgbevf_rlpml_set_vf(hw, max_frame);
 
        return 0;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/* Polling 'interrupt' - used by things like netconsole to send skbs
+ * without having to re-enable interrupts. It's not called while
+ * the interrupt routine is executing.
+ */
+static void ixgbevf_netpoll(struct net_device *netdev)
+{
+       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+       int i;
+
+       /* if interface is down do nothing */
+       if (test_bit(__IXGBEVF_DOWN, &adapter->state))
+               return;
+       for (i = 0; i < adapter->num_rx_queues; i++)
+               ixgbevf_msix_clean_rings(0, adapter->q_vector[i]);
+}
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+
 static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
@@ -3438,6 +3707,9 @@ static const struct net_device_ops ixgbevf_netdev_ops = {
 #ifdef CONFIG_NET_RX_BUSY_POLL
        .ndo_busy_poll          = ixgbevf_busy_poll_recv,
 #endif
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = ixgbevf_netpoll,
+#endif
 };
 
 static void ixgbevf_assign_netdev_ops(struct net_device *dev)
@@ -3465,6 +3737,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct ixgbe_hw *hw = NULL;
        const struct ixgbevf_info *ii = ixgbevf_info_tbl[ent->driver_data];
        int err, pci_using_dac;
+       bool disable_dev = false;
 
        err = pci_enable_device(pdev);
        if (err)
@@ -3499,7 +3772,6 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        SET_NETDEV_DEV(netdev, &pdev->dev);
 
-       pci_set_drvdata(pdev, netdev);
        adapter = netdev_priv(netdev);
 
        adapter->netdev = netdev;
@@ -3588,16 +3860,28 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (err)
                goto err_register;
 
+       pci_set_drvdata(pdev, netdev);
        netif_carrier_off(netdev);
 
        ixgbevf_init_last_counter_stats(adapter);
 
-       /* print the MAC address */
-       hw_dbg(hw, "%pM\n", netdev->dev_addr);
+       /* print the VF info */
+       dev_info(&pdev->dev, "%pM\n", netdev->dev_addr);
+       dev_info(&pdev->dev, "MAC: %d\n", hw->mac.type);
 
-       hw_dbg(hw, "MAC: %d\n", hw->mac.type);
+       switch (hw->mac.type) {
+       case ixgbe_mac_X550_vf:
+               dev_info(&pdev->dev, "Intel(R) X550 Virtual Function\n");
+               break;
+       case ixgbe_mac_X540_vf:
+               dev_info(&pdev->dev, "Intel(R) X540 Virtual Function\n");
+               break;
+       case ixgbe_mac_82599_vf:
+       default:
+               dev_info(&pdev->dev, "Intel(R) 82599 Virtual Function\n");
+               break;
+       }
 
-       hw_dbg(hw, "Intel(R) 82599 Virtual Function\n");
        return 0;
 
 err_register:
@@ -3606,12 +3890,13 @@ err_sw_init:
        ixgbevf_reset_interrupt_capability(adapter);
        iounmap(adapter->io_addr);
 err_ioremap:
+       disable_dev = !test_and_set_bit(__IXGBEVF_DISABLED, &adapter->state);
        free_netdev(netdev);
 err_alloc_etherdev:
        pci_release_regions(pdev);
 err_pci_reg:
 err_dma:
-       if (!test_and_set_bit(__IXGBEVF_DISABLED, &adapter->state))
+       if (!adapter || disable_dev)
                pci_disable_device(pdev);
        return err;
 }
@@ -3628,7 +3913,13 @@ err_dma:
 static void ixgbevf_remove(struct pci_dev *pdev)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
-       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+       struct ixgbevf_adapter *adapter;
+       bool disable_dev;
+
+       if (!netdev)
+               return;
+
+       adapter = netdev_priv(netdev);
 
        set_bit(__IXGBEVF_REMOVING, &adapter->state);
 
@@ -3648,9 +3939,10 @@ static void ixgbevf_remove(struct pci_dev *pdev)
 
        hw_dbg(&adapter->hw, "Remove complete\n");
 
+       disable_dev = !test_and_set_bit(__IXGBEVF_DISABLED, &adapter->state);
        free_netdev(netdev);
 
-       if (!test_and_set_bit(__IXGBEVF_DISABLED, &adapter->state))
+       if (disable_dev)
                pci_disable_device(pdev);
 }
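
The probe error path and the remove path above both gate pci_disable_device() on !test_and_set_bit(__IXGBEVF_DISABLED, &adapter->state), so whichever path runs first performs the disable and any later path skips it. A userspace model of that first-caller-wins gating, using C11 atomics rather than the kernel bitops:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_flag disabled = ATOMIC_FLAG_INIT;

/* returns true only for the first caller, mirroring !test_and_set_bit() */
static bool claim_disable(void)
{
	return !atomic_flag_test_and_set(&disabled);
}

int main(void)
{
	/* first path to run wins; the second sees the bit already set */
	printf("remove path disables device: %s\n", claim_disable() ? "yes" : "no");
	printf("shutdown path disables device: %s\n", claim_disable() ? "yes" : "no");
	return 0;
}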