drivers/net/ethernet/intel/igb/igb_main.c  [firefly-linux-kernel-4.4.55.git]
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2011 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
42 #include <linux/if.h>
43 #include <linux/if_vlan.h>
44 #include <linux/pci.h>
45 #include <linux/pci-aspm.h>
46 #include <linux/delay.h>
47 #include <linux/interrupt.h>
48 #include <linux/ip.h>
49 #include <linux/tcp.h>
50 #include <linux/sctp.h>
51 #include <linux/if_ether.h>
52 #include <linux/aer.h>
53 #include <linux/prefetch.h>
54 #ifdef CONFIG_IGB_DCA
55 #include <linux/dca.h>
56 #endif
57 #include "igb.h"
58
59 #define MAJ 3
60 #define MIN 0
61 #define BUILD 6
62 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
63 __stringify(BUILD) "-k"
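/* For reference: with the values above, DRV_VERSION expands to the string
 * literal "3.0.6-k".
 */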
64 char igb_driver_name[] = "igb";
65 char igb_driver_version[] = DRV_VERSION;
66 static const char igb_driver_string[] =
67                                 "Intel(R) Gigabit Ethernet Network Driver";
68 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
69
70 static const struct e1000_info *igb_info_tbl[] = {
71         [board_82575] = &e1000_82575_info,
72 };
73
74 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
90         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
91         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
92         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
93         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
94         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
95         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
96         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
97         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
98         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
99         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
100         /* required last entry */
101         {0, }
102 };
103
104 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
105
106 void igb_reset(struct igb_adapter *);
107 static int igb_setup_all_tx_resources(struct igb_adapter *);
108 static int igb_setup_all_rx_resources(struct igb_adapter *);
109 static void igb_free_all_tx_resources(struct igb_adapter *);
110 static void igb_free_all_rx_resources(struct igb_adapter *);
111 static void igb_setup_mrqc(struct igb_adapter *);
112 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
113 static void __devexit igb_remove(struct pci_dev *pdev);
114 static void igb_init_hw_timer(struct igb_adapter *adapter);
115 static int igb_sw_init(struct igb_adapter *);
116 static int igb_open(struct net_device *);
117 static int igb_close(struct net_device *);
118 static void igb_configure_tx(struct igb_adapter *);
119 static void igb_configure_rx(struct igb_adapter *);
120 static void igb_clean_all_tx_rings(struct igb_adapter *);
121 static void igb_clean_all_rx_rings(struct igb_adapter *);
122 static void igb_clean_tx_ring(struct igb_ring *);
123 static void igb_clean_rx_ring(struct igb_ring *);
124 static void igb_set_rx_mode(struct net_device *);
125 static void igb_update_phy_info(unsigned long);
126 static void igb_watchdog(unsigned long);
127 static void igb_watchdog_task(struct work_struct *);
128 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
129 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
130                                                  struct rtnl_link_stats64 *stats);
131 static int igb_change_mtu(struct net_device *, int);
132 static int igb_set_mac(struct net_device *, void *);
133 static void igb_set_uta(struct igb_adapter *adapter);
134 static irqreturn_t igb_intr(int irq, void *);
135 static irqreturn_t igb_intr_msi(int irq, void *);
136 static irqreturn_t igb_msix_other(int irq, void *);
137 static irqreturn_t igb_msix_ring(int irq, void *);
138 #ifdef CONFIG_IGB_DCA
139 static void igb_update_dca(struct igb_q_vector *);
140 static void igb_setup_dca(struct igb_adapter *);
141 #endif /* CONFIG_IGB_DCA */
142 static int igb_poll(struct napi_struct *, int);
143 static bool igb_clean_tx_irq(struct igb_q_vector *);
144 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
145 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
146 static void igb_tx_timeout(struct net_device *);
147 static void igb_reset_task(struct work_struct *);
148 static void igb_vlan_mode(struct net_device *netdev, u32 features);
149 static void igb_vlan_rx_add_vid(struct net_device *, u16);
150 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
151 static void igb_restore_vlan(struct igb_adapter *);
152 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
153 static void igb_ping_all_vfs(struct igb_adapter *);
154 static void igb_msg_task(struct igb_adapter *);
155 static void igb_vmm_control(struct igb_adapter *);
156 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
157 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
158 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
159 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
160                                int vf, u16 vlan, u8 qos);
161 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
162 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
163                                  struct ifla_vf_info *ivi);
164 static void igb_check_vf_rate_limit(struct igb_adapter *);
165
166 #ifdef CONFIG_PM
167 static int igb_suspend(struct pci_dev *, pm_message_t);
168 static int igb_resume(struct pci_dev *);
169 #endif
170 static void igb_shutdown(struct pci_dev *);
171 #ifdef CONFIG_IGB_DCA
172 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
173 static struct notifier_block dca_notifier = {
174         .notifier_call  = igb_notify_dca,
175         .next           = NULL,
176         .priority       = 0
177 };
178 #endif
179 #ifdef CONFIG_NET_POLL_CONTROLLER
180 /* for netdump / net console */
181 static void igb_netpoll(struct net_device *);
182 #endif
183 #ifdef CONFIG_PCI_IOV
184 static unsigned int max_vfs = 0;
185 module_param(max_vfs, uint, 0);
186 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
187                  "per physical function");
188 #endif /* CONFIG_PCI_IOV */
189
190 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
191                      pci_channel_state_t);
192 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
193 static void igb_io_resume(struct pci_dev *);
194
195 static struct pci_error_handlers igb_err_handler = {
196         .error_detected = igb_io_error_detected,
197         .slot_reset = igb_io_slot_reset,
198         .resume = igb_io_resume,
199 };
200
201
202 static struct pci_driver igb_driver = {
203         .name     = igb_driver_name,
204         .id_table = igb_pci_tbl,
205         .probe    = igb_probe,
206         .remove   = __devexit_p(igb_remove),
207 #ifdef CONFIG_PM
208         /* Power Management Hooks */
209         .suspend  = igb_suspend,
210         .resume   = igb_resume,
211 #endif
212         .shutdown = igb_shutdown,
213         .err_handler = &igb_err_handler
214 };
215
216 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
217 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
218 MODULE_LICENSE("GPL");
219 MODULE_VERSION(DRV_VERSION);
220
221 struct igb_reg_info {
222         u32 ofs;
223         char *name;
224 };
225
226 static const struct igb_reg_info igb_reg_info_tbl[] = {
227
228         /* General Registers */
229         {E1000_CTRL, "CTRL"},
230         {E1000_STATUS, "STATUS"},
231         {E1000_CTRL_EXT, "CTRL_EXT"},
232
233         /* Interrupt Registers */
234         {E1000_ICR, "ICR"},
235
236         /* RX Registers */
237         {E1000_RCTL, "RCTL"},
238         {E1000_RDLEN(0), "RDLEN"},
239         {E1000_RDH(0), "RDH"},
240         {E1000_RDT(0), "RDT"},
241         {E1000_RXDCTL(0), "RXDCTL"},
242         {E1000_RDBAL(0), "RDBAL"},
243         {E1000_RDBAH(0), "RDBAH"},
244
245         /* TX Registers */
246         {E1000_TCTL, "TCTL"},
247         {E1000_TDBAL(0), "TDBAL"},
248         {E1000_TDBAH(0), "TDBAH"},
249         {E1000_TDLEN(0), "TDLEN"},
250         {E1000_TDH(0), "TDH"},
251         {E1000_TDT(0), "TDT"},
252         {E1000_TXDCTL(0), "TXDCTL"},
253         {E1000_TDFH, "TDFH"},
254         {E1000_TDFT, "TDFT"},
255         {E1000_TDFHS, "TDFHS"},
256         {E1000_TDFPC, "TDFPC"},
257
258         /* List Terminator */
259         {}
260 };
261
262 /*
263  * igb_regdump - register printout routine
264  */
265 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
266 {
267         int n = 0;
268         char rname[16];
269         u32 regs[8];
270
271         switch (reginfo->ofs) {
272         case E1000_RDLEN(0):
273                 for (n = 0; n < 4; n++)
274                         regs[n] = rd32(E1000_RDLEN(n));
275                 break;
276         case E1000_RDH(0):
277                 for (n = 0; n < 4; n++)
278                         regs[n] = rd32(E1000_RDH(n));
279                 break;
280         case E1000_RDT(0):
281                 for (n = 0; n < 4; n++)
282                         regs[n] = rd32(E1000_RDT(n));
283                 break;
284         case E1000_RXDCTL(0):
285                 for (n = 0; n < 4; n++)
286                         regs[n] = rd32(E1000_RXDCTL(n));
287                 break;
288         case E1000_RDBAL(0):
289                 for (n = 0; n < 4; n++)
290                         regs[n] = rd32(E1000_RDBAL(n));
291                 break;
292         case E1000_RDBAH(0):
293                 for (n = 0; n < 4; n++)
294                         regs[n] = rd32(E1000_RDBAH(n));
295                 break;
296         case E1000_TDBAL(0):
297                 for (n = 0; n < 4; n++)
298                         regs[n] = rd32(E1000_TDBAL(n));
299                 break;
300         case E1000_TDBAH(0):
301                 for (n = 0; n < 4; n++)
302                         regs[n] = rd32(E1000_TDBAH(n));
303                 break;
304         case E1000_TDLEN(0):
305                 for (n = 0; n < 4; n++)
306                         regs[n] = rd32(E1000_TDLEN(n));
307                 break;
308         case E1000_TDH(0):
309                 for (n = 0; n < 4; n++)
310                         regs[n] = rd32(E1000_TDH(n));
311                 break;
312         case E1000_TDT(0):
313                 for (n = 0; n < 4; n++)
314                         regs[n] = rd32(E1000_TDT(n));
315                 break;
316         case E1000_TXDCTL(0):
317                 for (n = 0; n < 4; n++)
318                         regs[n] = rd32(E1000_TXDCTL(n));
319                 break;
320         default:
321                 printk(KERN_INFO "%-15s %08x\n",
322                         reginfo->name, rd32(reginfo->ofs));
323                 return;
324         }
325
326         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
327         printk(KERN_INFO "%-15s ", rname);
328         for (n = 0; n < 4; n++)
329                 printk(KERN_CONT "%08x ", regs[n]);
330         printk(KERN_CONT "\n");
331 }
332
333 /*
334  * igb_dump - Print registers, tx-rings and rx-rings
335  */
336 static void igb_dump(struct igb_adapter *adapter)
337 {
338         struct net_device *netdev = adapter->netdev;
339         struct e1000_hw *hw = &adapter->hw;
340         struct igb_reg_info *reginfo;
341         int n = 0;
342         struct igb_ring *tx_ring;
343         union e1000_adv_tx_desc *tx_desc;
344         struct my_u0 { u64 a; u64 b; } *u0;
345         struct igb_ring *rx_ring;
346         union e1000_adv_rx_desc *rx_desc;
347         u32 staterr;
348         int i = 0;
349
350         if (!netif_msg_hw(adapter))
351                 return;
352
353         /* Print netdevice Info */
354         if (netdev) {
355                 dev_info(&adapter->pdev->dev, "Net device Info\n");
356                 printk(KERN_INFO "Device Name     state            "
357                         "trans_start      last_rx\n");
358                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
359                 netdev->name,
360                 netdev->state,
361                 netdev->trans_start,
362                 netdev->last_rx);
363         }
364
365         /* Print Registers */
366         dev_info(&adapter->pdev->dev, "Register Dump\n");
367         printk(KERN_INFO " Register Name   Value\n");
368         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
369              reginfo->name; reginfo++) {
370                 igb_regdump(hw, reginfo);
371         }
372
373         /* Print TX Ring Summary */
374         if (!netdev || !netif_running(netdev))
375                 goto exit;
376
377         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
378         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
379                 " leng ntw timestamp\n");
380         for (n = 0; n < adapter->num_tx_queues; n++) {
381                 struct igb_tx_buffer *buffer_info;
382                 tx_ring = adapter->tx_ring[n];
383                 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
384                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
385                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
386                            (u64)buffer_info->dma,
387                            buffer_info->length,
388                            buffer_info->next_to_watch,
389                            (u64)buffer_info->time_stamp);
390         }
391
392         /* Print TX Rings */
393         if (!netif_msg_tx_done(adapter))
394                 goto rx_ring_summary;
395
396         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
397
398         /* Transmit Descriptor Formats
399          *
400          * Advanced Transmit Descriptor
401          *   +--------------------------------------------------------------+
402          * 0 |         Buffer Address [63:0]                                |
403          *   +--------------------------------------------------------------+
404          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
405          *   +--------------------------------------------------------------+
406          *   63      46 45    40 39 38 36 35 32 31   24             15       0
407          */
408
409         for (n = 0; n < adapter->num_tx_queues; n++) {
410                 tx_ring = adapter->tx_ring[n];
411                 printk(KERN_INFO "------------------------------------\n");
412                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
413                 printk(KERN_INFO "------------------------------------\n");
414                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
415                         "[PlPOCIStDDM Ln] [bi->dma       ] "
416                         "leng  ntw timestamp        bi->skb\n");
417
418                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
419                         struct igb_tx_buffer *buffer_info;
420                         tx_desc = IGB_TX_DESC(tx_ring, i);
421                         buffer_info = &tx_ring->tx_buffer_info[i];
422                         u0 = (struct my_u0 *)tx_desc;
423                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
424                                 " %04X  %p %016llX %p", i,
425                                 le64_to_cpu(u0->a),
426                                 le64_to_cpu(u0->b),
427                                 (u64)buffer_info->dma,
428                                 buffer_info->length,
429                                 buffer_info->next_to_watch,
430                                 (u64)buffer_info->time_stamp,
431                                 buffer_info->skb);
432                         if (i == tx_ring->next_to_use &&
433                                 i == tx_ring->next_to_clean)
434                                 printk(KERN_CONT " NTC/U\n");
435                         else if (i == tx_ring->next_to_use)
436                                 printk(KERN_CONT " NTU\n");
437                         else if (i == tx_ring->next_to_clean)
438                                 printk(KERN_CONT " NTC\n");
439                         else
440                                 printk(KERN_CONT "\n");
441
442                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
443                                 print_hex_dump(KERN_INFO, "",
444                                         DUMP_PREFIX_ADDRESS,
445                                         16, 1, phys_to_virt(buffer_info->dma),
446                                         buffer_info->length, true);
447                 }
448         }
449
450         /* Print RX Rings Summary */
451 rx_ring_summary:
452         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
453         printk(KERN_INFO "Queue [NTU] [NTC]\n");
454         for (n = 0; n < adapter->num_rx_queues; n++) {
455                 rx_ring = adapter->rx_ring[n];
456                 printk(KERN_INFO " %5d %5X %5X\n", n,
457                            rx_ring->next_to_use, rx_ring->next_to_clean);
458         }
459
460         /* Print RX Rings */
461         if (!netif_msg_rx_status(adapter))
462                 goto exit;
463
464         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
465
466         /* Advanced Receive Descriptor (Read) Format
467          *    63                                           1        0
468          *    +-----------------------------------------------------+
469          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
470          *    +----------------------------------------------+------+
471          *  8 |       Header Buffer Address [63:1]           |  DD  |
472          *    +-----------------------------------------------------+
473          *
474          *
475          * Advanced Receive Descriptor (Write-Back) Format
476          *
477          *   63       48 47    32 31  30      21 20 17 16   4 3     0
478          *   +------------------------------------------------------+
479          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
480          *   | Checksum   Ident  |   |           |    | Type | Type |
481          *   +------------------------------------------------------+
482          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
483          *   +------------------------------------------------------+
484          *   63       48 47    32 31            20 19               0
485          */
486
487         for (n = 0; n < adapter->num_rx_queues; n++) {
488                 rx_ring = adapter->rx_ring[n];
489                 printk(KERN_INFO "------------------------------------\n");
490                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
491                 printk(KERN_INFO "------------------------------------\n");
492                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
493                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
494                         "<-- Adv Rx Read format\n");
495                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
496                         "[vl er S cks ln] ---------------- [bi->skb] "
497                         "<-- Adv Rx Write-Back format\n");
498
499                 for (i = 0; i < rx_ring->count; i++) {
500                         struct igb_rx_buffer *buffer_info;
501                         buffer_info = &rx_ring->rx_buffer_info[i];
502                         rx_desc = IGB_RX_DESC(rx_ring, i);
503                         u0 = (struct my_u0 *)rx_desc;
504                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
505                         if (staterr & E1000_RXD_STAT_DD) {
506                                 /* Descriptor Done */
507                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
508                                         "%016llX ---------------- %p", i,
509                                         le64_to_cpu(u0->a),
510                                         le64_to_cpu(u0->b),
511                                         buffer_info->skb);
512                         } else {
513                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
514                                         "%016llX %016llX %p", i,
515                                         le64_to_cpu(u0->a),
516                                         le64_to_cpu(u0->b),
517                                         (u64)buffer_info->dma,
518                                         buffer_info->skb);
519
520                                 if (netif_msg_pktdata(adapter)) {
521                                         print_hex_dump(KERN_INFO, "",
522                                                 DUMP_PREFIX_ADDRESS,
523                                                 16, 1,
524                                                 phys_to_virt(buffer_info->dma),
525                                                 IGB_RX_HDR_LEN, true);
526                                         print_hex_dump(KERN_INFO, "",
527                                           DUMP_PREFIX_ADDRESS,
528                                           16, 1,
529                                           phys_to_virt(
530                                             buffer_info->page_dma +
531                                             buffer_info->page_offset),
532                                           PAGE_SIZE/2, true);
533                                 }
534                         }
535
536                         if (i == rx_ring->next_to_use)
537                                 printk(KERN_CONT " NTU\n");
538                         else if (i == rx_ring->next_to_clean)
539                                 printk(KERN_CONT " NTC\n");
540                         else
541                                 printk(KERN_CONT "\n");
542
543                 }
544         }
545
546 exit:
547         return;
548 }
549
550
551 /**
552  * igb_read_clock - read raw cycle counter (to be used by time counter)
553  */
554 static cycle_t igb_read_clock(const struct cyclecounter *tc)
555 {
556         struct igb_adapter *adapter =
557                 container_of(tc, struct igb_adapter, cycles);
558         struct e1000_hw *hw = &adapter->hw;
559         u64 stamp = 0;
560         int shift = 0;
561
562         /*
563          * The timestamp latches on the lowest register read.  For the 82580
564          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
565          * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
566          */
567         if (hw->mac.type == e1000_82580) {
568                 stamp = rd32(E1000_SYSTIMR) >> 8;
569                 shift = IGB_82580_TSYNC_SHIFT;
570         }
571
572         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
573         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
574         return stamp;
575 }
576
577 /**
578  * igb_get_hw_dev - return device
579  * used by hardware layer to print debugging information
580  **/
581 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
582 {
583         struct igb_adapter *adapter = hw->back;
584         return adapter->netdev;
585 }
586
587 /**
588  * igb_init_module - Driver Registration Routine
589  *
590  * igb_init_module is the first routine called when the driver is
591  * loaded. All it does is register with the PCI subsystem.
592  **/
593 static int __init igb_init_module(void)
594 {
595         int ret;
596         printk(KERN_INFO "%s - version %s\n",
597                igb_driver_string, igb_driver_version);
598
599         printk(KERN_INFO "%s\n", igb_copyright);
600
601 #ifdef CONFIG_IGB_DCA
602         dca_register_notify(&dca_notifier);
603 #endif
604         ret = pci_register_driver(&igb_driver);
605         return ret;
606 }
607
608 module_init(igb_init_module);
609
610 /**
611  * igb_exit_module - Driver Exit Cleanup Routine
612  *
613  * igb_exit_module is called just before the driver is removed
614  * from memory.
615  **/
616 static void __exit igb_exit_module(void)
617 {
618 #ifdef CONFIG_IGB_DCA
619         dca_unregister_notify(&dca_notifier);
620 #endif
621         pci_unregister_driver(&igb_driver);
622 }
623
624 module_exit(igb_exit_module);
625
626 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
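/* Worked example of the macro above: Q_IDX_82576(0) = 0, Q_IDX_82576(1) = 8,
 * Q_IDX_82576(2) = 1, Q_IDX_82576(3) = 9, ... so consecutive software queue
 * indices alternate between the low (0-7) and high (8-15) halves of the
 * 82576 queue register space.
 */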
627 /**
628  * igb_cache_ring_register - Descriptor ring to register mapping
629  * @adapter: board private structure to initialize
630  *
631  * Once we know the feature-set enabled for the device, we'll cache
632  * the register offset the descriptor ring is assigned to.
633  **/
634 static void igb_cache_ring_register(struct igb_adapter *adapter)
635 {
636         int i = 0, j = 0;
637         u32 rbase_offset = adapter->vfs_allocated_count;
638
639         switch (adapter->hw.mac.type) {
640         case e1000_82576:
641                 /* The queues are allocated for virtualization such that VF 0
642                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
643                  * In order to avoid collisions we start at the first free queue
644                  * and continue consuming queues in the same sequence.
645                  */
646                 if (adapter->vfs_allocated_count) {
647                         for (; i < adapter->rss_queues; i++)
648                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
649                                                                Q_IDX_82576(i);
650                 }
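                /* Fall through */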
651         case e1000_82575:
652         case e1000_82580:
653         case e1000_i350:
654         default:
655                 for (; i < adapter->num_rx_queues; i++)
656                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
657                 for (; j < adapter->num_tx_queues; j++)
658                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
659                 break;
660         }
661 }
662
663 static void igb_free_queues(struct igb_adapter *adapter)
664 {
665         int i;
666
667         for (i = 0; i < adapter->num_tx_queues; i++) {
668                 kfree(adapter->tx_ring[i]);
669                 adapter->tx_ring[i] = NULL;
670         }
671         for (i = 0; i < adapter->num_rx_queues; i++) {
672                 kfree(adapter->rx_ring[i]);
673                 adapter->rx_ring[i] = NULL;
674         }
675         adapter->num_rx_queues = 0;
676         adapter->num_tx_queues = 0;
677 }
678
679 /**
680  * igb_alloc_queues - Allocate memory for all rings
681  * @adapter: board private structure to initialize
682  *
683  * We allocate one ring per queue at run-time since we don't know the
684  * number of queues at compile-time.
685  **/
686 static int igb_alloc_queues(struct igb_adapter *adapter)
687 {
688         struct igb_ring *ring;
689         int i;
690
691         for (i = 0; i < adapter->num_tx_queues; i++) {
692                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
693                 if (!ring)
694                         goto err;
695                 ring->count = adapter->tx_ring_count;
696                 ring->queue_index = i;
697                 ring->dev = &adapter->pdev->dev;
698                 ring->netdev = adapter->netdev;
699                 /* For 82575, context index must be unique per ring. */
700                 if (adapter->hw.mac.type == e1000_82575)
701                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
702                 adapter->tx_ring[i] = ring;
703         }
704
705         for (i = 0; i < adapter->num_rx_queues; i++) {
706                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
707                 if (!ring)
708                         goto err;
709                 ring->count = adapter->rx_ring_count;
710                 ring->queue_index = i;
711                 ring->dev = &adapter->pdev->dev;
712                 ring->netdev = adapter->netdev;
713                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
714                 /* set flag indicating ring supports SCTP checksum offload */
715                 if (adapter->hw.mac.type >= e1000_82576)
716                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
717                 adapter->rx_ring[i] = ring;
718         }
719
720         igb_cache_ring_register(adapter);
721
722         return 0;
723
724 err:
725         igb_free_queues(adapter);
726
727         return -ENOMEM;
728 }
729
730 #define IGB_N0_QUEUE -1
731 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
732 {
733         u32 msixbm = 0;
734         struct igb_adapter *adapter = q_vector->adapter;
735         struct e1000_hw *hw = &adapter->hw;
736         u32 ivar, index;
737         int rx_queue = IGB_N0_QUEUE;
738         int tx_queue = IGB_N0_QUEUE;
739
740         if (q_vector->rx_ring)
741                 rx_queue = q_vector->rx_ring->reg_idx;
742         if (q_vector->tx_ring)
743                 tx_queue = q_vector->tx_ring->reg_idx;
744
745         switch (hw->mac.type) {
746         case e1000_82575:
747                 /* The 82575 assigns vectors using a bitmask, which matches the
748                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
749                    or more queues to a vector, we write the appropriate bits
750                    into the MSIXBM register for that vector. */
751                 if (rx_queue > IGB_N0_QUEUE)
752                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
753                 if (tx_queue > IGB_N0_QUEUE)
754                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
755                 if (!adapter->msix_entries && msix_vector == 0)
756                         msixbm |= E1000_EIMS_OTHER;
757                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
758                 q_vector->eims_value = msixbm;
759                 break;
760         case e1000_82576:
761                 /* 82576 uses a table-based method for assigning vectors.
762                    Each queue has a single entry in the table to which we write
763                    a vector number along with a "valid" bit.  Sadly, the layout
764                    of the table is somewhat counterintuitive. */
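                /*
                 * A purely illustrative example of the layout: mapping rx
                 * queue 10 to MSI-X vector 3 selects IVAR0 entry
                 * (10 & 0x7) = 2 and, since queue 10 is >= 8, writes
                 * (3 | E1000_IVAR_VALID) into the third byte of that entry.
                 */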
765                 if (rx_queue > IGB_N0_QUEUE) {
766                         index = (rx_queue & 0x7);
767                         ivar = array_rd32(E1000_IVAR0, index);
768                         if (rx_queue < 8) {
769                                 /* vector goes into low byte of register */
770                                 ivar = ivar & 0xFFFFFF00;
771                                 ivar |= msix_vector | E1000_IVAR_VALID;
772                         } else {
773                                 /* vector goes into third byte of register */
774                                 ivar = ivar & 0xFF00FFFF;
775                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
776                         }
777                         array_wr32(E1000_IVAR0, index, ivar);
778                 }
779                 if (tx_queue > IGB_N0_QUEUE) {
780                         index = (tx_queue & 0x7);
781                         ivar = array_rd32(E1000_IVAR0, index);
782                         if (tx_queue < 8) {
783                                 /* vector goes into second byte of register */
784                                 ivar = ivar & 0xFFFF00FF;
785                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
786                         } else {
787                                 /* vector goes into high byte of register */
788                                 ivar = ivar & 0x00FFFFFF;
789                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
790                         }
791                         array_wr32(E1000_IVAR0, index, ivar);
792                 }
793                 q_vector->eims_value = 1 << msix_vector;
794                 break;
795         case e1000_82580:
796         case e1000_i350:
797                 /* 82580 uses the same table-based approach as 82576 but has fewer
798                    entries; as a result we carry over for queues greater than 4. */
799                 if (rx_queue > IGB_N0_QUEUE) {
800                         index = (rx_queue >> 1);
801                         ivar = array_rd32(E1000_IVAR0, index);
802                         if (rx_queue & 0x1) {
803                                 /* vector goes into third byte of register */
804                                 ivar = ivar & 0xFF00FFFF;
805                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
806                         } else {
807                                 /* vector goes into low byte of register */
808                                 ivar = ivar & 0xFFFFFF00;
809                                 ivar |= msix_vector | E1000_IVAR_VALID;
810                         }
811                         array_wr32(E1000_IVAR0, index, ivar);
812                 }
813                 if (tx_queue > IGB_N0_QUEUE) {
814                         index = (tx_queue >> 1);
815                         ivar = array_rd32(E1000_IVAR0, index);
816                         if (tx_queue & 0x1) {
817                                 /* vector goes into high byte of register */
818                                 ivar = ivar & 0x00FFFFFF;
819                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
820                         } else {
821                                 /* vector goes into second byte of register */
822                                 ivar = ivar & 0xFFFF00FF;
823                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
824                         }
825                         array_wr32(E1000_IVAR0, index, ivar);
826                 }
827                 q_vector->eims_value = 1 << msix_vector;
828                 break;
829         default:
830                 BUG();
831                 break;
832         }
833
834         /* add q_vector eims value to global eims_enable_mask */
835         adapter->eims_enable_mask |= q_vector->eims_value;
836
837         /* configure q_vector to set itr on first interrupt */
838         q_vector->set_itr = 1;
839 }
840
841 /**
842  * igb_configure_msix - Configure MSI-X hardware
843  *
844  * igb_configure_msix sets up the hardware to properly
845  * generate MSI-X interrupts.
846  **/
847 static void igb_configure_msix(struct igb_adapter *adapter)
848 {
849         u32 tmp;
850         int i, vector = 0;
851         struct e1000_hw *hw = &adapter->hw;
852
853         adapter->eims_enable_mask = 0;
854
855         /* set vector for other causes, i.e. link changes */
856         switch (hw->mac.type) {
857         case e1000_82575:
858                 tmp = rd32(E1000_CTRL_EXT);
859                 /* enable MSI-X PBA support*/
860                 tmp |= E1000_CTRL_EXT_PBA_CLR;
861
862                 /* Auto-Mask interrupts upon ICR read. */
863                 tmp |= E1000_CTRL_EXT_EIAME;
864                 tmp |= E1000_CTRL_EXT_IRCA;
865
866                 wr32(E1000_CTRL_EXT, tmp);
867
868                 /* enable msix_other interrupt */
869                 array_wr32(E1000_MSIXBM(0), vector++,
870                                       E1000_EIMS_OTHER);
871                 adapter->eims_other = E1000_EIMS_OTHER;
872
873                 break;
874
875         case e1000_82576:
876         case e1000_82580:
877         case e1000_i350:
878                 /* Turn on MSI-X capability first, or our settings
879                  * won't stick.  And it will take days to debug. */
880                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
881                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
882                                 E1000_GPIE_NSICR);
883
884                 /* enable msix_other interrupt */
885                 adapter->eims_other = 1 << vector;
886                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
887
888                 wr32(E1000_IVAR_MISC, tmp);
889                 break;
890         default:
891                 /* do nothing, since nothing else supports MSI-X */
892                 break;
893         } /* switch (hw->mac.type) */
894
895         adapter->eims_enable_mask |= adapter->eims_other;
896
897         for (i = 0; i < adapter->num_q_vectors; i++)
898                 igb_assign_vector(adapter->q_vector[i], vector++);
899
900         wrfl();
901 }
902
903 /**
904  * igb_request_msix - Initialize MSI-X interrupts
905  *
906  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
907  * kernel.
908  **/
909 static int igb_request_msix(struct igb_adapter *adapter)
910 {
911         struct net_device *netdev = adapter->netdev;
912         struct e1000_hw *hw = &adapter->hw;
913         int i, err = 0, vector = 0;
914
915         err = request_irq(adapter->msix_entries[vector].vector,
916                           igb_msix_other, 0, netdev->name, adapter);
917         if (err)
918                 goto out;
919         vector++;
920
921         for (i = 0; i < adapter->num_q_vectors; i++) {
922                 struct igb_q_vector *q_vector = adapter->q_vector[i];
923
924                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
925
926                 if (q_vector->rx_ring && q_vector->tx_ring)
927                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
928                                 q_vector->rx_ring->queue_index);
929                 else if (q_vector->tx_ring)
930                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
931                                 q_vector->tx_ring->queue_index);
932                 else if (q_vector->rx_ring)
933                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
934                                 q_vector->rx_ring->queue_index);
935                 else
936                         sprintf(q_vector->name, "%s-unused", netdev->name);
937
938                 err = request_irq(adapter->msix_entries[vector].vector,
939                                   igb_msix_ring, 0, q_vector->name,
940                                   q_vector);
941                 if (err)
942                         goto out;
943                 vector++;
944         }
945
946         igb_configure_msix(adapter);
947         return 0;
948 out:
949         return err;
950 }
951
952 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
953 {
954         if (adapter->msix_entries) {
955                 pci_disable_msix(adapter->pdev);
956                 kfree(adapter->msix_entries);
957                 adapter->msix_entries = NULL;
958         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
959                 pci_disable_msi(adapter->pdev);
960         }
961 }
962
963 /**
964  * igb_free_q_vectors - Free memory allocated for interrupt vectors
965  * @adapter: board private structure to initialize
966  *
967  * This function frees the memory allocated to the q_vectors.  In addition if
968  * NAPI is enabled it will delete any references to the NAPI struct prior
969  * to freeing the q_vector.
970  **/
971 static void igb_free_q_vectors(struct igb_adapter *adapter)
972 {
973         int v_idx;
974
975         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
976                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
977                 adapter->q_vector[v_idx] = NULL;
978                 if (!q_vector)
979                         continue;
980                 netif_napi_del(&q_vector->napi);
981                 kfree(q_vector);
982         }
983         adapter->num_q_vectors = 0;
984 }
985
986 /**
987  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
988  *
989  * This function resets the device so that it has no rx queues, tx queues,
990  * or MSI-X interrupts allocated.
991  */
992 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
993 {
994         igb_free_queues(adapter);
995         igb_free_q_vectors(adapter);
996         igb_reset_interrupt_capability(adapter);
997 }
998
999 /**
1000  * igb_set_interrupt_capability - set MSI or MSI-X if supported
1001  *
1002  * Attempt to configure interrupts using the best available
1003  * capabilities of the hardware and kernel.
1004  **/
1005 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1006 {
1007         int err;
1008         int numvecs, i;
1009
1010         /* Number of supported queues. */
1011         adapter->num_rx_queues = adapter->rss_queues;
1012         if (adapter->vfs_allocated_count)
1013                 adapter->num_tx_queues = 1;
1014         else
1015                 adapter->num_tx_queues = adapter->rss_queues;
1016
1017         /* start with one vector for every rx queue */
1018         numvecs = adapter->num_rx_queues;
1019
1020         /* if tx handler is separate add 1 for every tx queue */
1021         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1022                 numvecs += adapter->num_tx_queues;
1023
1024         /* store the number of vectors reserved for queues */
1025         adapter->num_q_vectors = numvecs;
1026
1027         /* add 1 vector for link status interrupts */
1028         numvecs++;
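        /*
         * For example (hypothetical counts): with 4 RSS queues and queue
         * pairing disabled this requests 4 rx + 4 tx + 1 link/other = 9
         * MSI-X vectors; with pairing enabled it would be 4 + 1 = 5.
         */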
1029         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1030                                         GFP_KERNEL);
1031         if (!adapter->msix_entries)
1032                 goto msi_only;
1033
1034         for (i = 0; i < numvecs; i++)
1035                 adapter->msix_entries[i].entry = i;
1036
1037         err = pci_enable_msix(adapter->pdev,
1038                               adapter->msix_entries,
1039                               numvecs);
1040         if (err == 0)
1041                 goto out;
1042
1043         igb_reset_interrupt_capability(adapter);
1044
1045         /* If we can't do MSI-X, try MSI */
1046 msi_only:
1047 #ifdef CONFIG_PCI_IOV
1048         /* disable SR-IOV for non MSI-X configurations */
1049         if (adapter->vf_data) {
1050                 struct e1000_hw *hw = &adapter->hw;
1051                 /* disable iov and allow time for transactions to clear */
1052                 pci_disable_sriov(adapter->pdev);
1053                 msleep(500);
1054
1055                 kfree(adapter->vf_data);
1056                 adapter->vf_data = NULL;
1057                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1058                 wrfl();
1059                 msleep(100);
1060                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1061         }
1062 #endif
1063         adapter->vfs_allocated_count = 0;
1064         adapter->rss_queues = 1;
1065         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1066         adapter->num_rx_queues = 1;
1067         adapter->num_tx_queues = 1;
1068         adapter->num_q_vectors = 1;
1069         if (!pci_enable_msi(adapter->pdev))
1070                 adapter->flags |= IGB_FLAG_HAS_MSI;
1071 out:
1072         /* Notify the stack of the (possibly) reduced queue counts. */
1073         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1074         return netif_set_real_num_rx_queues(adapter->netdev,
1075                                             adapter->num_rx_queues);
1076 }
1077
1078 /**
1079  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1080  * @adapter: board private structure to initialize
1081  *
1082  * We allocate one q_vector per queue interrupt.  If allocation fails we
1083  * return -ENOMEM.
1084  **/
1085 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1086 {
1087         struct igb_q_vector *q_vector;
1088         struct e1000_hw *hw = &adapter->hw;
1089         int v_idx;
1090
1091         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1092                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1093                 if (!q_vector)
1094                         goto err_out;
1095                 q_vector->adapter = adapter;
1096                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1097                 q_vector->itr_val = IGB_START_ITR;
1098                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1099                 adapter->q_vector[v_idx] = q_vector;
1100         }
1101         return 0;
1102
1103 err_out:
1104         igb_free_q_vectors(adapter);
1105         return -ENOMEM;
1106 }
1107
1108 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1109                                       int ring_idx, int v_idx)
1110 {
1111         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1112
1113         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1114         q_vector->rx_ring->q_vector = q_vector;
1115         q_vector->itr_val = adapter->rx_itr_setting;
1116         if (q_vector->itr_val && q_vector->itr_val <= 3)
1117                 q_vector->itr_val = IGB_START_ITR;
1118 }
1119
1120 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1121                                       int ring_idx, int v_idx)
1122 {
1123         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1124
1125         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1126         q_vector->tx_ring->q_vector = q_vector;
1127         q_vector->itr_val = adapter->tx_itr_setting;
1128         q_vector->tx_work_limit = adapter->tx_work_limit;
1129         if (q_vector->itr_val && q_vector->itr_val <= 3)
1130                 q_vector->itr_val = IGB_START_ITR;
1131 }
1132
1133 /**
1134  * igb_map_ring_to_vector - maps allocated queues to vectors
1135  *
1136  * This function maps the recently allocated queues to vectors.
1137  **/
1138 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1139 {
1140         int i;
1141         int v_idx = 0;
1142
1143         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1144             (adapter->num_q_vectors < adapter->num_tx_queues))
1145                 return -ENOMEM;
1146
1147         if (adapter->num_q_vectors >=
1148             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1149                 for (i = 0; i < adapter->num_rx_queues; i++)
1150                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1151                 for (i = 0; i < adapter->num_tx_queues; i++)
1152                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1153         } else {
1154                 for (i = 0; i < adapter->num_rx_queues; i++) {
1155                         if (i < adapter->num_tx_queues)
1156                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1157                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1158                 }
1159                 for (; i < adapter->num_tx_queues; i++)
1160                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1161         }
1162         return 0;
1163 }
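/*
 * Worked example for the fallback branch above (illustrative numbers): with
 * 4 rx queues, 4 tx queues and only 4 q_vectors, rx ring i and tx ring i
 * share q_vector[i]; if there were more tx queues than rx queues, the extra
 * tx rings would each get a vector of their own afterwards.
 */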
1164
1165 /**
1166  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1167  *
1168  * This function initializes the interrupts and allocates all of the queues.
1169  **/
1170 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1171 {
1172         struct pci_dev *pdev = adapter->pdev;
1173         int err;
1174
1175         err = igb_set_interrupt_capability(adapter);
1176         if (err)
1177                 return err;
1178
1179         err = igb_alloc_q_vectors(adapter);
1180         if (err) {
1181                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1182                 goto err_alloc_q_vectors;
1183         }
1184
1185         err = igb_alloc_queues(adapter);
1186         if (err) {
1187                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1188                 goto err_alloc_queues;
1189         }
1190
1191         err = igb_map_ring_to_vector(adapter);
1192         if (err) {
1193                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1194                 goto err_map_queues;
1195         }
1196
1197
1198         return 0;
1199 err_map_queues:
1200         igb_free_queues(adapter);
1201 err_alloc_queues:
1202         igb_free_q_vectors(adapter);
1203 err_alloc_q_vectors:
1204         igb_reset_interrupt_capability(adapter);
1205         return err;
1206 }
1207
1208 /**
1209  * igb_request_irq - initialize interrupts
1210  *
1211  * Attempts to configure interrupts using the best available
1212  * capabilities of the hardware and kernel.
1213  **/
1214 static int igb_request_irq(struct igb_adapter *adapter)
1215 {
1216         struct net_device *netdev = adapter->netdev;
1217         struct pci_dev *pdev = adapter->pdev;
1218         int err = 0;
1219
1220         if (adapter->msix_entries) {
1221                 err = igb_request_msix(adapter);
1222                 if (!err)
1223                         goto request_done;
1224                 /* fall back to MSI */
1225                 igb_clear_interrupt_scheme(adapter);
1226                 if (!pci_enable_msi(adapter->pdev))
1227                         adapter->flags |= IGB_FLAG_HAS_MSI;
1228                 igb_free_all_tx_resources(adapter);
1229                 igb_free_all_rx_resources(adapter);
1230                 adapter->num_tx_queues = 1;
1231                 adapter->num_rx_queues = 1;
1232                 adapter->num_q_vectors = 1;
1233                 err = igb_alloc_q_vectors(adapter);
1234                 if (err) {
1235                         dev_err(&pdev->dev,
1236                                 "Unable to allocate memory for vectors\n");
1237                         goto request_done;
1238                 }
1239                 err = igb_alloc_queues(adapter);
1240                 if (err) {
1241                         dev_err(&pdev->dev,
1242                                 "Unable to allocate memory for queues\n");
1243                         igb_free_q_vectors(adapter);
1244                         goto request_done;
1245                 }
1246                 igb_setup_all_tx_resources(adapter);
1247                 igb_setup_all_rx_resources(adapter);
1248         } else {
1249                 igb_assign_vector(adapter->q_vector[0], 0);
1250         }
1251
1252         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1253                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1254                                   netdev->name, adapter);
1255                 if (!err)
1256                         goto request_done;
1257
1258                 /* fall back to legacy interrupts */
1259                 igb_reset_interrupt_capability(adapter);
1260                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1261         }
1262
1263         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1264                           netdev->name, adapter);
1265
1266         if (err)
1267                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1268                         err);
1269
1270 request_done:
1271         return err;
1272 }
1273
1274 static void igb_free_irq(struct igb_adapter *adapter)
1275 {
1276         if (adapter->msix_entries) {
1277                 int vector = 0, i;
1278
1279                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1280
1281                 for (i = 0; i < adapter->num_q_vectors; i++) {
1282                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1283                         free_irq(adapter->msix_entries[vector++].vector,
1284                                  q_vector);
1285                 }
1286         } else {
1287                 free_irq(adapter->pdev->irq, adapter);
1288         }
1289 }
1290
1291 /**
1292  * igb_irq_disable - Mask off interrupt generation on the NIC
1293  * @adapter: board private structure
1294  **/
1295 static void igb_irq_disable(struct igb_adapter *adapter)
1296 {
1297         struct e1000_hw *hw = &adapter->hw;
1298
1299         /*
1300          * We need to be careful when disabling interrupts.  The VFs are also
1301          * mapped into these registers, and clearing the bits can cause
1302          * issues for the VF drivers, so we only clear the bits that we set.
1303          */
1304         if (adapter->msix_entries) {
1305                 u32 regval = rd32(E1000_EIAM);
1306                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1307                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1308                 regval = rd32(E1000_EIAC);
1309                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1310         }
1311
1312         wr32(E1000_IAM, 0);
1313         wr32(E1000_IMC, ~0);
1314         wrfl();
1315         if (adapter->msix_entries) {
1316                 int i;
1317                 for (i = 0; i < adapter->num_q_vectors; i++)
1318                         synchronize_irq(adapter->msix_entries[i].vector);
1319         } else {
1320                 synchronize_irq(adapter->pdev->irq);
1321         }
1322 }
1323
1324 /**
1325  * igb_irq_enable - Enable default interrupt generation settings
1326  * @adapter: board private structure
1327  **/
1328 static void igb_irq_enable(struct igb_adapter *adapter)
1329 {
1330         struct e1000_hw *hw = &adapter->hw;
1331
1332         if (adapter->msix_entries) {
1333                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1334                 u32 regval = rd32(E1000_EIAC);
1335                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1336                 regval = rd32(E1000_EIAM);
1337                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1338                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1339                 if (adapter->vfs_allocated_count) {
1340                         wr32(E1000_MBVFIMR, 0xFF);
1341                         ims |= E1000_IMS_VMMB;
1342                 }
1343                 if (adapter->hw.mac.type == e1000_82580)
1344                         ims |= E1000_IMS_DRSTA;
1345
1346                 wr32(E1000_IMS, ims);
1347         } else {
1348                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1349                                 E1000_IMS_DRSTA);
1350                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1351                                 E1000_IMS_DRSTA);
1352         }
1353 }
1354
1355 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1356 {
1357         struct e1000_hw *hw = &adapter->hw;
1358         u16 vid = adapter->hw.mng_cookie.vlan_id;
1359         u16 old_vid = adapter->mng_vlan_id;
1360
1361         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1362                 /* add VID to filter table */
1363                 igb_vfta_set(hw, vid, true);
1364                 adapter->mng_vlan_id = vid;
1365         } else {
1366                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1367         }
1368
1369         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1370             (vid != old_vid) &&
1371             !test_bit(old_vid, adapter->active_vlans)) {
1372                 /* remove VID from filter table */
1373                 igb_vfta_set(hw, old_vid, false);
1374         }
1375 }
1376
1377 /**
1378  * igb_release_hw_control - release control of the h/w to f/w
1379  * @adapter: address of board private structure
1380  *
1381  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1382  * For ASF and Pass Through versions of f/w this means that the
1383  * driver is no longer loaded.
1384  *
1385  **/
1386 static void igb_release_hw_control(struct igb_adapter *adapter)
1387 {
1388         struct e1000_hw *hw = &adapter->hw;
1389         u32 ctrl_ext;
1390
1391         /* Let firmware take over control of h/w */
1392         ctrl_ext = rd32(E1000_CTRL_EXT);
1393         wr32(E1000_CTRL_EXT,
1394                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1395 }
1396
1397 /**
1398  * igb_get_hw_control - get control of the h/w from f/w
1399  * @adapter: address of board private structure
1400  *
1401  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1402  * For ASF and Pass Through versions of f/w this means that
1403  * the driver is loaded.
1404  *
1405  **/
1406 static void igb_get_hw_control(struct igb_adapter *adapter)
1407 {
1408         struct e1000_hw *hw = &adapter->hw;
1409         u32 ctrl_ext;
1410
1411         /* Let firmware know the driver has taken over */
1412         ctrl_ext = rd32(E1000_CTRL_EXT);
1413         wr32(E1000_CTRL_EXT,
1414                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1415 }
1416
1417 /**
1418  * igb_configure - configure the hardware for RX and TX
1419  * @adapter: private board structure
1420  **/
1421 static void igb_configure(struct igb_adapter *adapter)
1422 {
1423         struct net_device *netdev = adapter->netdev;
1424         int i;
1425
1426         igb_get_hw_control(adapter);
1427         igb_set_rx_mode(netdev);
1428
1429         igb_restore_vlan(adapter);
1430
1431         igb_setup_tctl(adapter);
1432         igb_setup_mrqc(adapter);
1433         igb_setup_rctl(adapter);
1434
1435         igb_configure_tx(adapter);
1436         igb_configure_rx(adapter);
1437
1438         igb_rx_fifo_flush_82575(&adapter->hw);
1439
1440         /* call igb_desc_unused which always leaves
1441          * at least 1 descriptor unused to make sure
1442          * next_to_use != next_to_clean */
1443         for (i = 0; i < adapter->num_rx_queues; i++) {
1444                 struct igb_ring *ring = adapter->rx_ring[i];
1445                 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1446         }
1447 }
1448
1449 /**
1450  * igb_power_up_link - Power up the phy/serdes link
1451  * @adapter: address of board private structure
1452  **/
1453 void igb_power_up_link(struct igb_adapter *adapter)
1454 {
1455         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1456                 igb_power_up_phy_copper(&adapter->hw);
1457         else
1458                 igb_power_up_serdes_link_82575(&adapter->hw);
1459 }
1460
1461 /**
1462  * igb_power_down_link - Power down the phy/serdes link
1463  * @adapter: address of board private structure
1464  */
1465 static void igb_power_down_link(struct igb_adapter *adapter)
1466 {
1467         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1468                 igb_power_down_phy_copper_82575(&adapter->hw);
1469         else
1470                 igb_shutdown_serdes_link_82575(&adapter->hw);
1471 }
1472
1473 /**
1474  * igb_up - Open the interface and prepare it to handle traffic
1475  * @adapter: board private structure
1476  **/
1477 int igb_up(struct igb_adapter *adapter)
1478 {
1479         struct e1000_hw *hw = &adapter->hw;
1480         int i;
1481
1482         /* hardware has been reset, we need to reload some things */
1483         igb_configure(adapter);
1484
1485         clear_bit(__IGB_DOWN, &adapter->state);
1486
1487         for (i = 0; i < adapter->num_q_vectors; i++) {
1488                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1489                 napi_enable(&q_vector->napi);
1490         }
1491         if (adapter->msix_entries)
1492                 igb_configure_msix(adapter);
1493         else
1494                 igb_assign_vector(adapter->q_vector[0], 0);
1495
1496         /* Clear any pending interrupts. */
1497         rd32(E1000_ICR);
1498         igb_irq_enable(adapter);
1499
1500         /* notify VFs that reset has been completed */
1501         if (adapter->vfs_allocated_count) {
1502                 u32 reg_data = rd32(E1000_CTRL_EXT);
1503                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1504                 wr32(E1000_CTRL_EXT, reg_data);
1505         }
1506
1507         netif_tx_start_all_queues(adapter->netdev);
1508
1509         /* start the watchdog. */
1510         hw->mac.get_link_status = 1;
1511         schedule_work(&adapter->watchdog_task);
1512
1513         return 0;
1514 }
1515
1516 void igb_down(struct igb_adapter *adapter)
1517 {
1518         struct net_device *netdev = adapter->netdev;
1519         struct e1000_hw *hw = &adapter->hw;
1520         u32 tctl, rctl;
1521         int i;
1522
1523         /* signal that we're down so the interrupt handler does not
1524          * reschedule our watchdog timer */
1525         set_bit(__IGB_DOWN, &adapter->state);
1526
1527         /* disable receives in the hardware */
1528         rctl = rd32(E1000_RCTL);
1529         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1530         /* flush and sleep below */
1531
1532         netif_tx_stop_all_queues(netdev);
1533
1534         /* disable transmits in the hardware */
1535         tctl = rd32(E1000_TCTL);
1536         tctl &= ~E1000_TCTL_EN;
1537         wr32(E1000_TCTL, tctl);
1538         /* flush both disables and wait for them to finish */
1539         wrfl();
1540         msleep(10);
1541
1542         for (i = 0; i < adapter->num_q_vectors; i++) {
1543                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1544                 napi_disable(&q_vector->napi);
1545         }
1546
1547         igb_irq_disable(adapter);
1548
1549         del_timer_sync(&adapter->watchdog_timer);
1550         del_timer_sync(&adapter->phy_info_timer);
1551
1552         netif_carrier_off(netdev);
1553
1554         /* record the stats before reset*/
1555         spin_lock(&adapter->stats64_lock);
1556         igb_update_stats(adapter, &adapter->stats64);
1557         spin_unlock(&adapter->stats64_lock);
1558
1559         adapter->link_speed = 0;
1560         adapter->link_duplex = 0;
1561
1562         if (!pci_channel_offline(adapter->pdev))
1563                 igb_reset(adapter);
1564         igb_clean_all_tx_rings(adapter);
1565         igb_clean_all_rx_rings(adapter);
1566 #ifdef CONFIG_IGB_DCA
1567
1568         /* since we reset the hardware DCA settings were cleared */
1569         igb_setup_dca(adapter);
1570 #endif
1571 }
1572
1573 void igb_reinit_locked(struct igb_adapter *adapter)
1574 {
1575         WARN_ON(in_interrupt());
1576         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1577                 msleep(1);
1578         igb_down(adapter);
1579         igb_up(adapter);
1580         clear_bit(__IGB_RESETTING, &adapter->state);
1581 }
1582
1583 void igb_reset(struct igb_adapter *adapter)
1584 {
1585         struct pci_dev *pdev = adapter->pdev;
1586         struct e1000_hw *hw = &adapter->hw;
1587         struct e1000_mac_info *mac = &hw->mac;
1588         struct e1000_fc_info *fc = &hw->fc;
1589         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1590         u16 hwm;
1591
1592         /* Repartition the PBA for MTUs greater than 9K.
1593          * CTRL.RST is required for the change to take effect.
1594          */
1595         switch (mac->type) {
1596         case e1000_i350:
1597         case e1000_82580:
1598                 pba = rd32(E1000_RXPBS);
1599                 pba = igb_rxpbs_adjust_82580(pba);
1600                 break;
1601         case e1000_82576:
1602                 pba = rd32(E1000_RXPBS);
1603                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1604                 break;
1605         case e1000_82575:
1606         default:
1607                 pba = E1000_PBA_34K;
1608                 break;
1609         }
1610
1611         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1612             (mac->type < e1000_82576)) {
1613                 /* adjust PBA for jumbo frames */
1614                 wr32(E1000_PBA, pba);
1615
1616                 /* To maintain wire speed transmits, the Tx FIFO should be
1617                  * large enough to accommodate two full transmit packets,
1618                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1619                  * the Rx FIFO should be large enough to accommodate at least
1620                  * one full receive packet and is similarly rounded up and
1621                  * expressed in KB. */
1622                 pba = rd32(E1000_PBA);
1623                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1624                 tx_space = pba >> 16;
1625                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1626                 pba &= 0xffff;
1627                 /* the Tx FIFO also stores the 16-byte descriptor for each Tx
1628                  * packet, but not the Ethernet FCS, which hardware appends */
1629                 min_tx_space = (adapter->max_frame_size +
1630                                 sizeof(union e1000_adv_tx_desc) -
1631                                 ETH_FCS_LEN) * 2;
1632                 min_tx_space = ALIGN(min_tx_space, 1024);
1633                 min_tx_space >>= 10;
1634                 /* software strips receive CRC, so leave room for it */
1635                 min_rx_space = adapter->max_frame_size;
1636                 min_rx_space = ALIGN(min_rx_space, 1024);
1637                 min_rx_space >>= 10;
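                /* For example, assuming a 9000-byte MTU (9022-byte max frame):
                 * min_tx_space = (9022 + 16 - 4) * 2 = 18068 -> 18 KB after
                 * rounding, and min_rx_space = 9022 -> 9 KB after rounding. */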
1638
1639                 /* If current Tx allocation is less than the min Tx FIFO size,
1640                  * and the min Tx FIFO size is less than the current Rx FIFO
1641                  * allocation, take space away from current Rx allocation */
1642                 if (tx_space < min_tx_space &&
1643                     ((min_tx_space - tx_space) < pba)) {
1644                         pba = pba - (min_tx_space - tx_space);
1645
1646                         /* if short on rx space, rx wins and must trump tx
1647                          * adjustment */
1648                         if (pba < min_rx_space)
1649                                 pba = min_rx_space;
1650                 }
1651                 wr32(E1000_PBA, pba);
1652         }
1653
1654         /* flow control settings */
1655         /* The high water mark must be low enough to fit one full frame
1656          * (or the size used for early receive) above it in the Rx FIFO.
1657          * Set it to the lower of:
1658          * - 90% of the Rx FIFO size, or
1659          * - the full Rx FIFO size minus one full frame */
1660         hwm = min(((pba << 10) * 9 / 10),
1661                         ((pba << 10) - 2 * adapter->max_frame_size));
1662
1663         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1664         fc->low_water = fc->high_water - 16;
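        /* For example, a 34 KB PBA and the default 1522-byte max frame give
         * min(31334, 31772) = 31334; the 16-byte mask above rounds this down
         * to a high water mark of 31328 bytes (low water 31312). */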
1665         fc->pause_time = 0xFFFF;
1666         fc->send_xon = 1;
1667         fc->current_mode = fc->requested_mode;
1668
1669         /* ping all VFs and disable their transmit and receive queues */
1670         if (adapter->vfs_allocated_count) {
1671                 int i;
1672                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1673                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1674
1675                 /* ping all the active vfs to let them know we are going down */
1676                 igb_ping_all_vfs(adapter);
1677
1678                 /* disable transmits and receives */
1679                 wr32(E1000_VFRE, 0);
1680                 wr32(E1000_VFTE, 0);
1681         }
1682
1683         /* Allow time for pending master requests to run */
1684         hw->mac.ops.reset_hw(hw);
1685         wr32(E1000_WUC, 0);
1686
1687         if (hw->mac.ops.init_hw(hw))
1688                 dev_err(&pdev->dev, "Hardware Error\n");
1689         if (hw->mac.type > e1000_82580) {
1690                 if (adapter->flags & IGB_FLAG_DMAC) {
1691                         u32 reg;
1692
1693                         /*
1694                          * DMA Coalescing high water mark needs to be higher
1695                          * than the Rx threshold.  The Rx threshold is
1696                          * currently pba - 6, so we should use a high water
1697                          * mark of pba - 4. */
1698                         hwm = (pba - 4) << 10;
1699
1700                         reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1701                                & E1000_DMACR_DMACTHR_MASK);
1702
1703                         /* transition to L0s or L1 if available */
1704                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1705
1706                         /* watchdog timer = ~1000 usec, in 32 usec units (1000 >> 5 == 31) */
1707                         reg |= (1000 >> 5);
1708                         wr32(E1000_DMACR, reg);
1709
1710                         /* no lower threshold to disable coalescing
1711                          * (smart fifo) - UTRESH = 0 */
1712                         wr32(E1000_DMCRTRH, 0);
1713
1714                         /* write the DMA Coalescing high water mark computed above to FCRTC */
1715                         wr32(E1000_FCRTC, hwm);
1716
1717                         /*
1718                          * This sets the time to wait before requesting a
1719                          * transition to a low power state to the number of usecs
1720                          * needed to receive one 512-byte frame at gigabit line rate.
1721                          */
1722                         reg = rd32(E1000_DMCTLX);
1723                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
1724
1725                         /* Delay 255 usec before entering Lx state. */
1726                         reg |= 0xFF;
1727                         wr32(E1000_DMCTLX, reg);
1728
1729                         /* free space in Tx packet buffer to wake from DMAC */
1730                         wr32(E1000_DMCTXTH,
1731                              (IGB_MIN_TXPBSIZE -
1732                              (IGB_TX_BUF_4096 + adapter->max_frame_size))
1733                              >> 6);
1734
1735                         /* make low power state decision controlled by DMAC */
1736                         reg = rd32(E1000_PCIEMISC);
1737                         reg |= E1000_PCIEMISC_LX_DECISION;
1738                         wr32(E1000_PCIEMISC, reg);
1739                 } /* end if IGB_FLAG_DMAC set */
1740         }
1741         if (hw->mac.type == e1000_82580) {
1742                 u32 reg = rd32(E1000_PCIEMISC);
1743                 wr32(E1000_PCIEMISC,
1744                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1745         }
1746         if (!netif_running(adapter->netdev))
1747                 igb_power_down_link(adapter);
1748
1749         igb_update_mng_vlan(adapter);
1750
1751         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1752         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1753
1754         igb_get_phy_info(hw);
1755 }
1756
1757 static u32 igb_fix_features(struct net_device *netdev, u32 features)
1758 {
1759         /*
1760          * Since there is no support for separate Rx/Tx VLAN accel
1761          * enable/disable, make sure the Tx flag is always in the same state as Rx.
1762          */
1763         if (features & NETIF_F_HW_VLAN_RX)
1764                 features |= NETIF_F_HW_VLAN_TX;
1765         else
1766                 features &= ~NETIF_F_HW_VLAN_TX;
1767
1768         return features;
1769 }
1770
1771 static int igb_set_features(struct net_device *netdev, u32 features)
1772 {
1773         struct igb_adapter *adapter = netdev_priv(netdev);
1774         int i;
1775         u32 changed = netdev->features ^ features;
1776
1777         for (i = 0; i < adapter->num_rx_queues; i++) {
1778                 if (features & NETIF_F_RXCSUM)
1779                         adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1780                 else
1781                         adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1782         }
1783
1784         if (changed & NETIF_F_HW_VLAN_RX)
1785                 igb_vlan_mode(netdev, features);
1786
1787         return 0;
1788 }
1789
1790 static const struct net_device_ops igb_netdev_ops = {
1791         .ndo_open               = igb_open,
1792         .ndo_stop               = igb_close,
1793         .ndo_start_xmit         = igb_xmit_frame,
1794         .ndo_get_stats64        = igb_get_stats64,
1795         .ndo_set_rx_mode        = igb_set_rx_mode,
1796         .ndo_set_mac_address    = igb_set_mac,
1797         .ndo_change_mtu         = igb_change_mtu,
1798         .ndo_do_ioctl           = igb_ioctl,
1799         .ndo_tx_timeout         = igb_tx_timeout,
1800         .ndo_validate_addr      = eth_validate_addr,
1801         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1802         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1803         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1804         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1805         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1806         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1807 #ifdef CONFIG_NET_POLL_CONTROLLER
1808         .ndo_poll_controller    = igb_netpoll,
1809 #endif
1810         .ndo_fix_features       = igb_fix_features,
1811         .ndo_set_features       = igb_set_features,
1812 };
1813
1814 /**
1815  * igb_probe - Device Initialization Routine
1816  * @pdev: PCI device information struct
1817  * @ent: entry in igb_pci_tbl
1818  *
1819  * Returns 0 on success, negative on failure
1820  *
1821  * igb_probe initializes an adapter identified by a pci_dev structure.
1822  * The OS initialization, configuring of the adapter private structure,
1823  * and a hardware reset occur.
1824  **/
1825 static int __devinit igb_probe(struct pci_dev *pdev,
1826                                const struct pci_device_id *ent)
1827 {
1828         struct net_device *netdev;
1829         struct igb_adapter *adapter;
1830         struct e1000_hw *hw;
1831         u16 eeprom_data = 0;
1832         s32 ret_val;
1833         static int global_quad_port_a; /* global quad port a indication */
1834         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1835         unsigned long mmio_start, mmio_len;
1836         int err, pci_using_dac;
1837         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1838         u8 part_str[E1000_PBANUM_LENGTH];
1839
1840         /* Catch broken hardware that put the wrong VF device ID in
1841          * the PCIe SR-IOV capability.
1842          */
1843         if (pdev->is_virtfn) {
1844                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1845                      pci_name(pdev), pdev->vendor, pdev->device);
1846                 return -EINVAL;
1847         }
1848
1849         err = pci_enable_device_mem(pdev);
1850         if (err)
1851                 return err;
1852
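        /* Prefer a 64-bit DMA mask; if the platform cannot provide one, fall
         * back to 32-bit DMA and leave pci_using_dac clear so that HIGHDMA is
         * not advertised later. */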
1853         pci_using_dac = 0;
1854         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1855         if (!err) {
1856                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1857                 if (!err)
1858                         pci_using_dac = 1;
1859         } else {
1860                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1861                 if (err) {
1862                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1863                         if (err) {
1864                                 dev_err(&pdev->dev, "No usable DMA "
1865                                         "configuration, aborting\n");
1866                                 goto err_dma;
1867                         }
1868                 }
1869         }
1870
1871         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1872                                            IORESOURCE_MEM),
1873                                            igb_driver_name);
1874         if (err)
1875                 goto err_pci_reg;
1876
1877         pci_enable_pcie_error_reporting(pdev);
1878
1879         pci_set_master(pdev);
1880         pci_save_state(pdev);
1881
1882         err = -ENOMEM;
1883         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1884                                    IGB_MAX_TX_QUEUES);
1885         if (!netdev)
1886                 goto err_alloc_etherdev;
1887
1888         SET_NETDEV_DEV(netdev, &pdev->dev);
1889
1890         pci_set_drvdata(pdev, netdev);
1891         adapter = netdev_priv(netdev);
1892         adapter->netdev = netdev;
1893         adapter->pdev = pdev;
1894         hw = &adapter->hw;
1895         hw->back = adapter;
1896         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1897
1898         mmio_start = pci_resource_start(pdev, 0);
1899         mmio_len = pci_resource_len(pdev, 0);
1900
1901         err = -EIO;
1902         hw->hw_addr = ioremap(mmio_start, mmio_len);
1903         if (!hw->hw_addr)
1904                 goto err_ioremap;
1905
1906         netdev->netdev_ops = &igb_netdev_ops;
1907         igb_set_ethtool_ops(netdev);
1908         netdev->watchdog_timeo = 5 * HZ;
1909
1910         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1911
1912         netdev->mem_start = mmio_start;
1913         netdev->mem_end = mmio_start + mmio_len;
1914
1915         /* PCI config space info */
1916         hw->vendor_id = pdev->vendor;
1917         hw->device_id = pdev->device;
1918         hw->revision_id = pdev->revision;
1919         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1920         hw->subsystem_device_id = pdev->subsystem_device;
1921
1922         /* Copy the default MAC, PHY and NVM function pointers */
1923         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1924         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1925         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1926         /* Initialize skew-specific constants */
1927         err = ei->get_invariants(hw);
1928         if (err)
1929                 goto err_sw_init;
1930
1931         /* setup the private structure */
1932         err = igb_sw_init(adapter);
1933         if (err)
1934                 goto err_sw_init;
1935
1936         igb_get_bus_info_pcie(hw);
1937
1938         hw->phy.autoneg_wait_to_complete = false;
1939
1940         /* Copper options */
1941         if (hw->phy.media_type == e1000_media_type_copper) {
1942                 hw->phy.mdix = AUTO_ALL_MODES;
1943                 hw->phy.disable_polarity_correction = false;
1944                 hw->phy.ms_type = e1000_ms_hw_default;
1945         }
1946
1947         if (igb_check_reset_block(hw))
1948                 dev_info(&pdev->dev,
1949                         "PHY reset is blocked due to SOL/IDER session.\n");
1950
1951         netdev->hw_features = NETIF_F_SG |
1952                            NETIF_F_IP_CSUM |
1953                            NETIF_F_IPV6_CSUM |
1954                            NETIF_F_TSO |
1955                            NETIF_F_TSO6 |
1956                            NETIF_F_RXCSUM |
1957                            NETIF_F_HW_VLAN_RX;
1958
1959         netdev->features = netdev->hw_features |
1960                            NETIF_F_HW_VLAN_TX |
1961                            NETIF_F_HW_VLAN_FILTER;
1962
1963         netdev->vlan_features |= NETIF_F_TSO;
1964         netdev->vlan_features |= NETIF_F_TSO6;
1965         netdev->vlan_features |= NETIF_F_IP_CSUM;
1966         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1967         netdev->vlan_features |= NETIF_F_SG;
1968
1969         if (pci_using_dac) {
1970                 netdev->features |= NETIF_F_HIGHDMA;
1971                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1972         }
1973
1974         if (hw->mac.type >= e1000_82576) {
1975                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1976                 netdev->features |= NETIF_F_SCTP_CSUM;
1977         }
1978
1979         netdev->priv_flags |= IFF_UNICAST_FLT;
1980
1981         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1982
1983         /* before reading the NVM, reset the controller to put the device in a
1984          * known good starting state */
1985         hw->mac.ops.reset_hw(hw);
1986
1987         /* make sure the NVM is good */
1988         if (hw->nvm.ops.validate(hw) < 0) {
1989                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1990                 err = -EIO;
1991                 goto err_eeprom;
1992         }
1993
1994         /* copy the MAC address out of the NVM */
1995         if (hw->mac.ops.read_mac_addr(hw))
1996                 dev_err(&pdev->dev, "NVM Read Error\n");
1997
1998         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1999         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2000
2001         if (!is_valid_ether_addr(netdev->perm_addr)) {
2002                 dev_err(&pdev->dev, "Invalid MAC Address\n");
2003                 err = -EIO;
2004                 goto err_eeprom;
2005         }
2006
2007         setup_timer(&adapter->watchdog_timer, igb_watchdog,
2008                     (unsigned long) adapter);
2009         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2010                     (unsigned long) adapter);
2011
2012         INIT_WORK(&adapter->reset_task, igb_reset_task);
2013         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2014
2015         /* Initialize link properties that are user-changeable */
2016         adapter->fc_autoneg = true;
2017         hw->mac.autoneg = true;
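        /* 0x2f advertises 10 and 100 Mb/s at half and full duplex plus
         * 1000 Mb/s full duplex (all modes except 1000 Mb/s half duplex) */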
2018         hw->phy.autoneg_advertised = 0x2f;
2019
2020         hw->fc.requested_mode = e1000_fc_default;
2021         hw->fc.current_mode = e1000_fc_default;
2022
2023         igb_validate_mdi_setting(hw);
2024
2025         /* Initial Wake on LAN setting.  If APM wake is enabled in the
2026          * EEPROM, enable the ACPI Magic Packet filter.
2027          */
2028
2029         if (hw->bus.func == 0)
2030                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2031         else if (hw->mac.type >= e1000_82580)
2032                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2033                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2034                                  &eeprom_data);
2035         else if (hw->bus.func == 1)
2036                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2037
2038         if (eeprom_data & eeprom_apme_mask)
2039                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2040
2041         /* now that we have the eeprom settings, apply the special cases where
2042          * the eeprom may be wrong or the board simply won't support wake on
2043          * lan on a particular port */
2044         switch (pdev->device) {
2045         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2046                 adapter->eeprom_wol = 0;
2047                 break;
2048         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2049         case E1000_DEV_ID_82576_FIBER:
2050         case E1000_DEV_ID_82576_SERDES:
2051                 /* Wake events only supported on port A for dual fiber
2052                  * regardless of eeprom setting */
2053                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2054                         adapter->eeprom_wol = 0;
2055                 break;
2056         case E1000_DEV_ID_82576_QUAD_COPPER:
2057         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2058                 /* if quad port adapter, disable WoL on all but port A */
2059                 if (global_quad_port_a != 0)
2060                         adapter->eeprom_wol = 0;
2061                 else
2062                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2063                 /* Reset for multiple quad port adapters */
2064                 if (++global_quad_port_a == 4)
2065                         global_quad_port_a = 0;
2066                 break;
2067         }
2068
2069         /* initialize the wol settings based on the eeprom settings */
2070         adapter->wol = adapter->eeprom_wol;
2071         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2072
2073         /* reset the hardware with the new settings */
2074         igb_reset(adapter);
2075
2076         /* let the f/w know that the h/w is now under the control of the
2077          * driver. */
2078         igb_get_hw_control(adapter);
2079
2080         strcpy(netdev->name, "eth%d");
2081         err = register_netdev(netdev);
2082         if (err)
2083                 goto err_register;
2084
2085         igb_vlan_mode(netdev, netdev->features);
2086
2087         /* carrier off reporting is important to ethtool even BEFORE open */
2088         netif_carrier_off(netdev);
2089
2090 #ifdef CONFIG_IGB_DCA
2091         if (dca_add_requester(&pdev->dev) == 0) {
2092                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2093                 dev_info(&pdev->dev, "DCA enabled\n");
2094                 igb_setup_dca(adapter);
2095         }
2096
2097 #endif
2098         /* do hw tstamp init after resetting */
2099         igb_init_hw_timer(adapter);
2100
2101         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2102         /* print bus type/speed/width info */
2103         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2104                  netdev->name,
2105                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2106                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2107                                                             "unknown"),
2108                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2109                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2110                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2111                    "unknown"),
2112                  netdev->dev_addr);
2113
2114         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2115         if (ret_val)
2116                 strcpy(part_str, "Unknown");
2117         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2118         dev_info(&pdev->dev,
2119                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2120                 adapter->msix_entries ? "MSI-X" :
2121                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2122                 adapter->num_rx_queues, adapter->num_tx_queues);
2123         switch (hw->mac.type) {
2124         case e1000_i350:
2125                 igb_set_eee_i350(hw);
2126                 break;
2127         default:
2128                 break;
2129         }
2130         return 0;
2131
2132 err_register:
2133         igb_release_hw_control(adapter);
2134 err_eeprom:
2135         if (!igb_check_reset_block(hw))
2136                 igb_reset_phy(hw);
2137
2138         if (hw->flash_address)
2139                 iounmap(hw->flash_address);
2140 err_sw_init:
2141         igb_clear_interrupt_scheme(adapter);
2142         iounmap(hw->hw_addr);
2143 err_ioremap:
2144         free_netdev(netdev);
2145 err_alloc_etherdev:
2146         pci_release_selected_regions(pdev,
2147                                      pci_select_bars(pdev, IORESOURCE_MEM));
2148 err_pci_reg:
2149 err_dma:
2150         pci_disable_device(pdev);
2151         return err;
2152 }
2153
2154 /**
2155  * igb_remove - Device Removal Routine
2156  * @pdev: PCI device information struct
2157  *
2158  * igb_remove is called by the PCI subsystem to alert the driver
2159  * that it should release a PCI device.  This could be caused by a
2160  * Hot-Plug event, or because the driver is going to be removed from
2161  * memory.
2162  **/
2163 static void __devexit igb_remove(struct pci_dev *pdev)
2164 {
2165         struct net_device *netdev = pci_get_drvdata(pdev);
2166         struct igb_adapter *adapter = netdev_priv(netdev);
2167         struct e1000_hw *hw = &adapter->hw;
2168
2169         /*
2170          * The watchdog timer may be rescheduled, so explicitly
2171          * disable watchdog from being rescheduled.
2172          */
2173         set_bit(__IGB_DOWN, &adapter->state);
2174         del_timer_sync(&adapter->watchdog_timer);
2175         del_timer_sync(&adapter->phy_info_timer);
2176
2177         cancel_work_sync(&adapter->reset_task);
2178         cancel_work_sync(&adapter->watchdog_task);
2179
2180 #ifdef CONFIG_IGB_DCA
2181         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2182                 dev_info(&pdev->dev, "DCA disabled\n");
2183                 dca_remove_requester(&pdev->dev);
2184                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2185                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2186         }
2187 #endif
2188
2189         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2190          * would have already happened in close and is redundant. */
2191         igb_release_hw_control(adapter);
2192
2193         unregister_netdev(netdev);
2194
2195         igb_clear_interrupt_scheme(adapter);
2196
2197 #ifdef CONFIG_PCI_IOV
2198         /* reclaim resources allocated to VFs */
2199         if (adapter->vf_data) {
2200                 /* disable iov and allow time for transactions to clear */
2201                 pci_disable_sriov(pdev);
2202                 msleep(500);
2203
2204                 kfree(adapter->vf_data);
2205                 adapter->vf_data = NULL;
2206                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2207                 wrfl();
2208                 msleep(100);
2209                 dev_info(&pdev->dev, "IOV Disabled\n");
2210         }
2211 #endif
2212
2213         iounmap(hw->hw_addr);
2214         if (hw->flash_address)
2215                 iounmap(hw->flash_address);
2216         pci_release_selected_regions(pdev,
2217                                      pci_select_bars(pdev, IORESOURCE_MEM));
2218
2219         free_netdev(netdev);
2220
2221         pci_disable_pcie_error_reporting(pdev);
2222
2223         pci_disable_device(pdev);
2224 }
2225
2226 /**
2227  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2228  * @adapter: board private structure to initialize
2229  *
2230  * This function initializes the vf specific data storage and then attempts to
2231  * allocate the VFs.  The reason for ordering it this way is because it is much
2232  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2233  * the memory for the VFs.
2234  **/
2235 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
2236 {
2237 #ifdef CONFIG_PCI_IOV
2238         struct pci_dev *pdev = adapter->pdev;
2239
2240         if (adapter->vfs_allocated_count) {
2241                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2242                                            sizeof(struct vf_data_storage),
2243                                            GFP_KERNEL);
2244                 /* if allocation failed then we do not support SR-IOV */
2245                 if (!adapter->vf_data) {
2246                         adapter->vfs_allocated_count = 0;
2247                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2248                                 "Data Storage\n");
2249                 }
2250         }
2251
2252         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2253                 kfree(adapter->vf_data);
2254                 adapter->vf_data = NULL;
2255 #endif /* CONFIG_PCI_IOV */
2256                 adapter->vfs_allocated_count = 0;
2257 #ifdef CONFIG_PCI_IOV
2258         } else {
2259                 unsigned char mac_addr[ETH_ALEN];
2260                 int i;
2261                 dev_info(&pdev->dev, "%d vfs allocated\n",
2262                          adapter->vfs_allocated_count);
2263                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2264                         random_ether_addr(mac_addr);
2265                         igb_set_vf_mac(adapter, i, mac_addr);
2266                 }
2267                 /* DMA Coalescing is not supported in IOV mode. */
2268                 if (adapter->flags & IGB_FLAG_DMAC)
2269                         adapter->flags &= ~IGB_FLAG_DMAC;
2270         }
2271 #endif /* CONFIG_PCI_IOV */
2272 }
2273
2274
2275 /**
2276  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2277  * @adapter: board private structure to initialize
2278  *
2279  * igb_init_hw_timer initializes the function pointer and values for the hw
2280  * timer found in hardware.
2281  **/
2282 static void igb_init_hw_timer(struct igb_adapter *adapter)
2283 {
2284         struct e1000_hw *hw = &adapter->hw;
2285
2286         switch (hw->mac.type) {
2287         case e1000_i350:
2288         case e1000_82580:
2289                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2290                 adapter->cycles.read = igb_read_clock;
2291                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2292                 adapter->cycles.mult = 1;
2293                 /*
2294                  * The 82580 timesync advances the system timer in 8 ns
2295                  * increments and the value cannot be shifted.  Instead we need to shift
2296                  * the registers to generate a 64bit timer value.  As a result
2297                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2298                  * 24 in order to generate a larger value for synchronization.
2299                  */
2300                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2301                 /* disable system timer temporarily by setting bit 31 */
2302                 wr32(E1000_TSAUXC, 0x80000000);
2303                 wrfl();
2304
2305                 /* Set registers so that rollover occurs soon to test this. */
2306                 wr32(E1000_SYSTIMR, 0x00000000);
2307                 wr32(E1000_SYSTIML, 0x80000000);
2308                 wr32(E1000_SYSTIMH, 0x000000FF);
2309                 wrfl();
2310
2311                 /* enable system timer by clearing bit 31 */
2312                 wr32(E1000_TSAUXC, 0x0);
2313                 wrfl();
2314
2315                 timecounter_init(&adapter->clock,
2316                                  &adapter->cycles,
2317                                  ktime_to_ns(ktime_get_real()));
2318                 /*
2319                  * Synchronize our NIC clock against system wall clock. NIC
2320                  * time stamp reading requires ~3us per sample, each sample
2321                  * was pretty stable even under load => only require 10
2322                  * samples for each offset comparison.
2323                  */
2324                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2325                 adapter->compare.source = &adapter->clock;
2326                 adapter->compare.target = ktime_get_real;
2327                 adapter->compare.num_samples = 10;
2328                 timecompare_update(&adapter->compare, 0);
2329                 break;
2330         case e1000_82576:
2331                 /*
2332                  * Initialize hardware timer: we keep it running just in case
2333                  * that some program needs it later on.
2334                  */
2335                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2336                 adapter->cycles.read = igb_read_clock;
2337                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2338                 adapter->cycles.mult = 1;
2339                 /*
2340                  * Scale the NIC clock cycle by a large factor so that
2341                  * relatively small clock corrections can be added or
2342                  * subtracted at each clock tick. The drawbacks of a large
2343                  * factor are a) that the clock register overflows more quickly
2344                  * (not such a big deal) and b) that the increment per tick has
2345                  * to fit into 24 bits.  As a result we need to use a shift of
2346                  * 19 so we can fit a value of 16 into the TIMINCA register.
2347                  */
2348                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2349                 wr32(E1000_TIMINCA,
2350                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2351                                 (16 << IGB_82576_TSYNC_SHIFT));
2352
2353                 /* Set registers so that rollover occurs soon to test this. */
2354                 wr32(E1000_SYSTIML, 0x00000000);
2355                 wr32(E1000_SYSTIMH, 0xFF800000);
2356                 wrfl();
2357
2358                 timecounter_init(&adapter->clock,
2359                                  &adapter->cycles,
2360                                  ktime_to_ns(ktime_get_real()));
2361                 /*
2362                  * Synchronize our NIC clock against system wall clock. NIC
2363                  * time stamp reading requires ~3us per sample, each sample
2364                  * was pretty stable even under load => only require 10
2365                  * samples for each offset comparison.
2366                  */
2367                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2368                 adapter->compare.source = &adapter->clock;
2369                 adapter->compare.target = ktime_get_real;
2370                 adapter->compare.num_samples = 10;
2371                 timecompare_update(&adapter->compare, 0);
2372                 break;
2373         case e1000_82575:
2374                 /* 82575 does not support timesync */
2375         default:
2376                 break;
2377         }
2378
2379 }
2380
2381 /**
2382  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2383  * @adapter: board private structure to initialize
2384  *
2385  * igb_sw_init initializes the Adapter private data structure.
2386  * Fields are initialized based on PCI device information and
2387  * OS network device settings (MTU size).
2388  **/
2389 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2390 {
2391         struct e1000_hw *hw = &adapter->hw;
2392         struct net_device *netdev = adapter->netdev;
2393         struct pci_dev *pdev = adapter->pdev;
2394
2395         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2396
2397         /* set default ring sizes */
2398         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2399         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2400
2401         /* set default ITR values */
2402         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2403         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2404
2405         /* set default work limits */
2406         adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2407
2408         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2409                                   VLAN_HLEN;
2410         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
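        /* e.g. the default 1500-byte MTU gives a 1522-byte max frame
         * (14-byte Ethernet header + 4-byte VLAN tag + 4-byte FCS), while the
         * minimum frame is the 60-byte ETH_ZLEN plus the 4-byte FCS */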
2411
2412         spin_lock_init(&adapter->stats64_lock);
2413 #ifdef CONFIG_PCI_IOV
2414         switch (hw->mac.type) {
2415         case e1000_82576:
2416         case e1000_i350:
2417                 if (max_vfs > 7) {
2418                         dev_warn(&pdev->dev,
2419                                  "Maximum of 7 VFs per PF, using max\n");
2420                         adapter->vfs_allocated_count = 7;
2421                 } else
2422                         adapter->vfs_allocated_count = max_vfs;
2423                 break;
2424         default:
2425                 break;
2426         }
2427 #endif /* CONFIG_PCI_IOV */
2428         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2429         /* i350 cannot do RSS and SR-IOV at the same time */
2430         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2431                 adapter->rss_queues = 1;
2432
2433         /*
2434          * if rss_queues > 4, or more than 6 VFs are allocated while more
2435          * than one RSS queue is in use, combine the queues into queue
2436          * pairs in order to conserve the limited supply of interrupt vectors
2437          */
2438         if ((adapter->rss_queues > 4) ||
2439             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2440                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2441
2442         /* This call may decrease the number of queues */
2443         if (igb_init_interrupt_scheme(adapter)) {
2444                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2445                 return -ENOMEM;
2446         }
2447
2448         igb_probe_vfs(adapter);
2449
2450         /* Explicitly disable IRQ since the NIC can be in any state. */
2451         igb_irq_disable(adapter);
2452
2453         if (hw->mac.type == e1000_i350)
2454                 adapter->flags &= ~IGB_FLAG_DMAC;
2455
2456         set_bit(__IGB_DOWN, &adapter->state);
2457         return 0;
2458 }
2459
2460 /**
2461  * igb_open - Called when a network interface is made active
2462  * @netdev: network interface device structure
2463  *
2464  * Returns 0 on success, negative value on failure
2465  *
2466  * The open entry point is called when a network interface is made
2467  * active by the system (IFF_UP).  At this point all resources needed
2468  * for transmit and receive operations are allocated, the interrupt
2469  * handler is registered with the OS, the watchdog timer is started,
2470  * and the stack is notified that the interface is ready.
2471  **/
2472 static int igb_open(struct net_device *netdev)
2473 {
2474         struct igb_adapter *adapter = netdev_priv(netdev);
2475         struct e1000_hw *hw = &adapter->hw;
2476         int err;
2477         int i;
2478
2479         /* disallow open during test */
2480         if (test_bit(__IGB_TESTING, &adapter->state))
2481                 return -EBUSY;
2482
2483         netif_carrier_off(netdev);
2484
2485         /* allocate transmit descriptors */
2486         err = igb_setup_all_tx_resources(adapter);
2487         if (err)
2488                 goto err_setup_tx;
2489
2490         /* allocate receive descriptors */
2491         err = igb_setup_all_rx_resources(adapter);
2492         if (err)
2493                 goto err_setup_rx;
2494
2495         igb_power_up_link(adapter);
2496
2497         /* before we allocate an interrupt, we must be ready to handle it.
2498          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2499          * as soon as we call request_irq, so we have to set up our
2500          * clean_rx handler before we do so.  */
2501         igb_configure(adapter);
2502
2503         err = igb_request_irq(adapter);
2504         if (err)
2505                 goto err_req_irq;
2506
2507         /* From here on the code is the same as igb_up() */
2508         clear_bit(__IGB_DOWN, &adapter->state);
2509
2510         for (i = 0; i < adapter->num_q_vectors; i++) {
2511                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2512                 napi_enable(&q_vector->napi);
2513         }
2514
2515         /* Clear any pending interrupts. */
2516         rd32(E1000_ICR);
2517
2518         igb_irq_enable(adapter);
2519
2520         /* notify VFs that reset has been completed */
2521         if (adapter->vfs_allocated_count) {
2522                 u32 reg_data = rd32(E1000_CTRL_EXT);
2523                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2524                 wr32(E1000_CTRL_EXT, reg_data);
2525         }
2526
2527         netif_tx_start_all_queues(netdev);
2528
2529         /* start the watchdog. */
2530         hw->mac.get_link_status = 1;
2531         schedule_work(&adapter->watchdog_task);
2532
2533         return 0;
2534
2535 err_req_irq:
2536         igb_release_hw_control(adapter);
2537         igb_power_down_link(adapter);
2538         igb_free_all_rx_resources(adapter);
2539 err_setup_rx:
2540         igb_free_all_tx_resources(adapter);
2541 err_setup_tx:
2542         igb_reset(adapter);
2543
2544         return err;
2545 }
2546
2547 /**
2548  * igb_close - Disables a network interface
2549  * @netdev: network interface device structure
2550  *
2551  * Returns 0, this is not allowed to fail
2552  *
2553  * The close entry point is called when an interface is de-activated
2554  * by the OS.  The hardware is still under the driver's control, but
2555  * needs to be disabled.  A global MAC reset is issued to stop the
2556  * hardware, and all transmit and receive resources are freed.
2557  **/
2558 static int igb_close(struct net_device *netdev)
2559 {
2560         struct igb_adapter *adapter = netdev_priv(netdev);
2561
2562         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2563         igb_down(adapter);
2564
2565         igb_free_irq(adapter);
2566
2567         igb_free_all_tx_resources(adapter);
2568         igb_free_all_rx_resources(adapter);
2569
2570         return 0;
2571 }
2572
2573 /**
2574  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2575  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2576  *
2577  * Return 0 on success, negative on failure
2578  **/
2579 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2580 {
2581         struct device *dev = tx_ring->dev;
2582         int size;
2583
2584         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2585         tx_ring->tx_buffer_info = vzalloc(size);
2586         if (!tx_ring->tx_buffer_info)
2587                 goto err;
2588
2589         /* round up to nearest 4K */
2590         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2591         tx_ring->size = ALIGN(tx_ring->size, 4096);
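        /* e.g. a 256-descriptor ring needs 256 * 16 bytes = 4096 bytes,
         * which is already 4K aligned */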
2592
2593         tx_ring->desc = dma_alloc_coherent(dev,
2594                                            tx_ring->size,
2595                                            &tx_ring->dma,
2596                                            GFP_KERNEL);
2597
2598         if (!tx_ring->desc)
2599                 goto err;
2600
2601         tx_ring->next_to_use = 0;
2602         tx_ring->next_to_clean = 0;
2603         return 0;
2604
2605 err:
2606         vfree(tx_ring->tx_buffer_info);
2607         dev_err(dev,
2608                 "Unable to allocate memory for the transmit descriptor ring\n");
2609         return -ENOMEM;
2610 }
2611
2612 /**
2613  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2614  *                                (Descriptors) for all queues
2615  * @adapter: board private structure
2616  *
2617  * Return 0 on success, negative on failure
2618  **/
2619 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2620 {
2621         struct pci_dev *pdev = adapter->pdev;
2622         int i, err = 0;
2623
2624         for (i = 0; i < adapter->num_tx_queues; i++) {
2625                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2626                 if (err) {
2627                         dev_err(&pdev->dev,
2628                                 "Allocation for Tx Queue %u failed\n", i);
2629                         for (i--; i >= 0; i--)
2630                                 igb_free_tx_resources(adapter->tx_ring[i]);
2631                         break;
2632                 }
2633         }
2634
2635         return err;
2636 }
2637
2638 /**
2639  * igb_setup_tctl - configure the transmit control registers
2640  * @adapter: Board private structure
2641  **/
2642 void igb_setup_tctl(struct igb_adapter *adapter)
2643 {
2644         struct e1000_hw *hw = &adapter->hw;
2645         u32 tctl;
2646
2647         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2648         wr32(E1000_TXDCTL(0), 0);
2649
2650         /* Program the Transmit Control Register */
2651         tctl = rd32(E1000_TCTL);
2652         tctl &= ~E1000_TCTL_CT;
2653         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2654                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2655
2656         igb_config_collision_dist(hw);
2657
2658         /* Enable transmits */
2659         tctl |= E1000_TCTL_EN;
2660
2661         wr32(E1000_TCTL, tctl);
2662 }
2663
2664 /**
2665  * igb_configure_tx_ring - Configure transmit ring after Reset
2666  * @adapter: board private structure
2667  * @ring: tx ring to configure
2668  *
2669  * Configure a transmit ring after a reset.
2670  **/
2671 void igb_configure_tx_ring(struct igb_adapter *adapter,
2672                            struct igb_ring *ring)
2673 {
2674         struct e1000_hw *hw = &adapter->hw;
2675         u32 txdctl = 0;
2676         u64 tdba = ring->dma;
2677         int reg_idx = ring->reg_idx;
2678
2679         /* disable the queue */
2680         wr32(E1000_TXDCTL(reg_idx), 0);
2681         wrfl();
2682         mdelay(10);
2683
2684         wr32(E1000_TDLEN(reg_idx),
2685                         ring->count * sizeof(union e1000_adv_tx_desc));
2686         wr32(E1000_TDBAL(reg_idx),
2687                         tdba & 0x00000000ffffffffULL);
2688         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2689
2690         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2691         wr32(E1000_TDH(reg_idx), 0);
2692         writel(0, ring->tail);
2693
2694         txdctl |= IGB_TX_PTHRESH;
2695         txdctl |= IGB_TX_HTHRESH << 8;
2696         txdctl |= IGB_TX_WTHRESH << 16;
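        /* the prefetch, host and write-back thresholds occupy successive
         * byte-aligned fields of TXDCTL (shifted by 0, 8 and 16 bits) */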
2697
2698         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2699         wr32(E1000_TXDCTL(reg_idx), txdctl);
2700 }
2701
2702 /**
2703  * igb_configure_tx - Configure transmit Unit after Reset
2704  * @adapter: board private structure
2705  *
2706  * Configure the Tx unit of the MAC after a reset.
2707  **/
2708 static void igb_configure_tx(struct igb_adapter *adapter)
2709 {
2710         int i;
2711
2712         for (i = 0; i < adapter->num_tx_queues; i++)
2713                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2714 }
2715
2716 /**
2717  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2718  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2719  *
2720  * Returns 0 on success, negative on failure
2721  **/
2722 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2723 {
2724         struct device *dev = rx_ring->dev;
2725         int size, desc_len;
2726
2727         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2728         rx_ring->rx_buffer_info = vzalloc(size);
2729         if (!rx_ring->rx_buffer_info)
2730                 goto err;
2731
2732         desc_len = sizeof(union e1000_adv_rx_desc);
2733
2734         /* Round up to nearest 4K */
2735         rx_ring->size = rx_ring->count * desc_len;
2736         rx_ring->size = ALIGN(rx_ring->size, 4096);
2737
2738         rx_ring->desc = dma_alloc_coherent(dev,
2739                                            rx_ring->size,
2740                                            &rx_ring->dma,
2741                                            GFP_KERNEL);
2742
2743         if (!rx_ring->desc)
2744                 goto err;
2745
2746         rx_ring->next_to_clean = 0;
2747         rx_ring->next_to_use = 0;
2748
2749         return 0;
2750
2751 err:
2752         vfree(rx_ring->rx_buffer_info);
2753         rx_ring->rx_buffer_info = NULL;
2754         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2755                 " ring\n");
2756         return -ENOMEM;
2757 }
2758
2759 /**
2760  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2761  *                                (Descriptors) for all queues
2762  * @adapter: board private structure
2763  *
2764  * Return 0 on success, negative on failure
2765  **/
2766 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2767 {
2768         struct pci_dev *pdev = adapter->pdev;
2769         int i, err = 0;
2770
2771         for (i = 0; i < adapter->num_rx_queues; i++) {
2772                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2773                 if (err) {
2774                         dev_err(&pdev->dev,
2775                                 "Allocation for Rx Queue %u failed\n", i);
2776                         for (i--; i >= 0; i--)
2777                                 igb_free_rx_resources(adapter->rx_ring[i]);
2778                         break;
2779                 }
2780         }
2781
2782         return err;
2783 }
2784
2785 /**
2786  * igb_setup_mrqc - configure the multiple receive queue control registers
2787  * @adapter: Board private structure
2788  **/
2789 static void igb_setup_mrqc(struct igb_adapter *adapter)
2790 {
2791         struct e1000_hw *hw = &adapter->hw;
2792         u32 mrqc, rxcsum;
2793         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2794         union e1000_reta {
2795                 u32 dword;
2796                 u8  bytes[4];
2797         } reta;
2798         static const u8 rsshash[40] = {
2799                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2800                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2801                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2802                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2803
2804         /* Fill out hash function seeds */
2805         for (j = 0; j < 10; j++) {
2806                 u32 rsskey = rsshash[(j * 4)];
2807                 rsskey |= rsshash[(j * 4) + 1] << 8;
2808                 rsskey |= rsshash[(j * 4) + 2] << 16;
2809                 rsskey |= rsshash[(j * 4) + 3] << 24;
2810                 array_wr32(E1000_RSSRK(0), j, rsskey);
2811         }
2812
2813         num_rx_queues = adapter->rss_queues;
2814
2815         if (adapter->vfs_allocated_count) {
2816                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2817                 switch (hw->mac.type) {
2818                 case e1000_i350:
2819                 case e1000_82580:
2820                         num_rx_queues = 1;
2821                         shift = 0;
2822                         break;
2823                 case e1000_82576:
2824                         shift = 3;
2825                         num_rx_queues = 2;
2826                         break;
2827                 case e1000_82575:
2828                         shift = 2;
2829                         shift2 = 6;
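                             /* fall through */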
2830                 default:
2831                         break;
2832                 }
2833         } else {
2834                 if (hw->mac.type == e1000_82575)
2835                         shift = 6;
2836         }
2837
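             /*
              * Program the 128-entry RSS redirection table (RETA) four
              * entries at a time; each byte selects the Rx queue used for
              * one hash bucket.
              */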
2838         for (j = 0; j < (32 * 4); j++) {
2839                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2840                 if (shift2)
2841                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2842                 if ((j & 3) == 3)
2843                         wr32(E1000_RETA(j >> 2), reta.dword);
2844         }
2845
2846         /*
2847          * Disable raw packet checksumming so that RSS hash is placed in
2848          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2849          * offloads as they are enabled by default
2850          */
2851         rxcsum = rd32(E1000_RXCSUM);
2852         rxcsum |= E1000_RXCSUM_PCSD;
2853
2854         if (adapter->hw.mac.type >= e1000_82576)
2855                 /* Enable Receive Checksum Offload for SCTP */
2856                 rxcsum |= E1000_RXCSUM_CRCOFL;
2857
2858         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2859         wr32(E1000_RXCSUM, rxcsum);
2860
2861         /* If VMDq is enabled then we set the appropriate mode for that, else
2862          * we default to RSS so that an RSS hash is calculated per packet even
2863          * if we are only using one queue */
2864         if (adapter->vfs_allocated_count) {
2865                 if (hw->mac.type > e1000_82575) {
2866                         /* Set the default pool for the PF's first queue */
2867                         u32 vtctl = rd32(E1000_VT_CTL);
2868                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2869                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2870                         vtctl |= adapter->vfs_allocated_count <<
2871                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2872                         wr32(E1000_VT_CTL, vtctl);
2873                 }
2874                 if (adapter->rss_queues > 1)
2875                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2876                 else
2877                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2878         } else {
2879                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2880         }
2881         igb_vmm_control(adapter);
2882
2883         /*
2884          * Generate RSS hash based on TCP port numbers and/or
2885          * IPv4/v6 src and dst addresses since UDP cannot be
2886          * hashed reliably due to IP fragmentation
2887          */
2888         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2889                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2890                 E1000_MRQC_RSS_FIELD_IPV6 |
2891                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2892                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2893
2894         wr32(E1000_MRQC, mrqc);
2895 }
2896
2897 /**
2898  * igb_setup_rctl - configure the receive control registers
2899  * @adapter: Board private structure
2900  **/
2901 void igb_setup_rctl(struct igb_adapter *adapter)
2902 {
2903         struct e1000_hw *hw = &adapter->hw;
2904         u32 rctl;
2905
2906         rctl = rd32(E1000_RCTL);
2907
2908         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2909         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2910
2911         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2912                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2913
2914         /*
2915          * enable stripping of CRC. It's unlikely this will break BMC
2916          * redirection as it did with e1000. Newer features require
2917          * that the HW strips the CRC.
2918          */
2919         rctl |= E1000_RCTL_SECRC;
2920
2921         /* disable store bad packets and clear size bits. */
2922         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2923
2924         /* enable LPE to prevent packets larger than max_frame_size */
2925         rctl |= E1000_RCTL_LPE;
2926
2927         /* disable queue 0 to prevent tail write w/o re-config */
2928         wr32(E1000_RXDCTL(0), 0);
2929
2930         /* Attention!!!  For SR-IOV PF driver operations you must enable
2931          * queue drop for all VF and PF queues to prevent head of line blocking
2932          * if an un-trusted VF does not provide descriptors to hardware.
2933          */
2934         if (adapter->vfs_allocated_count) {
2935                 /* set all queue drop enable bits */
2936                 wr32(E1000_QDE, ALL_QUEUES);
2937         }
2938
2939         wr32(E1000_RCTL, rctl);
2940 }
2941
2942 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2943                                    int vfn)
2944 {
2945         struct e1000_hw *hw = &adapter->hw;
2946         u32 vmolr;
2947
2948         /* if this is a VF rather than the PF, check whether that VF has
2949          * VLANs enabled and, if so, increase the size to allow for the tag */
2950         if (vfn < adapter->vfs_allocated_count &&
2951             adapter->vf_data[vfn].vlans_enabled)
2952                 size += VLAN_TAG_SIZE;
2953
2954         vmolr = rd32(E1000_VMOLR(vfn));
2955         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2956         vmolr |= size | E1000_VMOLR_LPE;
2957         wr32(E1000_VMOLR(vfn), vmolr);
2958
2959         return 0;
2960 }
2961
2962 /**
2963  * igb_rlpml_set - set maximum receive packet size
2964  * @adapter: board private structure
2965  *
2966  * Configure maximum receivable packet size.
2967  **/
2968 static void igb_rlpml_set(struct igb_adapter *adapter)
2969 {
2970         u32 max_frame_size = adapter->max_frame_size;
2971         struct e1000_hw *hw = &adapter->hw;
2972         u16 pf_id = adapter->vfs_allocated_count;
2973
2974         if (pf_id) {
2975                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2976                 /*
2977                  * If we're in VMDQ or SR-IOV mode, then set global RLPML
2978                  * to our max jumbo frame size, in case we need to enable
2979                  * jumbo frames on one of the rings later.
2980                  * This will not pass over-length frames into the default
2981                  * queue because it's gated by the VMOLR.RLPML.
2982                  */
2983                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2984         }
2985
2986         wr32(E1000_RLPML, max_frame_size);
2987 }
2988
2989 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2990                                  int vfn, bool aupe)
2991 {
2992         struct e1000_hw *hw = &adapter->hw;
2993         u32 vmolr;
2994
2995         /*
2996          * This register exists only on 82576 and newer, so on older MACs
2997          * simply return without doing anything
2998          */
2999         if (hw->mac.type < e1000_82576)
3000                 return;
3001
3002         vmolr = rd32(E1000_VMOLR(vfn));
3003         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3004         if (aupe)
3005                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3006         else
3007                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3008
3009         /* clear all bits that might not be set */
3010         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3011
3012         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3013                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3014         /*
3015          * for VMDq only allow the VFs and pool 0 to accept broadcast and
3016          * multicast packets
3017          */
3018         if (vfn <= adapter->vfs_allocated_count)
3019                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3020
3021         wr32(E1000_VMOLR(vfn), vmolr);
3022 }
3023
3024 /**
3025  * igb_configure_rx_ring - Configure a receive ring after Reset
3026  * @adapter: board private structure
3027  * @ring: receive ring to be configured
3028  *
3029  * Configure the Rx unit of the MAC after a reset.
3030  **/
3031 void igb_configure_rx_ring(struct igb_adapter *adapter,
3032                            struct igb_ring *ring)
3033 {
3034         struct e1000_hw *hw = &adapter->hw;
3035         u64 rdba = ring->dma;
3036         int reg_idx = ring->reg_idx;
3037         u32 srrctl = 0, rxdctl = 0;
3038
3039         /* disable the queue */
3040         wr32(E1000_RXDCTL(reg_idx), 0);
3041
3042         /* Set DMA base address registers */
3043         wr32(E1000_RDBAL(reg_idx),
3044              rdba & 0x00000000ffffffffULL);
3045         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3046         wr32(E1000_RDLEN(reg_idx),
3047                        ring->count * sizeof(union e1000_adv_rx_desc));
3048
3049         /* initialize head and tail */
3050         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3051         wr32(E1000_RDH(reg_idx), 0);
3052         writel(0, ring->tail);
3053
3054         /* set descriptor configuration */
3055         srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3056 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3057         srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3058 #else
3059         srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3060 #endif
3061         srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3062         if (hw->mac.type == e1000_82580)
3063                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3064         /* Only set Drop Enable if we are supporting multiple queues */
3065         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3066                 srrctl |= E1000_SRRCTL_DROP_EN;
3067
3068         wr32(E1000_SRRCTL(reg_idx), srrctl);
3069
3070         /* set filtering for VMDQ pools */
3071         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3072
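             /* set the Rx descriptor prefetch, host, and write-back thresholds */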
3073         rxdctl |= IGB_RX_PTHRESH;
3074         rxdctl |= IGB_RX_HTHRESH << 8;
3075         rxdctl |= IGB_RX_WTHRESH << 16;
3076
3077         /* enable receive descriptor fetching */
3078         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3079         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3080 }
3081
3082 /**
3083  * igb_configure_rx - Configure receive Unit after Reset
3084  * @adapter: board private structure
3085  *
3086  * Configure the Rx unit of the MAC after a reset.
3087  **/
3088 static void igb_configure_rx(struct igb_adapter *adapter)
3089 {
3090         int i;
3091
3092         /* set UTA to appropriate mode */
3093         igb_set_uta(adapter);
3094
3095         /* set the correct pool for the PF default MAC address in entry 0 */
3096         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3097                          adapter->vfs_allocated_count);
3098
3099         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3100          * the Base and Length of the Rx Descriptor Ring */
3101         for (i = 0; i < adapter->num_rx_queues; i++)
3102                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3103 }
3104
3105 /**
3106  * igb_free_tx_resources - Free Tx Resources per Queue
3107  * @tx_ring: Tx descriptor ring for a specific queue
3108  *
3109  * Free all transmit software resources
3110  **/
3111 void igb_free_tx_resources(struct igb_ring *tx_ring)
3112 {
3113         igb_clean_tx_ring(tx_ring);
3114
3115         vfree(tx_ring->tx_buffer_info);
3116         tx_ring->tx_buffer_info = NULL;
3117
3118         /* if not set, then don't free */
3119         if (!tx_ring->desc)
3120                 return;
3121
3122         dma_free_coherent(tx_ring->dev, tx_ring->size,
3123                           tx_ring->desc, tx_ring->dma);
3124
3125         tx_ring->desc = NULL;
3126 }
3127
3128 /**
3129  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3130  * @adapter: board private structure
3131  *
3132  * Free all transmit software resources
3133  **/
3134 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3135 {
3136         int i;
3137
3138         for (i = 0; i < adapter->num_tx_queues; i++)
3139                 igb_free_tx_resources(adapter->tx_ring[i]);
3140 }
3141
3142 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3143                                     struct igb_tx_buffer *tx_buffer)
3144 {
3145         if (tx_buffer->skb) {
3146                 dev_kfree_skb_any(tx_buffer->skb);
3147                 if (tx_buffer->dma)
3148                         dma_unmap_single(ring->dev,
3149                                          tx_buffer->dma,
3150                                          tx_buffer->length,
3151                                          DMA_TO_DEVICE);
3152         } else if (tx_buffer->dma) {
3153                 dma_unmap_page(ring->dev,
3154                                tx_buffer->dma,
3155                                tx_buffer->length,
3156                                DMA_TO_DEVICE);
3157         }
3158         tx_buffer->next_to_watch = NULL;
3159         tx_buffer->skb = NULL;
3160         tx_buffer->dma = 0;
3161         /* buffer_info must be completely set up in the transmit path */
3162 }
3163
3164 /**
3165  * igb_clean_tx_ring - Free Tx Buffers
3166  * @tx_ring: ring to be cleaned
3167  **/
3168 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3169 {
3170         struct igb_tx_buffer *buffer_info;
3171         unsigned long size;
3172         unsigned int i;
3173
3174         if (!tx_ring->tx_buffer_info)
3175                 return;
3176         /* Free all the Tx ring sk_buffs */
3177
3178         for (i = 0; i < tx_ring->count; i++) {
3179                 buffer_info = &tx_ring->tx_buffer_info[i];
3180                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3181         }
3182
3183         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3184         memset(tx_ring->tx_buffer_info, 0, size);
3185
3186         /* Zero out the descriptor ring */
3187         memset(tx_ring->desc, 0, tx_ring->size);
3188
3189         tx_ring->next_to_use = 0;
3190         tx_ring->next_to_clean = 0;
3191 }
3192
3193 /**
3194  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3195  * @adapter: board private structure
3196  **/
3197 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3198 {
3199         int i;
3200
3201         for (i = 0; i < adapter->num_tx_queues; i++)
3202                 igb_clean_tx_ring(adapter->tx_ring[i]);
3203 }
3204
3205 /**
3206  * igb_free_rx_resources - Free Rx Resources
3207  * @rx_ring: ring to clean the resources from
3208  *
3209  * Free all receive software resources
3210  **/
3211 void igb_free_rx_resources(struct igb_ring *rx_ring)
3212 {
3213         igb_clean_rx_ring(rx_ring);
3214
3215         vfree(rx_ring->rx_buffer_info);
3216         rx_ring->rx_buffer_info = NULL;
3217
3218         /* if not set, then don't free */
3219         if (!rx_ring->desc)
3220                 return;
3221
3222         dma_free_coherent(rx_ring->dev, rx_ring->size,
3223                           rx_ring->desc, rx_ring->dma);
3224
3225         rx_ring->desc = NULL;
3226 }
3227
3228 /**
3229  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3230  * @adapter: board private structure
3231  *
3232  * Free all receive software resources
3233  **/
3234 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3235 {
3236         int i;
3237
3238         for (i = 0; i < adapter->num_rx_queues; i++)
3239                 igb_free_rx_resources(adapter->rx_ring[i]);
3240 }
3241
3242 /**
3243  * igb_clean_rx_ring - Free Rx Buffers per Queue
3244  * @rx_ring: ring to free buffers from
3245  **/
3246 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3247 {
3248         unsigned long size;
3249         u16 i;
3250
3251         if (!rx_ring->rx_buffer_info)
3252                 return;
3253
3254         /* Free all the Rx ring sk_buffs */
3255         for (i = 0; i < rx_ring->count; i++) {
3256                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3257                 if (buffer_info->dma) {
3258                         dma_unmap_single(rx_ring->dev,
3259                                          buffer_info->dma,
3260                                          IGB_RX_HDR_LEN,
3261                                          DMA_FROM_DEVICE);
3262                         buffer_info->dma = 0;
3263                 }
3264
3265                 if (buffer_info->skb) {
3266                         dev_kfree_skb(buffer_info->skb);
3267                         buffer_info->skb = NULL;
3268                 }
3269                 if (buffer_info->page_dma) {
3270                         dma_unmap_page(rx_ring->dev,
3271                                        buffer_info->page_dma,
3272                                        PAGE_SIZE / 2,
3273                                        DMA_FROM_DEVICE);
3274                         buffer_info->page_dma = 0;
3275                 }
3276                 if (buffer_info->page) {
3277                         put_page(buffer_info->page);
3278                         buffer_info->page = NULL;
3279                         buffer_info->page_offset = 0;
3280                 }
3281         }
3282
3283         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3284         memset(rx_ring->rx_buffer_info, 0, size);
3285
3286         /* Zero out the descriptor ring */
3287         memset(rx_ring->desc, 0, rx_ring->size);
3288
3289         rx_ring->next_to_clean = 0;
3290         rx_ring->next_to_use = 0;
3291 }
3292
3293 /**
3294  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3295  * @adapter: board private structure
3296  **/
3297 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3298 {
3299         int i;
3300
3301         for (i = 0; i < adapter->num_rx_queues; i++)
3302                 igb_clean_rx_ring(adapter->rx_ring[i]);
3303 }
3304
3305 /**
3306  * igb_set_mac - Change the Ethernet Address of the NIC
3307  * @netdev: network interface device structure
3308  * @p: pointer to an address structure
3309  *
3310  * Returns 0 on success, negative on failure
3311  **/
3312 static int igb_set_mac(struct net_device *netdev, void *p)
3313 {
3314         struct igb_adapter *adapter = netdev_priv(netdev);
3315         struct e1000_hw *hw = &adapter->hw;
3316         struct sockaddr *addr = p;
3317
3318         if (!is_valid_ether_addr(addr->sa_data))
3319                 return -EADDRNOTAVAIL;
3320
3321         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3322         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3323
3324         /* set the correct pool for the new PF MAC address in entry 0 */
3325         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3326                          adapter->vfs_allocated_count);
3327
3328         return 0;
3329 }
3330
3331 /**
3332  * igb_write_mc_addr_list - write multicast addresses to MTA
3333  * @netdev: network interface device structure
3334  *
3335  * Writes multicast address list to the MTA hash table.
3336  * Returns: -ENOMEM on failure
3337  *                0 on no addresses written
3338  *                X on writing X addresses to MTA
3339  **/
3340 static int igb_write_mc_addr_list(struct net_device *netdev)
3341 {
3342         struct igb_adapter *adapter = netdev_priv(netdev);
3343         struct e1000_hw *hw = &adapter->hw;
3344         struct netdev_hw_addr *ha;
3345         u8  *mta_list;
3346         int i;
3347
3348         if (netdev_mc_empty(netdev)) {
3349                 /* nothing to program, so clear mc list */
3350                 igb_update_mc_addr_list(hw, NULL, 0);
3351                 igb_restore_vf_multicasts(adapter);
3352                 return 0;
3353         }
3354
3355         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3356         if (!mta_list)
3357                 return -ENOMEM;
3358
3359         /* The shared function expects a packed array of only addresses. */
3360         i = 0;
3361         netdev_for_each_mc_addr(ha, netdev)
3362                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3363
3364         igb_update_mc_addr_list(hw, mta_list, i);
3365         kfree(mta_list);
3366
3367         return netdev_mc_count(netdev);
3368 }
3369
3370 /**
3371  * igb_write_uc_addr_list - write unicast addresses to RAR table
3372  * @netdev: network interface device structure
3373  *
3374  * Writes unicast address list to the RAR table.
3375  * Returns: -ENOMEM on failure/insufficient address space
3376  *                0 on no addresses written
3377  *                X on writing X addresses to the RAR table
3378  **/
3379 static int igb_write_uc_addr_list(struct net_device *netdev)
3380 {
3381         struct igb_adapter *adapter = netdev_priv(netdev);
3382         struct e1000_hw *hw = &adapter->hw;
3383         unsigned int vfn = adapter->vfs_allocated_count;
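             /* entry 0 holds the PF MAC address and the top vfn entries are
              * reserved for VF MAC addresses, leaving the rest for unicast
              * filtering */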
3384         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3385         int count = 0;
3386
3387         /* return ENOMEM indicating insufficient memory for addresses */
3388         if (netdev_uc_count(netdev) > rar_entries)
3389                 return -ENOMEM;
3390
3391         if (!netdev_uc_empty(netdev) && rar_entries) {
3392                 struct netdev_hw_addr *ha;
3393
3394                 netdev_for_each_uc_addr(ha, netdev) {
3395                         if (!rar_entries)
3396                                 break;
3397                         igb_rar_set_qsel(adapter, ha->addr,
3398                                          rar_entries--,
3399                                          vfn);
3400                         count++;
3401                 }
3402         }
3403         /* clear the unused RAR entries, in reverse order to avoid write combining */
3404         for (; rar_entries > 0; rar_entries--) {
3405                 wr32(E1000_RAH(rar_entries), 0);
3406                 wr32(E1000_RAL(rar_entries), 0);
3407         }
3408         wrfl();
3409
3410         return count;
3411 }
3412
3413 /**
3414  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3415  * @netdev: network interface device structure
3416  *
3417  * The set_rx_mode entry point is called whenever the unicast or multicast
3418  * address lists or the network interface flags are updated.  This routine is
3419  * responsible for configuring the hardware for proper unicast, multicast,
3420  * promiscuous mode, and all-multi behavior.
3421  **/
3422 static void igb_set_rx_mode(struct net_device *netdev)
3423 {
3424         struct igb_adapter *adapter = netdev_priv(netdev);
3425         struct e1000_hw *hw = &adapter->hw;
3426         unsigned int vfn = adapter->vfs_allocated_count;
3427         u32 rctl, vmolr = 0;
3428         int count;
3429
3430         /* Check for Promiscuous and All Multicast modes */
3431         rctl = rd32(E1000_RCTL);
3432
3433         /* clear the affected bits */
3434         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3435
3436         if (netdev->flags & IFF_PROMISC) {
3437                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3438                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3439         } else {
3440                 if (netdev->flags & IFF_ALLMULTI) {
3441                         rctl |= E1000_RCTL_MPE;
3442                         vmolr |= E1000_VMOLR_MPME;
3443                 } else {
3444                         /*
3445                          * Write addresses to the MTA, if the attempt fails
3446                          * then we should just turn on promiscuous mode so
3447                          * that we can at least receive multicast traffic
3448                          */
3449                         count = igb_write_mc_addr_list(netdev);
3450                         if (count < 0) {
3451                                 rctl |= E1000_RCTL_MPE;
3452                                 vmolr |= E1000_VMOLR_MPME;
3453                         } else if (count) {
3454                                 vmolr |= E1000_VMOLR_ROMPE;
3455                         }
3456                 }
3457                 /*
3458                  * Write addresses to available RAR registers, if there is not
3459                  * sufficient space to store all the addresses then enable
3460                  * unicast promiscuous mode
3461                  */
3462                 count = igb_write_uc_addr_list(netdev);
3463                 if (count < 0) {
3464                         rctl |= E1000_RCTL_UPE;
3465                         vmolr |= E1000_VMOLR_ROPE;
3466                 }
3467                 rctl |= E1000_RCTL_VFE;
3468         }
3469         wr32(E1000_RCTL, rctl);
3470
3471         /*
3472          * In order to support SR-IOV and eventually VMDq it is necessary to set
3473          * the VMOLR to enable the appropriate modes.  Without this workaround
3474          * we will have issues with VLAN tag stripping not being done for frames
3475          * that are only arriving because we are the default pool
3476          */
3477         if (hw->mac.type < e1000_82576)
3478                 return;
3479
3480         vmolr |= rd32(E1000_VMOLR(vfn)) &
3481                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3482         wr32(E1000_VMOLR(vfn), vmolr);
3483         igb_restore_vf_multicasts(adapter);
3484 }
3485
3486 static void igb_check_wvbr(struct igb_adapter *adapter)
3487 {
3488         struct e1000_hw *hw = &adapter->hw;
3489         u32 wvbr = 0;
3490
3491         switch (hw->mac.type) {
3492         case e1000_82576:
3493         case e1000_i350:
3494                 if (!(wvbr = rd32(E1000_WVBR)))
3495                         return;
3496                 break;
3497         default:
3498                 break;
3499         }
3500
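             /* remember which pools reported spoofed packets; the watchdog's
              * igb_spoof_check() will warn about and clear these bits */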
3501         adapter->wvbr |= wvbr;
3502 }
3503
3504 #define IGB_STAGGERED_QUEUE_OFFSET 8
3505
3506 static void igb_spoof_check(struct igb_adapter *adapter)
3507 {
3508         int j;
3509
3510         if (!adapter->wvbr)
3511                 return;
3512
3513         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3514                 if (adapter->wvbr & (1 << j) ||
3515                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3516                         dev_warn(&adapter->pdev->dev,
3517                                 "Spoof event(s) detected on VF %d\n", j);
3518                         adapter->wvbr &=
3519                                 ~((1 << j) |
3520                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3521                 }
3522         }
3523 }
3524
3525 /* Need to wait a few seconds after link up to get diagnostic information from
3526  * the phy */
3527 static void igb_update_phy_info(unsigned long data)
3528 {
3529         struct igb_adapter *adapter = (struct igb_adapter *) data;
3530         igb_get_phy_info(&adapter->hw);
3531 }
3532
3533 /**
3534  * igb_has_link - check shared code for link and determine up/down
3535  * @adapter: pointer to driver private info
3536  **/
3537 bool igb_has_link(struct igb_adapter *adapter)
3538 {
3539         struct e1000_hw *hw = &adapter->hw;
3540         bool link_active = false;
3541         s32 ret_val = 0;
3542
3543         /* get_link_status is set on LSC (link status) interrupt or
3544          * rx sequence error interrupt.  get_link_status will stay
3545          * false until the e1000_check_for_link establishes link
3546          * for copper adapters ONLY
3547          */
3548         switch (hw->phy.media_type) {
3549         case e1000_media_type_copper:
3550                 if (hw->mac.get_link_status) {
3551                         ret_val = hw->mac.ops.check_for_link(hw);
3552                         link_active = !hw->mac.get_link_status;
3553                 } else {
3554                         link_active = true;
3555                 }
3556                 break;
3557         case e1000_media_type_internal_serdes:
3558                 ret_val = hw->mac.ops.check_for_link(hw);
3559                 link_active = hw->mac.serdes_has_link;
3560                 break;
3561         default:
3562         case e1000_media_type_unknown:
3563                 break;
3564         }
3565
3566         return link_active;
3567 }
3568
3569 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3570 {
3571         bool ret = false;
3572         u32 ctrl_ext, thstat;
3573
3574         /* check for thermal sensor event on i350, copper only */
3575         if (hw->mac.type == e1000_i350) {
3576                 thstat = rd32(E1000_THSTAT);
3577                 ctrl_ext = rd32(E1000_CTRL_EXT);
3578
3579                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3580                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3581                         ret = !!(thstat & event);
3582                 }
3583         }
3584
3585         return ret;
3586 }
3587
3588 /**
3589  * igb_watchdog - Timer Call-back
3590  * @data: pointer to adapter cast into an unsigned long
3591  **/
3592 static void igb_watchdog(unsigned long data)
3593 {
3594         struct igb_adapter *adapter = (struct igb_adapter *)data;
3595         /* Do the rest outside of interrupt context */
3596         schedule_work(&adapter->watchdog_task);
3597 }
3598
3599 static void igb_watchdog_task(struct work_struct *work)
3600 {
3601         struct igb_adapter *adapter = container_of(work,
3602                                                    struct igb_adapter,
3603                                                    watchdog_task);
3604         struct e1000_hw *hw = &adapter->hw;
3605         struct net_device *netdev = adapter->netdev;
3606         u32 link;
3607         int i;
3608
3609         link = igb_has_link(adapter);
3610         if (link) {
3611                 if (!netif_carrier_ok(netdev)) {
3612                         u32 ctrl;
3613                         hw->mac.ops.get_speed_and_duplex(hw,
3614                                                          &adapter->link_speed,
3615                                                          &adapter->link_duplex);
3616
3617                         ctrl = rd32(E1000_CTRL);
3618                         /* Link status message must follow this format */
3619                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3620                                  "Flow Control: %s\n",
3621                                netdev->name,
3622                                adapter->link_speed,
3623                                adapter->link_duplex == FULL_DUPLEX ?
3624                                  "Full Duplex" : "Half Duplex",
3625                                ((ctrl & E1000_CTRL_TFCE) &&
3626                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3627                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3628                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3629
3630                         /* check for thermal sensor event */
3631                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3632                                 printk(KERN_INFO "igb: %s The network adapter "
3633                                                  "link speed was downshifted "
3634                                                  "because it overheated.\n",
3635                                                  netdev->name);
3636                         }
3637
3638                         /* adjust timeout factor according to speed/duplex */
3639                         adapter->tx_timeout_factor = 1;
3640                         switch (adapter->link_speed) {
3641                         case SPEED_10:
3642                                 adapter->tx_timeout_factor = 14;
3643                                 break;
3644                         case SPEED_100:
3645                                 /* maybe add some timeout factor ? */
3646                                 break;
3647                         }
3648
3649                         netif_carrier_on(netdev);
3650
3651                         igb_ping_all_vfs(adapter);
3652                         igb_check_vf_rate_limit(adapter);
3653
3654                         /* link state has changed, schedule phy info update */
3655                         if (!test_bit(__IGB_DOWN, &adapter->state))
3656                                 mod_timer(&adapter->phy_info_timer,
3657                                           round_jiffies(jiffies + 2 * HZ));
3658                 }
3659         } else {
3660                 if (netif_carrier_ok(netdev)) {
3661                         adapter->link_speed = 0;
3662                         adapter->link_duplex = 0;
3663
3664                         /* check for thermal sensor event */
3665                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3666                                 printk(KERN_ERR "igb: %s The network adapter "
3667                                                 "was stopped because it "
3668                                                 "overheated.\n",
3669                                                 netdev->name);
3670                         }
3671
3672                         /* Link status message must follow this format */
3673                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3674                                netdev->name);
3675                         netif_carrier_off(netdev);
3676
3677                         igb_ping_all_vfs(adapter);
3678
3679                         /* link state has changed, schedule phy info update */
3680                         if (!test_bit(__IGB_DOWN, &adapter->state))
3681                                 mod_timer(&adapter->phy_info_timer,
3682                                           round_jiffies(jiffies + 2 * HZ));
3683                 }
3684         }
3685
3686         spin_lock(&adapter->stats64_lock);
3687         igb_update_stats(adapter, &adapter->stats64);
3688         spin_unlock(&adapter->stats64_lock);
3689
3690         for (i = 0; i < adapter->num_tx_queues; i++) {
3691                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3692                 if (!netif_carrier_ok(netdev)) {
3693                         /* We've lost link, so the controller stops DMA,
3694                          * but we've got queued Tx work that's never going
3695                          * to get done, so reset controller to flush Tx.
3696                          * (Do the reset outside of interrupt context). */
3697                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3698                                 adapter->tx_timeout_count++;
3699                                 schedule_work(&adapter->reset_task);
3700                                 /* return immediately since reset is imminent */
3701                                 return;
3702                         }
3703                 }
3704
3705                 /* Force detection of hung controller every watchdog period */
3706                 tx_ring->detect_tx_hung = true;
3707         }
3708
3709         /* Cause software interrupt to ensure rx ring is cleaned */
3710         if (adapter->msix_entries) {
3711                 u32 eics = 0;
3712                 for (i = 0; i < adapter->num_q_vectors; i++) {
3713                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3714                         eics |= q_vector->eims_value;
3715                 }
3716                 wr32(E1000_EICS, eics);
3717         } else {
3718                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3719         }
3720
3721         igb_spoof_check(adapter);
3722
3723         /* Reset the timer */
3724         if (!test_bit(__IGB_DOWN, &adapter->state))
3725                 mod_timer(&adapter->watchdog_timer,
3726                           round_jiffies(jiffies + 2 * HZ));
3727 }
3728
3729 enum latency_range {
3730         lowest_latency = 0,
3731         low_latency = 1,
3732         bulk_latency = 2,
3733         latency_invalid = 255
3734 };
3735
3736 /**
3737  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3738  *
3739  *      Stores a new ITR value based strictly on packet size.  This
3740  *      algorithm is less sophisticated than that used in igb_update_itr,
3741  *      due to the difficulty of synchronizing statistics across multiple
3742  *      receive rings.  The divisors and thresholds used by this function
3743  *      were determined based on theoretical maximum wire speed and testing
3744  *      data, in order to minimize response time while increasing bulk
3745  *      throughput.
3746  *      This functionality is controlled by the InterruptThrottleRate module
3747  *      parameter (see igb_param.c)
3748  *      NOTE:  This function is called only when operating in a multiqueue
3749  *             receive environment.
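      *      For example, an average frame of 1000 bytes becomes 1024 once the
      *      24 byte overhead is added; that falls in the 300-1200 byte range,
      *      so the new ITR value is 1024 / 3 = 341.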
3750  * @q_vector: pointer to q_vector
3751  **/
3752 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3753 {
3754         int new_val = q_vector->itr_val;
3755         int avg_wire_size = 0;
3756         struct igb_adapter *adapter = q_vector->adapter;
3757         struct igb_ring *ring;
3758         unsigned int packets;
3759
3760         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3761          * ints/sec, which corresponds to the ITR value of 976 written below.
3762          */
3763         if (adapter->link_speed != SPEED_1000) {
3764                 new_val = 976;
3765                 goto set_itr_val;
3766         }
3767
3768         ring = q_vector->rx_ring;
3769         if (ring) {
3770                 packets = ACCESS_ONCE(ring->total_packets);
3771
3772                 if (packets)
3773                         avg_wire_size = ring->total_bytes / packets;
3774         }
3775
3776         ring = q_vector->tx_ring;
3777         if (ring) {
3778                 packets = ACCESS_ONCE(ring->total_packets);
3779
3780                 if (packets)
3781                         avg_wire_size = max_t(u32, avg_wire_size,
3782                                               ring->total_bytes / packets);
3783         }
3784
3785         /* if avg_wire_size isn't set no work was done */
3786         if (!avg_wire_size)
3787                 goto clear_counts;
3788
3789         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3790         avg_wire_size += 24;
3791
3792         /* Don't starve jumbo frames */
3793         avg_wire_size = min(avg_wire_size, 3000);
3794
3795         /* Give a little boost to mid-size frames */
3796         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3797                 new_val = avg_wire_size / 3;
3798         else
3799                 new_val = avg_wire_size / 2;
3800
3801         /* when in itr mode 3 do not exceed 20K ints/sec */
3802         if (adapter->rx_itr_setting == 3 && new_val < 196)
3803                 new_val = 196;
3804
3805 set_itr_val:
3806         if (new_val != q_vector->itr_val) {
3807                 q_vector->itr_val = new_val;
3808                 q_vector->set_itr = 1;
3809         }
3810 clear_counts:
3811         if (q_vector->rx_ring) {
3812                 q_vector->rx_ring->total_bytes = 0;
3813                 q_vector->rx_ring->total_packets = 0;
3814         }
3815         if (q_vector->tx_ring) {
3816                 q_vector->tx_ring->total_bytes = 0;
3817                 q_vector->tx_ring->total_packets = 0;
3818         }
3819 }
3820
3821 /**
3822  * igb_update_itr - update the dynamic ITR value based on statistics
3823  *      Stores a new ITR value based on packets and byte
3824  *      counts during the last interrupt.  The advantage of per interrupt
3825  *      computation is faster updates and more accurate ITR for the current
3826  *      traffic pattern.  Constants in this function were computed
3827  *      based on theoretical maximum wire speed and thresholds were set based
3828  *      on testing data as well as attempting to minimize response time
3829  *      while increasing bulk throughput.
3830  *      this functionality is controlled by the InterruptThrottleRate module
3831  *      parameter (see igb_param.c)
3832  *      NOTE:  These calculations are only valid when operating in a single-
3833  *             queue environment.
3834  * @adapter: pointer to adapter
3835  * @itr_setting: current q_vector->itr_val
3836  * @packets: the number of packets during this measurement interval
3837  * @bytes: the number of bytes during this measurement interval
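      *
      *      For example, in the low_latency state an interval of 40 packets
      *      totalling 60000 bytes (1500 bytes per packet, above the 1200 byte
      *      cutoff) moves to bulk_latency, while 2 packets totalling 200 bytes
      *      drops back to lowest_latency.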
3838  **/
3839 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3840                                    int packets, int bytes)
3841 {
3842         unsigned int retval = itr_setting;
3843
3844         if (packets == 0)
3845                 goto update_itr_done;
3846
3847         switch (itr_setting) {
3848         case lowest_latency:
3849                 /* handle TSO and jumbo frames */
3850                 if (bytes/packets > 8000)
3851                         retval = bulk_latency;
3852                 else if ((packets < 5) && (bytes > 512))
3853                         retval = low_latency;
3854                 break;
3855         case low_latency:  /* 50 usec aka 20000 ints/s */
3856                 if (bytes > 10000) {
3857                         /* this if handles the TSO accounting */
3858                         if (bytes/packets > 8000) {
3859                                 retval = bulk_latency;
3860                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3861                                 retval = bulk_latency;
3862                         } else if (packets > 35) {
3863                                 retval = lowest_latency;
3864                         }
3865                 } else if (bytes/packets > 2000) {
3866                         retval = bulk_latency;
3867                 } else if (packets <= 2 && bytes < 512) {
3868                         retval = lowest_latency;
3869                 }
3870                 break;
3871         case bulk_latency: /* 250 usec aka 4000 ints/s */
3872                 if (bytes > 25000) {
3873                         if (packets > 35)
3874                                 retval = low_latency;
3875                 } else if (bytes < 1500) {
3876                         retval = low_latency;
3877                 }
3878                 break;
3879         }
3880
3881 update_itr_done:
3882         return retval;
3883 }
3884
3885 static void igb_set_itr(struct igb_adapter *adapter)
3886 {
3887         struct igb_q_vector *q_vector = adapter->q_vector[0];
3888         u16 current_itr;
3889         u32 new_itr = q_vector->itr_val;
3890
3891         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3892         if (adapter->link_speed != SPEED_1000) {
3893                 current_itr = 0;
3894                 new_itr = 4000;
3895                 goto set_itr_now;
3896         }
3897
3898         adapter->rx_itr = igb_update_itr(adapter,
3899                                     adapter->rx_itr,
3900                                     q_vector->rx_ring->total_packets,
3901                                     q_vector->rx_ring->total_bytes);
3902
3903         adapter->tx_itr = igb_update_itr(adapter,
3904                                     adapter->tx_itr,
3905                                     q_vector->tx_ring->total_packets,
3906                                     q_vector->tx_ring->total_bytes);
3907         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3908
3909         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3910         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3911                 current_itr = low_latency;
3912
3913         switch (current_itr) {
3914         /* counts and packets in update_itr are dependent on these numbers */
3915         case lowest_latency:
3916                 new_itr = 56;  /* aka 70,000 ints/sec */
3917                 break;
3918         case low_latency:
3919                 new_itr = 196; /* aka 20,000 ints/sec */
3920                 break;
3921         case bulk_latency:
3922                 new_itr = 980; /* aka 4,000 ints/sec */
3923                 break;
3924         default:
3925                 break;
3926         }
3927
3928 set_itr_now:
3929         q_vector->rx_ring->total_bytes = 0;
3930         q_vector->rx_ring->total_packets = 0;
3931         q_vector->tx_ring->total_bytes = 0;
3932         q_vector->tx_ring->total_packets = 0;
3933
3934         if (new_itr != q_vector->itr_val) {
3935                 /* this attempts to bias the interrupt rate towards Bulk
3936                  * by adding intermediate steps when interrupt rate is
3937                  * increasing */
3938                 new_itr = new_itr > q_vector->itr_val ?
3939                              max((new_itr * q_vector->itr_val) /
3940                                  (new_itr + (q_vector->itr_val >> 2)),
3941                                  new_itr) :
3942                              new_itr;
3943                 /* Don't write the value here; it resets the adapter's
3944                  * internal timer, and causes us to delay far longer than
3945                  * we should between interrupts.  Instead, we write the ITR
3946                  * value at the beginning of the next interrupt so the timing
3947                  * ends up being correct.
3948                  */
3949                 q_vector->itr_val = new_itr;
3950                 q_vector->set_itr = 1;
3951         }
3952 }
3953
3954 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3955                      u32 type_tucmd, u32 mss_l4len_idx)
3956 {
3957         struct e1000_adv_tx_context_desc *context_desc;
3958         u16 i = tx_ring->next_to_use;
3959
3960         context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3961
3962         i++;
3963         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
3964
3965         /* set bits to identify this as an advanced context descriptor */
3966         type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3967
3968         /* For 82575, context index must be unique per ring. */
3969         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3970                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3971
3972         context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
3973         context_desc->seqnum_seed       = 0;
3974         context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
3975         context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
3976 }
3977
3978 static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb,
3979                           u32 tx_flags, __be16 protocol, u8 *hdr_len)
3980 {
3981         int err;
3982         u32 vlan_macip_lens, type_tucmd;
3983         u32 mss_l4len_idx, l4len;
3984
3985         if (!skb_is_gso(skb))
3986                 return 0;
3987
3988         if (skb_header_cloned(skb)) {
3989                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3990                 if (err)
3991                         return err;
3992         }
3993
3994         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3995         type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
3996
3997         if (protocol == __constant_htons(ETH_P_IP)) {
3998                 struct iphdr *iph = ip_hdr(skb);
3999                 iph->tot_len = 0;
4000                 iph->check = 0;
4001                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4002                                                          iph->daddr, 0,
4003                                                          IPPROTO_TCP,
4004                                                          0);
4005                 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4006         } else if (skb_is_gso_v6(skb)) {
4007                 ipv6_hdr(skb)->payload_len = 0;
4008                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4009                                                        &ipv6_hdr(skb)->daddr,
4010                                                        0, IPPROTO_TCP, 0);
4011         }
4012
4013         l4len = tcp_hdrlen(skb);
4014         *hdr_len = skb_transport_offset(skb) + l4len;
4015
4016         /* MSS L4LEN IDX */
4017         mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4018         mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4019
4020         /* VLAN MACLEN IPLEN */
4021         vlan_macip_lens = skb_network_header_len(skb);
4022         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4023         vlan_macip_lens |= tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4024
4025         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4026
4027         return 1;
4028 }
4029
4030 static inline bool igb_tx_csum(struct igb_ring *tx_ring, struct sk_buff *skb,
4031                                u32 tx_flags, __be16 protocol)
4032 {
4033         u32 vlan_macip_lens = 0;
4034         u32 mss_l4len_idx = 0;
4035         u32 type_tucmd = 0;
4036
4037         if (skb->ip_summed != CHECKSUM_PARTIAL) {
4038                 if (!(tx_flags & IGB_TX_FLAGS_VLAN))
4039                         return false;
4040         } else {
4041                 u8 l4_hdr = 0;
4042                 switch (protocol) {
4043                 case __constant_htons(ETH_P_IP):
4044                         vlan_macip_lens |= skb_network_header_len(skb);
4045                         type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4046                         l4_hdr = ip_hdr(skb)->protocol;
4047                         break;
4048                 case __constant_htons(ETH_P_IPV6):
4049                         vlan_macip_lens |= skb_network_header_len(skb);
4050                         l4_hdr = ipv6_hdr(skb)->nexthdr;
4051                         break;
4052                 default:
4053                         if (unlikely(net_ratelimit())) {
4054                                 dev_warn(tx_ring->dev,
4055                                  "partial checksum but proto=%x!\n",
4056                                  protocol);
4057                         }
4058                         break;
4059                 }
4060
4061                 switch (l4_hdr) {
4062                 case IPPROTO_TCP:
4063                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4064                         mss_l4len_idx = tcp_hdrlen(skb) <<
4065                                         E1000_ADVTXD_L4LEN_SHIFT;
4066                         break;
4067                 case IPPROTO_SCTP:
4068                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4069                         mss_l4len_idx = sizeof(struct sctphdr) <<
4070                                         E1000_ADVTXD_L4LEN_SHIFT;
4071                         break;
4072                 case IPPROTO_UDP:
4073                         mss_l4len_idx = sizeof(struct udphdr) <<
4074                                         E1000_ADVTXD_L4LEN_SHIFT;
4075                         break;
4076                 default:
4077                         if (unlikely(net_ratelimit())) {
4078                                 dev_warn(tx_ring->dev,
4079                                  "partial checksum but l4 proto=%x!\n",
4080                                  l4_hdr);
4081                         }
4082                         break;
4083                 }
4084         }
4085
4086         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4087         vlan_macip_lens |= tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4088
4089         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4090
4091         return (skb->ip_summed == CHECKSUM_PARTIAL);
4092 }
4093
4094 static __le32 igb_tx_cmd_type(u32 tx_flags)
4095 {
4096         /* set type for advanced descriptor with frame checksum insertion */
4097         __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4098                                       E1000_ADVTXD_DCMD_IFCS |
4099                                       E1000_ADVTXD_DCMD_DEXT);
4100
4101         /* set HW vlan bit if vlan is present */
4102         if (tx_flags & IGB_TX_FLAGS_VLAN)
4103                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4104
4105         /* set timestamp bit if present */
4106         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4107                 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4108
4109         /* set segmentation bits for TSO */
4110         if (tx_flags & IGB_TX_FLAGS_TSO)
4111                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4112
4113         return cmd_type;
4114 }
4115
4116 static __le32 igb_tx_olinfo_status(u32 tx_flags, unsigned int paylen,
4117                                    struct igb_ring *tx_ring)
4118 {
4119         u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4120
4121         /* 82575 requires a unique index per ring if any offload is enabled */
4122         if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4123             (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX))
4124                 olinfo_status |= tx_ring->reg_idx << 4;
4125
4126         /* insert L4 checksum */
4127         if (tx_flags & IGB_TX_FLAGS_CSUM) {
4128                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4129
4130                 /* insert IPv4 checksum */
4131                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4132                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4133         }
4134
4135         return cpu_to_le32(olinfo_status);
4136 }
4137
4138 /*
4139  * The largest size we can write to the descriptor is 65535.  In order to
4140  * maintain a power of two alignment we have to limit ourselves to 32K.
4141  */
4142 #define IGB_MAX_TXD_PWR 15
4143 #define IGB_MAX_DATA_PER_TXD    (1 << IGB_MAX_TXD_PWR)
4144
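/**
 * igb_tx_map - map an skb and post it to the hardware ring
 * @tx_ring: ring the packet is being transmitted on
 * @skb: packet to map
 * @first: tx_buffer_info entry recorded for the first descriptor
 * @tx_flags: IGB_TX_FLAGS_* bits collected for this packet
 * @hdr_len: header length when doing TSO, 0 otherwise
 *
 * DMA-maps the linear data and each fragment, splitting chunks larger
 * than IGB_MAX_DATA_PER_TXD across multiple descriptors, writes the last
 * descriptor with the RS and EOP bits and then bumps the ring tail.  On a
 * DMA mapping error any buffers already mapped are unwound.
 **/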
4145 static void igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb,
4146                        struct igb_tx_buffer *first, u32 tx_flags,
4147                        const u8 hdr_len)
4148 {
4149         struct igb_tx_buffer *tx_buffer_info;
4150         union e1000_adv_tx_desc *tx_desc;
4151         dma_addr_t dma;
4152         struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4153         unsigned int data_len = skb->data_len;
4154         unsigned int size = skb_headlen(skb);
4155         unsigned int paylen = skb->len - hdr_len;
4156         __le32 cmd_type;
4157         u16 i = tx_ring->next_to_use;
4158         u16 gso_segs;
4159
4160         if (tx_flags & IGB_TX_FLAGS_TSO)
4161                 gso_segs = skb_shinfo(skb)->gso_segs;
4162         else
4163                 gso_segs = 1;
4164
4165         /* multiply data chunks by size of headers */
4166         first->bytecount = paylen + (gso_segs * hdr_len);
4167         first->gso_segs = gso_segs;
4168         first->skb = skb;
4169
4170         tx_desc = IGB_TX_DESC(tx_ring, i);
4171
4172         tx_desc->read.olinfo_status =
4173                 igb_tx_olinfo_status(tx_flags, paylen, tx_ring);
4174
4175         cmd_type = igb_tx_cmd_type(tx_flags);
4176
4177         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4178         if (dma_mapping_error(tx_ring->dev, dma))
4179                 goto dma_error;
4180
4181         /* record length, and DMA address */
4182         first->length = size;
4183         first->dma = dma;
4184         first->tx_flags = tx_flags;
4185         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4186
4187         for (;;) {
4188                 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4189                         tx_desc->read.cmd_type_len =
4190                                 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4191
4192                         i++;
4193                         tx_desc++;
4194                         if (i == tx_ring->count) {
4195                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
4196                                 i = 0;
4197                         }
4198
4199                         dma += IGB_MAX_DATA_PER_TXD;
4200                         size -= IGB_MAX_DATA_PER_TXD;
4201
4202                         tx_desc->read.olinfo_status = 0;
4203                         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4204                 }
4205
4206                 if (likely(!data_len))
4207                         break;
4208
4209                 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4210
4211                 i++;
4212                 tx_desc++;
4213                 if (i == tx_ring->count) {
4214                         tx_desc = IGB_TX_DESC(tx_ring, 0);
4215                         i = 0;
4216                 }
4217
4218                 size = frag->size;
4219                 data_len -= size;
4220
4221                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4222                                    size, DMA_TO_DEVICE);
4223                 if (dma_mapping_error(tx_ring->dev, dma))
4224                         goto dma_error;
4225
4226                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4227                 tx_buffer_info->length = size;
4228                 tx_buffer_info->dma = dma;
4229
4230                 tx_desc->read.olinfo_status = 0;
4231                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4232
4233                 frag++;
4234         }
4235
4236         /* write last descriptor with RS and EOP bits */
4237         cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4238         tx_desc->read.cmd_type_len = cmd_type;
4239
4240         /* set the timestamp */
4241         first->time_stamp = jiffies;
4242
4243         /*
4244          * Force memory writes to complete before letting h/w know there
4245          * are new descriptors to fetch.  (Only applicable for weak-ordered
4246          * memory model archs, such as IA-64).
4247          *
4248          * We also need this memory barrier to make certain all of the
4249          * status bits have been updated before next_to_watch is written.
4250          */
4251         wmb();
4252
4253         /* set next_to_watch value indicating a packet is present */
4254         first->next_to_watch = tx_desc;
4255
4256         i++;
4257         if (i == tx_ring->count)
4258                 i = 0;
4259
4260         tx_ring->next_to_use = i;
4261
4262         writel(i, tx_ring->tail);
4263
4264         /* we need this if more than one processor can write to our tail
4265          * at a time, it synchronizes IO on IA64/Altix systems */
4266         mmiowb();
4267
4268         return;
4269
4270 dma_error:
4271         dev_err(tx_ring->dev, "TX DMA map failed\n");
4272
4273         /* clear dma mappings for failed tx_buffer_info map */
4274         for (;;) {
4275                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4276                 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4277                 if (tx_buffer_info == first)
4278                         break;
4279                 if (i == 0)
4280                         i = tx_ring->count;
4281                 i--;
4282         }
4283
4284         tx_ring->next_to_use = i;
4285 }
4286
4287 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4288 {
4289         struct net_device *netdev = tx_ring->netdev;
4290
4291         netif_stop_subqueue(netdev, tx_ring->queue_index);
4292
4293         /* Herbert's original patch had:
4294          *  smp_mb__after_netif_stop_queue();
4295          * but since that doesn't exist yet, just open code it. */
4296         smp_mb();
4297
4298         /* We need to check again in case another CPU has just
4299          * made room available. */
4300         if (igb_desc_unused(tx_ring) < size)
4301                 return -EBUSY;
4302
4303         /* A reprieve! */
4304         netif_wake_subqueue(netdev, tx_ring->queue_index);
4305
4306         u64_stats_update_begin(&tx_ring->tx_syncp2);
4307         tx_ring->tx_stats.restart_queue2++;
4308         u64_stats_update_end(&tx_ring->tx_syncp2);
4309
4310         return 0;
4311 }
4312
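/**
 * igb_maybe_stop_tx - stop the queue if the ring is running low
 * @tx_ring: ring being checked
 * @size: number of descriptors the next transmit may need
 *
 * Fast path: does nothing while enough descriptors are free.  Otherwise
 * __igb_maybe_stop_tx() stops the subqueue and re-checks under a memory
 * barrier, waking the subqueue again if room became available meanwhile.
 **/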
4313 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4314 {
4315         if (igb_desc_unused(tx_ring) >= size)
4316                 return 0;
4317         return __igb_maybe_stop_tx(tx_ring, size);
4318 }
4319
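/**
 * igb_xmit_frame_ring - transmit one skb on a specific Tx ring
 * @skb: packet to send
 * @tx_ring: ring the packet is placed on
 *
 * Reserves descriptors, records timestamp and VLAN flags, runs the TSO or
 * checksum offload setup and finally hands the buffers to igb_tx_map().
 **/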
4320 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4321                                 struct igb_ring *tx_ring)
4322 {
4323         struct igb_tx_buffer *first;
4324         int tso;
4325         u32 tx_flags = 0;
4326         __be16 protocol = vlan_get_protocol(skb);
4327         u8 hdr_len = 0;
4328
4329         /* need: 1 descriptor per page,
4330          *       + 2 desc gap to keep tail from touching head,
4331          *       + 1 desc for skb->data,
4332          *       + 1 desc for context descriptor,
4333          * otherwise try next time */
4334         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4335                 /* this is a hard error */
4336                 return NETDEV_TX_BUSY;
4337         }
4338
4339         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4340                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4341                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4342         }
4343
4344         if (vlan_tx_tag_present(skb)) {
4345                 tx_flags |= IGB_TX_FLAGS_VLAN;
4346                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4347         }
4348
4349         /* record the location of the first descriptor for this packet */
4350         first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4351
4352         tso = igb_tso(tx_ring, skb, tx_flags, protocol, &hdr_len);
4353         if (tso < 0) {
4354                 goto out_drop;
4355         } else if (tso) {
4356                 tx_flags |= IGB_TX_FLAGS_TSO | IGB_TX_FLAGS_CSUM;
4357                 if (protocol == htons(ETH_P_IP))
4358                         tx_flags |= IGB_TX_FLAGS_IPV4;
4359         } else if (igb_tx_csum(tx_ring, skb, tx_flags, protocol) &&
4360                    (skb->ip_summed == CHECKSUM_PARTIAL)) {
4361                 tx_flags |= IGB_TX_FLAGS_CSUM;
4362         }
4363
4364         igb_tx_map(tx_ring, skb, first, tx_flags, hdr_len);
4365
4366         /* Make sure there is space in the ring for the next send. */
4367         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4368
4369         return NETDEV_TX_OK;
4370
4371 out_drop:
4372         dev_kfree_skb_any(skb);
4373         return NETDEV_TX_OK;
4374 }
4375
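/**
 * igb_tx_queue_mapping - select the Tx ring for an skb
 * @adapter: board private structure
 * @skb: packet being transmitted
 *
 * Uses the queue mapping chosen by the stack, wrapping it into the range
 * of allocated Tx queues when it exceeds num_tx_queues.
 **/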
4376 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4377                                                     struct sk_buff *skb)
4378 {
4379         unsigned int r_idx = skb->queue_mapping;
4380
4381         if (r_idx >= adapter->num_tx_queues)
4382                 r_idx = r_idx % adapter->num_tx_queues;
4383
4384         return adapter->tx_ring[r_idx];
4385 }
4386
4387 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4388                                   struct net_device *netdev)
4389 {
4390         struct igb_adapter *adapter = netdev_priv(netdev);
4391
4392         if (test_bit(__IGB_DOWN, &adapter->state)) {
4393                 dev_kfree_skb_any(skb);
4394                 return NETDEV_TX_OK;
4395         }
4396
4397         if (skb->len <= 0) {
4398                 dev_kfree_skb_any(skb);
4399                 return NETDEV_TX_OK;
4400         }
4401
4402         /*
4403          * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4404          * in order to meet this minimum size requirement.
4405          */
4406         if (skb->len < 17) {
4407                 if (skb_padto(skb, 17))
4408                         return NETDEV_TX_OK;
4409                 skb->len = 17;
4410         }
4411
4412         return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4413 }
4414
4415 /**
4416  * igb_tx_timeout - Respond to a Tx Hang
4417  * @netdev: network interface device structure
4418  **/
4419 static void igb_tx_timeout(struct net_device *netdev)
4420 {
4421         struct igb_adapter *adapter = netdev_priv(netdev);
4422         struct e1000_hw *hw = &adapter->hw;
4423
4424         /* Do the reset outside of interrupt context */
4425         adapter->tx_timeout_count++;
4426
4427         if (hw->mac.type == e1000_82580)
4428                 hw->dev_spec._82575.global_device_reset = true;
4429
4430         schedule_work(&adapter->reset_task);
4431         wr32(E1000_EICS,
4432              (adapter->eims_enable_mask & ~adapter->eims_other));
4433 }
4434
4435 static void igb_reset_task(struct work_struct *work)
4436 {
4437         struct igb_adapter *adapter;
4438         adapter = container_of(work, struct igb_adapter, reset_task);
4439
4440         igb_dump(adapter);
4441         netdev_err(adapter->netdev, "Reset adapter\n");
4442         igb_reinit_locked(adapter);
4443 }
4444
4445 /**
4446  * igb_get_stats64 - Get System Network Statistics
4447  * @netdev: network interface device structure
4448  * @stats: rtnl_link_stats64 pointer
4449  *
4450  **/
4451 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4452                                                  struct rtnl_link_stats64 *stats)
4453 {
4454         struct igb_adapter *adapter = netdev_priv(netdev);
4455
4456         spin_lock(&adapter->stats64_lock);
4457         igb_update_stats(adapter, &adapter->stats64);
4458         memcpy(stats, &adapter->stats64, sizeof(*stats));
4459         spin_unlock(&adapter->stats64_lock);
4460
4461         return stats;
4462 }
4463
4464 /**
4465  * igb_change_mtu - Change the Maximum Transfer Unit
4466  * @netdev: network interface device structure
4467  * @new_mtu: new value for maximum frame size
4468  *
4469  * Returns 0 on success, negative on failure
4470  **/
4471 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4472 {
4473         struct igb_adapter *adapter = netdev_priv(netdev);
4474         struct pci_dev *pdev = adapter->pdev;
4475         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4476
4477         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4478                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4479                 return -EINVAL;
4480         }
4481
4482 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4483         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4484                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4485                 return -EINVAL;
4486         }
4487
4488         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4489                 msleep(1);
4490
4491         /* igb_down has a dependency on max_frame_size */
4492         adapter->max_frame_size = max_frame;
4493
4494         if (netif_running(netdev))
4495                 igb_down(adapter);
4496
4497         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4498                  netdev->mtu, new_mtu);
4499         netdev->mtu = new_mtu;
4500
4501         if (netif_running(netdev))
4502                 igb_up(adapter);
4503         else
4504                 igb_reset(adapter);
4505
4506         clear_bit(__IGB_RESETTING, &adapter->state);
4507
4508         return 0;
4509 }
4510
4511 /**
4512  * igb_update_stats - Update the board statistics counters
4513  * @adapter: board private structure
4514  **/
4515
4516 void igb_update_stats(struct igb_adapter *adapter,
4517                       struct rtnl_link_stats64 *net_stats)
4518 {
4519         struct e1000_hw *hw = &adapter->hw;
4520         struct pci_dev *pdev = adapter->pdev;
4521         u32 reg, mpc;
4522         u16 phy_tmp;
4523         int i;
4524         u64 bytes, packets;
4525         unsigned int start;
4526         u64 _bytes, _packets;
4527
4528 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4529
4530         /*
4531          * Prevent stats update while adapter is being reset, or if the pci
4532          * connection is down.
4533          */
4534         if (adapter->link_speed == 0)
4535                 return;
4536         if (pci_channel_offline(pdev))
4537                 return;
4538
4539         bytes = 0;
4540         packets = 0;
4541         for (i = 0; i < adapter->num_rx_queues; i++) {
4542                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4543                 struct igb_ring *ring = adapter->rx_ring[i];
4544
4545                 ring->rx_stats.drops += rqdpc_tmp;
4546                 net_stats->rx_fifo_errors += rqdpc_tmp;
4547
4548                 do {
4549                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4550                         _bytes = ring->rx_stats.bytes;
4551                         _packets = ring->rx_stats.packets;
4552                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4553                 bytes += _bytes;
4554                 packets += _packets;
4555         }
4556
4557         net_stats->rx_bytes = bytes;
4558         net_stats->rx_packets = packets;
4559
4560         bytes = 0;
4561         packets = 0;
4562         for (i = 0; i < adapter->num_tx_queues; i++) {
4563                 struct igb_ring *ring = adapter->tx_ring[i];
4564                 do {
4565                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4566                         _bytes = ring->tx_stats.bytes;
4567                         _packets = ring->tx_stats.packets;
4568                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4569                 bytes += _bytes;
4570                 packets += _packets;
4571         }
4572         net_stats->tx_bytes = bytes;
4573         net_stats->tx_packets = packets;
4574
4575         /* read stats registers */
4576         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4577         adapter->stats.gprc += rd32(E1000_GPRC);
4578         adapter->stats.gorc += rd32(E1000_GORCL);
4579         rd32(E1000_GORCH); /* clear GORCL */
4580         adapter->stats.bprc += rd32(E1000_BPRC);
4581         adapter->stats.mprc += rd32(E1000_MPRC);
4582         adapter->stats.roc += rd32(E1000_ROC);
4583
4584         adapter->stats.prc64 += rd32(E1000_PRC64);
4585         adapter->stats.prc127 += rd32(E1000_PRC127);
4586         adapter->stats.prc255 += rd32(E1000_PRC255);
4587         adapter->stats.prc511 += rd32(E1000_PRC511);
4588         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4589         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4590         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4591         adapter->stats.sec += rd32(E1000_SEC);
4592
4593         mpc = rd32(E1000_MPC);
4594         adapter->stats.mpc += mpc;
4595         net_stats->rx_fifo_errors += mpc;
4596         adapter->stats.scc += rd32(E1000_SCC);
4597         adapter->stats.ecol += rd32(E1000_ECOL);
4598         adapter->stats.mcc += rd32(E1000_MCC);
4599         adapter->stats.latecol += rd32(E1000_LATECOL);
4600         adapter->stats.dc += rd32(E1000_DC);
4601         adapter->stats.rlec += rd32(E1000_RLEC);
4602         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4603         adapter->stats.xontxc += rd32(E1000_XONTXC);
4604         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4605         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4606         adapter->stats.fcruc += rd32(E1000_FCRUC);
4607         adapter->stats.gptc += rd32(E1000_GPTC);
4608         adapter->stats.gotc += rd32(E1000_GOTCL);
4609         rd32(E1000_GOTCH); /* clear GOTCL */
4610         adapter->stats.rnbc += rd32(E1000_RNBC);
4611         adapter->stats.ruc += rd32(E1000_RUC);
4612         adapter->stats.rfc += rd32(E1000_RFC);
4613         adapter->stats.rjc += rd32(E1000_RJC);
4614         adapter->stats.tor += rd32(E1000_TORH);
4615         adapter->stats.tot += rd32(E1000_TOTH);
4616         adapter->stats.tpr += rd32(E1000_TPR);
4617
4618         adapter->stats.ptc64 += rd32(E1000_PTC64);
4619         adapter->stats.ptc127 += rd32(E1000_PTC127);
4620         adapter->stats.ptc255 += rd32(E1000_PTC255);
4621         adapter->stats.ptc511 += rd32(E1000_PTC511);
4622         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4623         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4624
4625         adapter->stats.mptc += rd32(E1000_MPTC);
4626         adapter->stats.bptc += rd32(E1000_BPTC);
4627
4628         adapter->stats.tpt += rd32(E1000_TPT);
4629         adapter->stats.colc += rd32(E1000_COLC);
4630
4631         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4632         /* read internal phy specific stats */
4633         reg = rd32(E1000_CTRL_EXT);
4634         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4635                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4636                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4637         }
4638
4639         adapter->stats.tsctc += rd32(E1000_TSCTC);
4640         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4641
4642         adapter->stats.iac += rd32(E1000_IAC);
4643         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4644         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4645         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4646         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4647         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4648         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4649         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4650         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4651
4652         /* Fill out the OS statistics structure */
4653         net_stats->multicast = adapter->stats.mprc;
4654         net_stats->collisions = adapter->stats.colc;
4655
4656         /* Rx Errors */
4657
4658         /* RLEC on some newer hardware can be incorrect so build
4659          * our own version based on RUC and ROC */
4660         net_stats->rx_errors = adapter->stats.rxerrc +
4661                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4662                 adapter->stats.ruc + adapter->stats.roc +
4663                 adapter->stats.cexterr;
4664         net_stats->rx_length_errors = adapter->stats.ruc +
4665                                       adapter->stats.roc;
4666         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4667         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4668         net_stats->rx_missed_errors = adapter->stats.mpc;
4669
4670         /* Tx Errors */
4671         net_stats->tx_errors = adapter->stats.ecol +
4672                                adapter->stats.latecol;
4673         net_stats->tx_aborted_errors = adapter->stats.ecol;
4674         net_stats->tx_window_errors = adapter->stats.latecol;
4675         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4676
4677         /* Tx Dropped needs to be maintained elsewhere */
4678
4679         /* Phy Stats */
4680         if (hw->phy.media_type == e1000_media_type_copper) {
4681                 if ((adapter->link_speed == SPEED_1000) &&
4682                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4683                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4684                         adapter->phy_stats.idle_errors += phy_tmp;
4685                 }
4686         }
4687
4688         /* Management Stats */
4689         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4690         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4691         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4692
4693         /* OS2BMC Stats */
4694         reg = rd32(E1000_MANC);
4695         if (reg & E1000_MANC_EN_BMC2OS) {
4696                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4697                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4698                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4699                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4700         }
4701 }
4702
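/**
 * igb_msix_other - MSI-X handler for link, mailbox, reset and error events
 * @irq: interrupt number
 * @data: pointer to the adapter structure
 **/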
4703 static irqreturn_t igb_msix_other(int irq, void *data)
4704 {
4705         struct igb_adapter *adapter = data;
4706         struct e1000_hw *hw = &adapter->hw;
4707         u32 icr = rd32(E1000_ICR);
4708         /* reading ICR causes bit 31 of EICR to be cleared */
4709
4710         if (icr & E1000_ICR_DRSTA)
4711                 schedule_work(&adapter->reset_task);
4712
4713         if (icr & E1000_ICR_DOUTSYNC) {
4714                 /* HW is reporting DMA is out of sync */
4715                 adapter->stats.doosync++;
4716                 /* The DMA Out of Sync is also an indication of a spoof event
4717                  * in IOV mode. Check the Wrong VM Behavior register to
4718                  * see if it is really a spoof event. */
4719                 igb_check_wvbr(adapter);
4720         }
4721
4722         /* Check for a mailbox event */
4723         if (icr & E1000_ICR_VMMB)
4724                 igb_msg_task(adapter);
4725
4726         if (icr & E1000_ICR_LSC) {
4727                 hw->mac.get_link_status = 1;
4728                 /* guard against interrupt when we're going down */
4729                 if (!test_bit(__IGB_DOWN, &adapter->state))
4730                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4731         }
4732
4733         if (adapter->vfs_allocated_count)
4734                 wr32(E1000_IMS, E1000_IMS_LSC |
4735                                 E1000_IMS_VMMB |
4736                                 E1000_IMS_DOUTSYNC);
4737         else
4738                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4739         wr32(E1000_EIMS, adapter->eims_other);
4740
4741         return IRQ_HANDLED;
4742 }
4743
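/**
 * igb_write_itr - write a pending ITR value to the vector's ITR register
 * @q_vector: vector whose interrupt throttle rate is being updated
 *
 * Does nothing unless a new value is pending; otherwise writes the value
 * in the layout required by the MAC type and clears the set_itr flag.
 **/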
4744 static void igb_write_itr(struct igb_q_vector *q_vector)
4745 {
4746         struct igb_adapter *adapter = q_vector->adapter;
4747         u32 itr_val = q_vector->itr_val & 0x7FFC;
4748
4749         if (!q_vector->set_itr)
4750                 return;
4751
4752         if (!itr_val)
4753                 itr_val = 0x4;
4754
4755         if (adapter->hw.mac.type == e1000_82575)
4756                 itr_val |= itr_val << 16;
4757         else
4758                 itr_val |= 0x8000000;
4759
4760         writel(itr_val, q_vector->itr_register);
4761         q_vector->set_itr = 0;
4762 }
4763
4764 static irqreturn_t igb_msix_ring(int irq, void *data)
4765 {
4766         struct igb_q_vector *q_vector = data;
4767
4768         /* Write the ITR value calculated from the previous interrupt. */
4769         igb_write_itr(q_vector);
4770
4771         napi_schedule(&q_vector->napi);
4772
4773         return IRQ_HANDLED;
4774 }
4775
4776 #ifdef CONFIG_IGB_DCA
4777 static void igb_update_dca(struct igb_q_vector *q_vector)
4778 {
4779         struct igb_adapter *adapter = q_vector->adapter;
4780         struct e1000_hw *hw = &adapter->hw;
4781         int cpu = get_cpu();
4782
4783         if (q_vector->cpu == cpu)
4784                 goto out_no_update;
4785
4786         if (q_vector->tx_ring) {
4787                 int q = q_vector->tx_ring->reg_idx;
4788                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4789                 if (hw->mac.type == e1000_82575) {
4790                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4791                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4792                 } else {
4793                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4794                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4795                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4796                 }
4797                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4798                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4799         }
4800         if (q_vector->rx_ring) {
4801                 int q = q_vector->rx_ring->reg_idx;
4802                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4803                 if (hw->mac.type == e1000_82575) {
4804                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4805                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4806                 } else {
4807                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4808                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4809                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4810                 }
4811                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4812                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4813                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4814                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4815         }
4816         q_vector->cpu = cpu;
4817 out_no_update:
4818         put_cpu();
4819 }
4820
4821 static void igb_setup_dca(struct igb_adapter *adapter)
4822 {
4823         struct e1000_hw *hw = &adapter->hw;
4824         int i;
4825
4826         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4827                 return;
4828
4829         /* Always use CB2 mode, difference is masked in the CB driver. */
4830         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4831
4832         for (i = 0; i < adapter->num_q_vectors; i++) {
4833                 adapter->q_vector[i]->cpu = -1;
4834                 igb_update_dca(adapter->q_vector[i]);
4835         }
4836 }
4837
4838 static int __igb_notify_dca(struct device *dev, void *data)
4839 {
4840         struct net_device *netdev = dev_get_drvdata(dev);
4841         struct igb_adapter *adapter = netdev_priv(netdev);
4842         struct pci_dev *pdev = adapter->pdev;
4843         struct e1000_hw *hw = &adapter->hw;
4844         unsigned long event = *(unsigned long *)data;
4845
4846         switch (event) {
4847         case DCA_PROVIDER_ADD:
4848                 /* if already enabled, don't do it again */
4849                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4850                         break;
4851                 if (dca_add_requester(dev) == 0) {
4852                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4853                         dev_info(&pdev->dev, "DCA enabled\n");
4854                         igb_setup_dca(adapter);
4855                         break;
4856                 }
4857                 /* Fall Through since DCA is disabled. */
4858         case DCA_PROVIDER_REMOVE:
4859                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4860                         /* without this a class_device is left
4861                          * hanging around in the sysfs model */
4862                         dca_remove_requester(dev);
4863                         dev_info(&pdev->dev, "DCA disabled\n");
4864                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4865                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4866                 }
4867                 break;
4868         }
4869
4870         return 0;
4871 }
4872
4873 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4874                           void *p)
4875 {
4876         int ret_val;
4877
4878         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4879                                          __igb_notify_dca);
4880
4881         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4882 }
4883 #endif /* CONFIG_IGB_DCA */
4884
4885 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4886 {
4887         struct e1000_hw *hw = &adapter->hw;
4888         u32 ping;
4889         int i;
4890
4891         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4892                 ping = E1000_PF_CONTROL_MSG;
4893                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4894                         ping |= E1000_VT_MSGTYPE_CTS;
4895                 igb_write_mbx(hw, &ping, 1, i);
4896         }
4897 }
4898
4899 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4900 {
4901         struct e1000_hw *hw = &adapter->hw;
4902         u32 vmolr = rd32(E1000_VMOLR(vf));
4903         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4904
4905         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4906                             IGB_VF_FLAG_MULTI_PROMISC);
4907         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4908
4909         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4910                 vmolr |= E1000_VMOLR_MPME;
4911                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4912                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4913         } else {
4914                 /*
4915                  * if we have hashes and we are clearing a multicast promisc
4916                  * flag we need to write the hashes to the MTA as this step
4917                  * was previously skipped
4918                  */
4919                 if (vf_data->num_vf_mc_hashes > 30) {
4920                         vmolr |= E1000_VMOLR_MPME;
4921                 } else if (vf_data->num_vf_mc_hashes) {
4922                         int j;
4923                         vmolr |= E1000_VMOLR_ROMPE;
4924                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4925                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4926                 }
4927         }
4928
4929         wr32(E1000_VMOLR(vf), vmolr);
4930
4931         /* there are flags left unprocessed, likely not supported */
4932         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4933                 return -EINVAL;
4934
4935         return 0;
4936
4937 }
4938
4939 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4940                                   u32 *msgbuf, u32 vf)
4941 {
4942         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4943         u16 *hash_list = (u16 *)&msgbuf[1];
4944         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4945         int i;
4946
4947         /* salt away the number of multicast addresses assigned
4948          * to this VF for later use to restore when the PF multicast
4949          * list changes
4950          */
4951         vf_data->num_vf_mc_hashes = n;
4952
4953         /* only up to 30 hash values supported */
4954         if (n > 30)
4955                 n = 30;
4956
4957         /* store the hashes for later use */
4958         for (i = 0; i < n; i++)
4959                 vf_data->vf_mc_hashes[i] = hash_list[i];
4960
4961         /* Flush and reset the mta with the new values */
4962         igb_set_rx_mode(adapter->netdev);
4963
4964         return 0;
4965 }
4966
4967 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4968 {
4969         struct e1000_hw *hw = &adapter->hw;
4970         struct vf_data_storage *vf_data;
4971         int i, j;
4972
4973         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4974                 u32 vmolr = rd32(E1000_VMOLR(i));
4975                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4976
4977                 vf_data = &adapter->vf_data[i];
4978
4979                 if ((vf_data->num_vf_mc_hashes > 30) ||
4980                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4981                         vmolr |= E1000_VMOLR_MPME;
4982                 } else if (vf_data->num_vf_mc_hashes) {
4983                         vmolr |= E1000_VMOLR_ROMPE;
4984                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4985                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4986                 }
4987                 wr32(E1000_VMOLR(i), vmolr);
4988         }
4989 }
4990
4991 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4992 {
4993         struct e1000_hw *hw = &adapter->hw;
4994         u32 pool_mask, reg, vid;
4995         int i;
4996
4997         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4998
4999         /* Find the vlan filter for this id */
5000         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5001                 reg = rd32(E1000_VLVF(i));
5002
5003                 /* remove the vf from the pool */
5004                 reg &= ~pool_mask;
5005
5006                 /* if pool is empty then remove entry from vfta */
5007                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5008                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5009                         vid = reg & E1000_VLVF_VLANID_MASK;
5010                         reg = 0;
5011                         igb_vfta_set(hw, vid, false);
5012                 }
5013
5014                 wr32(E1000_VLVF(i), reg);
5015         }
5016
5017         adapter->vf_data[vf].vlans_enabled = 0;
5018 }
5019
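/**
 * igb_vlvf_set - add or remove a pool from a VLVF VLAN filter entry
 * @adapter: board private structure
 * @vid: VLAN id being added or removed
 * @add: true to add the pool to the filter, false to remove it
 * @vf: pool/VF index being updated
 *
 * Finds (or, when adding, allocates) the VLVF entry for @vid, updates its
 * pool select bits, keeps the VFTA in sync and adjusts the VF's RLPML
 * when its first VLAN is added or its last VLAN removed.
 **/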
5020 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5021 {
5022         struct e1000_hw *hw = &adapter->hw;
5023         u32 reg, i;
5024
5025         /* The vlvf table only exists on 82576 hardware and newer */
5026         if (hw->mac.type < e1000_82576)
5027                 return -1;
5028
5029         /* we only need to do this if VMDq is enabled */
5030         if (!adapter->vfs_allocated_count)
5031                 return -1;
5032
5033         /* Find the vlan filter for this id */
5034         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5035                 reg = rd32(E1000_VLVF(i));
5036                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5037                     vid == (reg & E1000_VLVF_VLANID_MASK))
5038                         break;
5039         }
5040
5041         if (add) {
5042                 if (i == E1000_VLVF_ARRAY_SIZE) {
5043                         /* Did not find a matching VLAN ID entry that was
5044                          * enabled.  Search for a free filter entry, i.e.
5045                          * one without the enable bit set
5046                          */
5047                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5048                                 reg = rd32(E1000_VLVF(i));
5049                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5050                                         break;
5051                         }
5052                 }
5053                 if (i < E1000_VLVF_ARRAY_SIZE) {
5054                         /* Found an enabled/available entry */
5055                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5056
5057                         /* if !enabled we need to set this up in vfta */
5058                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5059                                 /* add VID to filter table */
5060                                 igb_vfta_set(hw, vid, true);
5061                                 reg |= E1000_VLVF_VLANID_ENABLE;
5062                         }
5063                         reg &= ~E1000_VLVF_VLANID_MASK;
5064                         reg |= vid;
5065                         wr32(E1000_VLVF(i), reg);
5066
5067                         /* do not modify RLPML for PF devices */
5068                         if (vf >= adapter->vfs_allocated_count)
5069                                 return 0;
5070
5071                         if (!adapter->vf_data[vf].vlans_enabled) {
5072                                 u32 size;
5073                                 reg = rd32(E1000_VMOLR(vf));
5074                                 size = reg & E1000_VMOLR_RLPML_MASK;
5075                                 size += 4;
5076                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5077                                 reg |= size;
5078                                 wr32(E1000_VMOLR(vf), reg);
5079                         }
5080
5081                         adapter->vf_data[vf].vlans_enabled++;
5082                         return 0;
5083                 }
5084         } else {
5085                 if (i < E1000_VLVF_ARRAY_SIZE) {
5086                         /* remove vf from the pool */
5087                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5088                         /* if pool is empty then remove entry from vfta */
5089                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5090                                 reg = 0;
5091                                 igb_vfta_set(hw, vid, false);
5092                         }
5093                         wr32(E1000_VLVF(i), reg);
5094
5095                         /* do not modify RLPML for PF devices */
5096                         if (vf >= adapter->vfs_allocated_count)
5097                                 return 0;
5098
5099                         adapter->vf_data[vf].vlans_enabled--;
5100                         if (!adapter->vf_data[vf].vlans_enabled) {
5101                                 u32 size;
5102                                 reg = rd32(E1000_VMOLR(vf));
5103                                 size = reg & E1000_VMOLR_RLPML_MASK;
5104                                 size -= 4;
5105                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5106                                 reg |= size;
5107                                 wr32(E1000_VMOLR(vf), reg);
5108                         }
5109                 }
5110         }
5111         return 0;
5112 }
5113
5114 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5115 {
5116         struct e1000_hw *hw = &adapter->hw;
5117
5118         if (vid)
5119                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5120         else
5121                 wr32(E1000_VMVIR(vf), 0);
5122 }
5123
5124 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5125                                int vf, u16 vlan, u8 qos)
5126 {
5127         int err = 0;
5128         struct igb_adapter *adapter = netdev_priv(netdev);
5129
5130         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5131                 return -EINVAL;
5132         if (vlan || qos) {
5133                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5134                 if (err)
5135                         goto out;
5136                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5137                 igb_set_vmolr(adapter, vf, !vlan);
5138                 adapter->vf_data[vf].pf_vlan = vlan;
5139                 adapter->vf_data[vf].pf_qos = qos;
5140                 dev_info(&adapter->pdev->dev,
5141                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5142                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5143                         dev_warn(&adapter->pdev->dev,
5144                                  "The VF VLAN has been set,"
5145                                  " but the PF device is not up.\n");
5146                         dev_warn(&adapter->pdev->dev,
5147                                  "Bring the PF device up before"
5148                                  " attempting to use the VF device.\n");
5149                 }
5150         } else {
5151                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5152                                    false, vf);
5153                 igb_set_vmvir(adapter, vlan, vf);
5154                 igb_set_vmolr(adapter, vf, true);
5155                 adapter->vf_data[vf].pf_vlan = 0;
5156                 adapter->vf_data[vf].pf_qos = 0;
5157         }
5158 out:
5159         return err;
5160 }
5161
5162 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5163 {
5164         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5165         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5166
5167         return igb_vlvf_set(adapter, vid, add, vf);
5168 }
5169
5170 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5171 {
5172         /* clear flags - except flag that indicates PF has set the MAC */
5173         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5174         adapter->vf_data[vf].last_nack = jiffies;
5175
5176         /* reset offloads to defaults */
5177         igb_set_vmolr(adapter, vf, true);
5178
5179         /* reset vlans for device */
5180         igb_clear_vf_vfta(adapter, vf);
5181         if (adapter->vf_data[vf].pf_vlan)
5182                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5183                                     adapter->vf_data[vf].pf_vlan,
5184                                     adapter->vf_data[vf].pf_qos);
5185         else
5186                 igb_clear_vf_vfta(adapter, vf);
5187
5188         /* reset multicast table array for vf */
5189         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5190
5191         /* Flush and reset the mta with the new values */
5192         igb_set_rx_mode(adapter->netdev);
5193 }
5194
5195 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5196 {
5197         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5198
5199         /* generate a new mac address as we were hotplug removed/added */
5200         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5201                 random_ether_addr(vf_mac);
5202
5203         /* process remaining reset events */
5204         igb_vf_reset(adapter, vf);
5205 }
5206
5207 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5208 {
5209         struct e1000_hw *hw = &adapter->hw;
5210         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5211         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5212         u32 reg, msgbuf[3];
5213         u8 *addr = (u8 *)(&msgbuf[1]);
5214
5215         /* process all the same items cleared in a function level reset */
5216         igb_vf_reset(adapter, vf);
5217
5218         /* set vf mac address */
5219         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5220
5221         /* enable transmit and receive for vf */
5222         reg = rd32(E1000_VFTE);
5223         wr32(E1000_VFTE, reg | (1 << vf));
5224         reg = rd32(E1000_VFRE);
5225         wr32(E1000_VFRE, reg | (1 << vf));
5226
5227         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5228
5229         /* reply to reset with ack and vf mac address */
5230         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5231         memcpy(addr, vf_mac, 6);
5232         igb_write_mbx(hw, msgbuf, 3, vf);
5233 }
5234
5235 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5236 {
5237         /*
5238          * The VF MAC Address is stored in a packed array of bytes
5239          * starting at the second 32 bit word of the msg array
5240          */
5241         unsigned char *addr = (unsigned char *)&msg[1];
5242         int err = -1;
5243
5244         if (is_valid_ether_addr(addr))
5245                 err = igb_set_vf_mac(adapter, vf, addr);
5246
5247         return err;
5248 }
5249
5250 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5251 {
5252         struct e1000_hw *hw = &adapter->hw;
5253         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5254         u32 msg = E1000_VT_MSGTYPE_NACK;
5255
5256         /* if device isn't clear to send it shouldn't be reading either */
5257         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5258             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5259                 igb_write_mbx(hw, &msg, 1, vf);
5260                 vf_data->last_nack = jiffies;
5261         }
5262 }
5263
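/**
 * igb_rcv_msg_from_vf - read and dispatch one mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF index the message came from
 *
 * Handles VF reset requests directly and dispatches MAC address,
 * promiscuous, multicast, LPE and VLAN requests, then ACKs or NACKs the
 * message back to the VF.
 **/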
5264 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5265 {
5266         struct pci_dev *pdev = adapter->pdev;
5267         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5268         struct e1000_hw *hw = &adapter->hw;
5269         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5270         s32 retval;
5271
5272         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5273
5274         if (retval) {
5275                 /* if receive failed revoke VF CTS status and restart init */
5276                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5277                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5278                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5279                         return;
5280                 goto out;
5281         }
5282
5283         /* this is a message we already processed, do nothing */
5284         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5285                 return;
5286
5287         /*
5288          * until the vf completes a reset it should not be
5289          * allowed to start any configuration.
5290          */
5291
5292         if (msgbuf[0] == E1000_VF_RESET) {
5293                 igb_vf_reset_msg(adapter, vf);
5294                 return;
5295         }
5296
5297         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5298                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5299                         return;
5300                 retval = -1;
5301                 goto out;
5302         }
5303
5304         switch ((msgbuf[0] & 0xFFFF)) {
5305         case E1000_VF_SET_MAC_ADDR:
5306                 retval = -EINVAL;
5307                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5308                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5309                 else
5310                         dev_warn(&pdev->dev,
5311                                  "VF %d attempted to override administratively "
5312                                  "set MAC address\nReload the VF driver to "
5313                                  "resume operations\n", vf);
5314                 break;
5315         case E1000_VF_SET_PROMISC:
5316                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5317                 break;
5318         case E1000_VF_SET_MULTICAST:
5319                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5320                 break;
5321         case E1000_VF_SET_LPE:
5322                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5323                 break;
5324         case E1000_VF_SET_VLAN:
5325                 retval = -1;
5326                 if (vf_data->pf_vlan)
5327                         dev_warn(&pdev->dev,
5328                                  "VF %d attempted to override administratively "
5329                                  "set VLAN tag\nReload the VF driver to "
5330                                  "resume operations\n", vf);
5331                 else
5332                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5333                 break;
5334         default:
5335                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5336                 retval = -1;
5337                 break;
5338         }
5339
5340         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5341 out:
5342         /* notify the VF of the results of what it sent us */
5343         if (retval)
5344                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5345         else
5346                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5347
5348         igb_write_mbx(hw, msgbuf, 1, vf);
5349 }
5350
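/**
 * igb_msg_task - service pending mailbox events from all VFs
 * @adapter: board private structure
 *
 * Checks each allocated VF for reset requests, pending messages and acks
 * and handles them in turn.
 **/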
5351 static void igb_msg_task(struct igb_adapter *adapter)
5352 {
5353         struct e1000_hw *hw = &adapter->hw;
5354         u32 vf;
5355
5356         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5357                 /* process any reset requests */
5358                 if (!igb_check_for_rst(hw, vf))
5359                         igb_vf_reset_event(adapter, vf);
5360
5361                 /* process any messages pending */
5362                 if (!igb_check_for_msg(hw, vf))
5363                         igb_rcv_msg_from_vf(adapter, vf);
5364
5365                 /* process any acks */
5366                 if (!igb_check_for_ack(hw, vf))
5367                         igb_rcv_ack_from_vf(adapter, vf);
5368         }
5369 }
5370
5371 /**
5372  *  igb_set_uta - Set unicast filter table address
5373  *  @adapter: board private structure
5374  *
5375  *  The unicast table address is a register array of 32-bit registers.
5376  *  The table is meant to be used in a way similar to how the MTA is used;
5377  *  however, due to certain limitations in the hardware it is necessary to
5378  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5379  *  enable bit to allow VLAN tag stripping when promiscuous mode is enabled.
5380  **/
5381 static void igb_set_uta(struct igb_adapter *adapter)
5382 {
5383         struct e1000_hw *hw = &adapter->hw;
5384         int i;
5385
5386         /* The UTA table only exists on 82576 hardware and newer */
5387         if (hw->mac.type < e1000_82576)
5388                 return;
5389
5390         /* we only need to do this if VMDq is enabled */
5391         if (!adapter->vfs_allocated_count)
5392                 return;
5393
5394         for (i = 0; i < hw->mac.uta_reg_count; i++)
5395                 array_wr32(E1000_UTA, i, ~0);
5396 }
5397
5398 /**
5399  * igb_intr_msi - Interrupt Handler
5400  * @irq: interrupt number
5401  * @data: pointer to a network interface device structure
5402  **/
5403 static irqreturn_t igb_intr_msi(int irq, void *data)
5404 {
5405         struct igb_adapter *adapter = data;
5406         struct igb_q_vector *q_vector = adapter->q_vector[0];
5407         struct e1000_hw *hw = &adapter->hw;
5408         /* read ICR disables interrupts using IAM */
5409         u32 icr = rd32(E1000_ICR);
5410
5411         igb_write_itr(q_vector);
5412
5413         if (icr & E1000_ICR_DRSTA)
5414                 schedule_work(&adapter->reset_task);
5415
5416         if (icr & E1000_ICR_DOUTSYNC) {
5417                 /* HW is reporting DMA is out of sync */
5418                 adapter->stats.doosync++;
5419         }
5420
5421         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5422                 hw->mac.get_link_status = 1;
5423                 if (!test_bit(__IGB_DOWN, &adapter->state))
5424                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5425         }
5426
5427         napi_schedule(&q_vector->napi);
5428
5429         return IRQ_HANDLED;
5430 }
5431
5432 /**
5433  * igb_intr - Legacy Interrupt Handler
5434  * @irq: interrupt number
5435  * @data: pointer to a network interface device structure
5436  **/
5437 static irqreturn_t igb_intr(int irq, void *data)
5438 {
5439         struct igb_adapter *adapter = data;
5440         struct igb_q_vector *q_vector = adapter->q_vector[0];
5441         struct e1000_hw *hw = &adapter->hw;
5442         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5443          * need for the IMC write */
5444         u32 icr = rd32(E1000_ICR);
5445         if (!icr)
5446                 return IRQ_NONE;  /* Not our interrupt */
5447
5448         igb_write_itr(q_vector);
5449
5450         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5451          * not set, then the adapter didn't send an interrupt */
5452         if (!(icr & E1000_ICR_INT_ASSERTED))
5453                 return IRQ_NONE;
5454
5455         if (icr & E1000_ICR_DRSTA)
5456                 schedule_work(&adapter->reset_task);
5457
5458         if (icr & E1000_ICR_DOUTSYNC) {
5459                 /* HW is reporting DMA is out of sync */
5460                 adapter->stats.doosync++;
5461         }
5462
5463         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5464                 hw->mac.get_link_status = 1;
5465                 /* guard against interrupt when we're going down */
5466                 if (!test_bit(__IGB_DOWN, &adapter->state))
5467                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5468         }
5469
5470         napi_schedule(&q_vector->napi);
5471
5472         return IRQ_HANDLED;
5473 }
5474
5475 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5476 {
5477         struct igb_adapter *adapter = q_vector->adapter;
5478         struct e1000_hw *hw = &adapter->hw;
5479
5480         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5481             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5482                 if (!adapter->msix_entries)
5483                         igb_set_itr(adapter);
5484                 else
5485                         igb_update_ring_itr(q_vector);
5486         }
5487
5488         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5489                 if (adapter->msix_entries)
5490                         wr32(E1000_EIMS, q_vector->eims_value);
5491                 else
5492                         igb_irq_enable(adapter);
5493         }
5494 }
5495
5496 /**
5497  * igb_poll - NAPI Rx polling callback
5498  * @napi: napi polling structure
5499  * @budget: count of how many packets we should handle
5500  **/
5501 static int igb_poll(struct napi_struct *napi, int budget)
5502 {
5503         struct igb_q_vector *q_vector = container_of(napi,
5504                                                      struct igb_q_vector,
5505                                                      napi);
5506         bool clean_complete = true;
5507
5508 #ifdef CONFIG_IGB_DCA
5509         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5510                 igb_update_dca(q_vector);
5511 #endif
5512         if (q_vector->tx_ring)
5513                 clean_complete = igb_clean_tx_irq(q_vector);
5514
5515         if (q_vector->rx_ring)
5516                 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5517
5518         /* If all work not completed, return budget and keep polling */
5519         if (!clean_complete)
5520                 return budget;
5521
5522         /* all work completed, exit the polling mode */
5523         napi_complete(napi);
5524         igb_ring_irq_enable(q_vector);
5525
5526         return 0;
5527 }
5528
5529 /**
5530  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5531  * @adapter: board private structure
5532  * @shhwtstamps: timestamp structure to update
5533  * @regval: unsigned 64bit system time value.
5534  *
5535  * We need to convert the system time value stored in the RX/TXSTMP registers
5536  * into a hwtstamp which can be used by the upper level timestamping functions
5537  */
5538 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5539                                    struct skb_shared_hwtstamps *shhwtstamps,
5540                                    u64 regval)
5541 {
5542         u64 ns;
5543
5544         /*
5545          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5546          * 24 to match the clock shift we set up earlier.
5547          */
5548         if (adapter->hw.mac.type == e1000_82580)
5549                 regval <<= IGB_82580_TSYNC_SHIFT;
5550
5551         ns = timecounter_cyc2time(&adapter->clock, regval);
5552         timecompare_update(&adapter->compare, ns);
5553         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5554         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5555         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5556 }
5557
5558 /**
5559  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5560  * @q_vector: pointer to q_vector containing needed info
5561  * @buffer_info: pointer to igb_tx_buffer structure
5562  *
5563  * If we were asked to do hardware stamping and such a time stamp is
5564  * available, then it must have been for this skb here because we
5565  * allow only one such packet into the queue.
5566  */
5567 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5568                             struct igb_tx_buffer *buffer_info)
5569 {
5570         struct igb_adapter *adapter = q_vector->adapter;
5571         struct e1000_hw *hw = &adapter->hw;
5572         struct skb_shared_hwtstamps shhwtstamps;
5573         u64 regval;
5574
5575         /* exit if hw timestamping was not requested for this skb or the TX stamp is not valid */
5576         if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5577             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5578                 return;
5579
5580         regval = rd32(E1000_TXSTMPL);
5581         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5582
5583         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5584         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5585 }
5586
5587 /**
5588  * igb_clean_tx_irq - Reclaim resources after transmit completes
5589  * @q_vector: pointer to q_vector containing needed info
5590  * returns true if ring is completely cleaned
5591  **/
5592 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5593 {
5594         struct igb_adapter *adapter = q_vector->adapter;
5595         struct igb_ring *tx_ring = q_vector->tx_ring;
5596         struct igb_tx_buffer *tx_buffer;
5597         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5598         unsigned int total_bytes = 0, total_packets = 0;
5599         unsigned int budget = q_vector->tx_work_limit;
5600         unsigned int i = tx_ring->next_to_clean;
5601
5602         if (test_bit(__IGB_DOWN, &adapter->state))
5603                 return true;
5604
5605         tx_buffer = &tx_ring->tx_buffer_info[i];
5606         tx_desc = IGB_TX_DESC(tx_ring, i);
5607         i -= tx_ring->count;
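        /* i is biased by -count here so the wrap checks below only need to
         * test for zero; it is re-biased by +count once the loop finishes.
         */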
5608
5609         for (; budget; budget--) {
5610                 eop_desc = tx_buffer->next_to_watch;
5611
5612                 /* prevent any other reads prior to eop_desc */
5613                 rmb();
5614
5615                 /* if next_to_watch is not set then there is no work pending */
5616                 if (!eop_desc)
5617                         break;
5618
5619                 /* if DD is not set pending work has not been completed */
5620                 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5621                         break;
5622
5623                 /* clear next_to_watch to prevent false hangs */
5624                 tx_buffer->next_to_watch = NULL;
5625
5626                 /* update the statistics for this packet */
5627                 total_bytes += tx_buffer->bytecount;
5628                 total_packets += tx_buffer->gso_segs;
5629
5630                 /* retrieve hardware timestamp */
5631                 igb_tx_hwtstamp(q_vector, tx_buffer);
5632
5633                 /* free the skb */
5634                 dev_kfree_skb_any(tx_buffer->skb);
5635                 tx_buffer->skb = NULL;
5636
5637                 /* unmap skb header data */
5638                 dma_unmap_single(tx_ring->dev,
5639                                  tx_buffer->dma,
5640                                  tx_buffer->length,
5641                                  DMA_TO_DEVICE);
5642
5643                 /* clear last DMA location and unmap remaining buffers */
5644                 while (tx_desc != eop_desc) {
5645                         tx_buffer->dma = 0;
5646
5647                         tx_buffer++;
5648                         tx_desc++;
5649                         i++;
5650                         if (unlikely(!i)) {
5651                                 i -= tx_ring->count;
5652                                 tx_buffer = tx_ring->tx_buffer_info;
5653                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
5654                         }
5655
5656                         /* unmap any remaining paged data */
5657                         if (tx_buffer->dma) {
5658                                 dma_unmap_page(tx_ring->dev,
5659                                                tx_buffer->dma,
5660                                                tx_buffer->length,
5661                                                DMA_TO_DEVICE);
5662                         }
5663                 }
5664
5665                 /* clear last DMA location */
5666                 tx_buffer->dma = 0;
5667
5668                 /* move us one more past the eop_desc for start of next pkt */
5669                 tx_buffer++;
5670                 tx_desc++;
5671                 i++;
5672                 if (unlikely(!i)) {
5673                         i -= tx_ring->count;
5674                         tx_buffer = tx_ring->tx_buffer_info;
5675                         tx_desc = IGB_TX_DESC(tx_ring, 0);
5676                 }
5677         }
5678
5679         i += tx_ring->count;
5680         tx_ring->next_to_clean = i;
5681         u64_stats_update_begin(&tx_ring->tx_syncp);
5682         tx_ring->tx_stats.bytes += total_bytes;
5683         tx_ring->tx_stats.packets += total_packets;
5684         u64_stats_update_end(&tx_ring->tx_syncp);
5685         tx_ring->total_bytes += total_bytes;
5686         tx_ring->total_packets += total_packets;
5687
5688         if (tx_ring->detect_tx_hung) {
5689                 struct e1000_hw *hw = &adapter->hw;
5690
5691                 eop_desc = tx_buffer->next_to_watch;
5692
5693                 /* Detect a transmit hang in hardware, this serializes the
5694                  * check with the clearing of time_stamp and movement of i */
5695                 tx_ring->detect_tx_hung = false;
5696                 if (eop_desc &&
5697                     time_after(jiffies, tx_buffer->time_stamp +
5698                                (adapter->tx_timeout_factor * HZ)) &&
5699                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5700
5701                         /* detected Tx unit hang */
5702                         dev_err(tx_ring->dev,
5703                                 "Detected Tx Unit Hang\n"
5704                                 "  Tx Queue             <%d>\n"
5705                                 "  TDH                  <%x>\n"
5706                                 "  TDT                  <%x>\n"
5707                                 "  next_to_use          <%x>\n"
5708                                 "  next_to_clean        <%x>\n"
5709                                 "buffer_info[next_to_clean]\n"
5710                                 "  time_stamp           <%lx>\n"
5711                                 "  next_to_watch        <%p>\n"
5712                                 "  jiffies              <%lx>\n"
5713                                 "  desc.status          <%x>\n",
5714                                 tx_ring->queue_index,
5715                                 rd32(E1000_TDH(tx_ring->reg_idx)),
5716                                 readl(tx_ring->tail),
5717                                 tx_ring->next_to_use,
5718                                 tx_ring->next_to_clean,
5719                                 tx_buffer->time_stamp,
5720                                 eop_desc,
5721                                 jiffies,
5722                                 eop_desc->wb.status);
5723                         netif_stop_subqueue(tx_ring->netdev,
5724                                             tx_ring->queue_index);
5725
5726                         /* we are about to reset, no point in enabling stuff */
5727                         return true;
5728                 }
5729         }
5730
5731         if (unlikely(total_packets &&
5732                      netif_carrier_ok(tx_ring->netdev) &&
5733                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5734                 /* Make sure that anybody stopping the queue after this
5735                  * sees the new next_to_clean.
5736                  */
5737                 smp_mb();
5738                 if (__netif_subqueue_stopped(tx_ring->netdev,
5739                                              tx_ring->queue_index) &&
5740                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5741                         netif_wake_subqueue(tx_ring->netdev,
5742                                             tx_ring->queue_index);
5743
5744                         u64_stats_update_begin(&tx_ring->tx_syncp);
5745                         tx_ring->tx_stats.restart_queue++;
5746                         u64_stats_update_end(&tx_ring->tx_syncp);
5747                 }
5748         }
5749
5750         return !!budget;
5751 }
5752
5753 static inline void igb_rx_checksum(struct igb_ring *ring,
5754                                    u32 status_err, struct sk_buff *skb)
5755 {
5756         skb_checksum_none_assert(skb);
5757
5758         /* skip if the Ignore Checksum bit is set or checksum is disabled through ethtool */
5759         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5760              (status_err & E1000_RXD_STAT_IXSM))
5761                 return;
5762
5763         /* TCP/UDP checksum error bit is set */
5764         if (status_err &
5765             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5766                 /*
5767                  * work around errata with sctp packets where the TCPE aka
5768                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5769                  * packets, (aka let the stack check the crc32c)
5770                  */
5771                 if (!((skb->len == 60) &&
5772                       (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))) {
5773                         u64_stats_update_begin(&ring->rx_syncp);
5774                         ring->rx_stats.csum_err++;
5775                         u64_stats_update_end(&ring->rx_syncp);
5776                 }
5777                 /* let the stack verify checksum errors */
5778                 return;
5779         }
5780         /* It must be a TCP or UDP packet with a valid checksum */
5781         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5782                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5783
5784         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5785 }
5786
5787 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5788                                    struct sk_buff *skb)
5789 {
5790         struct igb_adapter *adapter = q_vector->adapter;
5791         struct e1000_hw *hw = &adapter->hw;
5792         u64 regval;
5793
5794         /*
5795          * If this bit is set, then the RX registers contain the time stamp. No
5796          * other packet will be time stamped until we read these registers, so
5797          * read the registers to make them available again. Because only one
5798          * packet can be time stamped at a time, we know that the register
5799          * values must belong to this one here and therefore we don't need to
5800          * compare any of the additional attributes stored for it.
5801          *
5802          * If nothing went wrong, then it should have a shared tx_flags that we
5803          * can turn into a skb_shared_hwtstamps.
5804          */
5805         if (staterr & E1000_RXDADV_STAT_TSIP) {
5806                 u32 *stamp = (u32 *)skb->data;
5807                 regval = le32_to_cpu(*(stamp + 2));
5808                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5809                 skb_pull(skb, IGB_TS_HDR_LEN);
5810         } else {
5811                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5812                         return;
5813
5814                 regval = rd32(E1000_RXSTMPL);
5815                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5816         }
5817
5818         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5819 }
5820 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5821 {
5822         /* HW will not DMA in data larger than the given buffer, even if it
5823          * parses the (NFS, of course) header to be larger.  In that case, it
5824          * fills the header buffer and spills the rest into the page.
5825          */
5826         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5827                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5828         if (hlen > IGB_RX_HDR_LEN)
5829                 hlen = IGB_RX_HDR_LEN;
5830         return hlen;
5831 }
5832
5833 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5834 {
5835         struct igb_ring *rx_ring = q_vector->rx_ring;
5836         union e1000_adv_rx_desc *rx_desc;
5837         const int current_node = numa_node_id();
5838         unsigned int total_bytes = 0, total_packets = 0;
5839         u32 staterr;
5840         u16 cleaned_count = igb_desc_unused(rx_ring);
5841         u16 i = rx_ring->next_to_clean;
5842
5843         rx_desc = IGB_RX_DESC(rx_ring, i);
5844         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5845
5846         while (staterr & E1000_RXD_STAT_DD) {
5847                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5848                 struct sk_buff *skb = buffer_info->skb;
5849                 union e1000_adv_rx_desc *next_rxd;
5850
5851                 buffer_info->skb = NULL;
5852                 prefetch(skb->data);
5853
5854                 i++;
5855                 if (i == rx_ring->count)
5856                         i = 0;
5857
5858                 next_rxd = IGB_RX_DESC(rx_ring, i);
5859                 prefetch(next_rxd);
5860
5861                 /*
5862                  * This memory barrier is needed to keep us from reading
5863                  * any other fields out of the rx_desc until we know the
5864                  * RXD_STAT_DD bit is set
5865                  */
5866                 rmb();
5867
5868                 if (!skb_is_nonlinear(skb)) {
5869                         __skb_put(skb, igb_get_hlen(rx_desc));
5870                         dma_unmap_single(rx_ring->dev, buffer_info->dma,
5871                                          IGB_RX_HDR_LEN,
5872                                          DMA_FROM_DEVICE);
5873                         buffer_info->dma = 0;
5874                 }
5875
5876                 if (rx_desc->wb.upper.length) {
5877                         u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5878
5879                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5880                                                 buffer_info->page,
5881                                                 buffer_info->page_offset,
5882                                                 length);
5883
5884                         skb->len += length;
5885                         skb->data_len += length;
5886                         skb->truesize += length;
5887
5888                         if ((page_count(buffer_info->page) != 1) ||
5889                             (page_to_nid(buffer_info->page) != current_node))
5890                                 buffer_info->page = NULL;
5891                         else
5892                                 get_page(buffer_info->page);
5893
5894                         dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5895                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5896                         buffer_info->page_dma = 0;
5897                 }
5898
5899                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5900                         struct igb_rx_buffer *next_buffer;
5901                         next_buffer = &rx_ring->rx_buffer_info[i];
5902                         buffer_info->skb = next_buffer->skb;
5903                         buffer_info->dma = next_buffer->dma;
5904                         next_buffer->skb = skb;
5905                         next_buffer->dma = 0;
5906                         goto next_desc;
5907                 }
5908
5909                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5910                         dev_kfree_skb_any(skb);
5911                         goto next_desc;
5912                 }
5913
5914                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5915                         igb_rx_hwtstamp(q_vector, staterr, skb);
5916                 total_bytes += skb->len;
5917                 total_packets++;
5918
5919                 igb_rx_checksum(rx_ring, staterr, skb);
5920
5921                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
5922
5923                 if (staterr & E1000_RXD_STAT_VP) {
5924                         u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5925
5926                         __vlan_hwaccel_put_tag(skb, vid);
5927                 }
5928                 napi_gro_receive(&q_vector->napi, skb);
5929
5930                 budget--;
5931 next_desc:
5932                 if (!budget)
5933                         break;
5934
5935                 cleaned_count++;
5936                 /* return some buffers to hardware, one at a time is too slow */
5937                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5938                         igb_alloc_rx_buffers(rx_ring, cleaned_count);
5939                         cleaned_count = 0;
5940                 }
5941
5942                 /* use prefetched values */
5943                 rx_desc = next_rxd;
5944                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5945         }
5946
5947         rx_ring->next_to_clean = i;
5948         u64_stats_update_begin(&rx_ring->rx_syncp);
5949         rx_ring->rx_stats.packets += total_packets;
5950         rx_ring->rx_stats.bytes += total_bytes;
5951         u64_stats_update_end(&rx_ring->rx_syncp);
5952         rx_ring->total_packets += total_packets;
5953         rx_ring->total_bytes += total_bytes;
5954
5955         if (cleaned_count)
5956                 igb_alloc_rx_buffers(rx_ring, cleaned_count);
5957
5958         return !!budget;
5959 }
5960
5961 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
5962                                  struct igb_rx_buffer *bi)
5963 {
5964         struct sk_buff *skb = bi->skb;
5965         dma_addr_t dma = bi->dma;
5966
5967         if (dma)
5968                 return true;
5969
5970         if (likely(!skb)) {
5971                 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
5972                                                 IGB_RX_HDR_LEN);
5973                 bi->skb = skb;
5974                 if (!skb) {
5975                         rx_ring->rx_stats.alloc_failed++;
5976                         return false;
5977                 }
5978
5979                 /* initialize skb for ring */
5980                 skb_record_rx_queue(skb, rx_ring->queue_index);
5981         }
5982
5983         dma = dma_map_single(rx_ring->dev, skb->data,
5984                              IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
5985
5986         if (dma_mapping_error(rx_ring->dev, dma)) {
5987                 rx_ring->rx_stats.alloc_failed++;
5988                 return false;
5989         }
5990
5991         bi->dma = dma;
5992         return true;
5993 }
5994
5995 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
5996                                   struct igb_rx_buffer *bi)
5997 {
5998         struct page *page = bi->page;
5999         dma_addr_t page_dma = bi->page_dma;
6000         unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
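        /* alternate between the two halves of the page so the half that may
         * still be referenced by the stack is not handed back to hardware
         */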
6001
6002         if (page_dma)
6003                 return true;
6004
6005         if (!page) {
6006                 page = netdev_alloc_page(rx_ring->netdev);
6007                 bi->page = page;
6008                 if (unlikely(!page)) {
6009                         rx_ring->rx_stats.alloc_failed++;
6010                         return false;
6011                 }
6012         }
6013
6014         page_dma = dma_map_page(rx_ring->dev, page,
6015                                 page_offset, PAGE_SIZE / 2,
6016                                 DMA_FROM_DEVICE);
6017
6018         if (dma_mapping_error(rx_ring->dev, page_dma)) {
6019                 rx_ring->rx_stats.alloc_failed++;
6020                 return false;
6021         }
6022
6023         bi->page_dma = page_dma;
6024         bi->page_offset = page_offset;
6025         return true;
6026 }
6027
6028 /**
6029  * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6030  * @rx_ring: rx descriptor ring to refill; @cleaned_count: number of buffers to replace
6031  **/
6032 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6033 {
6034         union e1000_adv_rx_desc *rx_desc;
6035         struct igb_rx_buffer *bi;
6036         u16 i = rx_ring->next_to_use;
6037
6038         rx_desc = IGB_RX_DESC(rx_ring, i);
6039         bi = &rx_ring->rx_buffer_info[i];
6040         i -= rx_ring->count;
6041
6042         while (cleaned_count--) {
6043                 if (!igb_alloc_mapped_skb(rx_ring, bi))
6044                         break;
6045
6046                 /* Refresh the desc even if buffer_addrs didn't change
6047                  * because each write-back erases this info. */
6048                 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6049
6050                 if (!igb_alloc_mapped_page(rx_ring, bi))
6051                         break;
6052
6053                 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6054
6055                 rx_desc++;
6056                 bi++;
6057                 i++;
6058                 if (unlikely(!i)) {
6059                         rx_desc = IGB_RX_DESC(rx_ring, 0);
6060                         bi = rx_ring->rx_buffer_info;
6061                         i -= rx_ring->count;
6062                 }
6063
6064                 /* clear the hdr_addr for the next_to_use descriptor */
6065                 rx_desc->read.hdr_addr = 0;
6066         }
6067
6068         i += rx_ring->count;
6069
6070         if (rx_ring->next_to_use != i) {
6071                 rx_ring->next_to_use = i;
6072
6073                 /* Force memory writes to complete before letting h/w
6074                  * know there are new descriptors to fetch.  (Only
6075                  * applicable for weak-ordered memory model archs,
6076                  * such as IA-64). */
6077                 wmb();
6078                 writel(i, rx_ring->tail);
6079         }
6080 }
6081
6082 /**
6083  * igb_mii_ioctl - handle MII register access ioctls
6084  * @netdev: network interface device structure
6085  * @ifr: interface request structure carrying the MII data
6086  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6087  **/
6088 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6089 {
6090         struct igb_adapter *adapter = netdev_priv(netdev);
6091         struct mii_ioctl_data *data = if_mii(ifr);
6092
6093         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6094                 return -EOPNOTSUPP;
6095
6096         switch (cmd) {
6097         case SIOCGMIIPHY:
6098                 data->phy_id = adapter->hw.phy.addr;
6099                 break;
6100         case SIOCGMIIREG:
6101                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6102                                      &data->val_out))
6103                         return -EIO;
6104                 break;
6105         case SIOCSMIIREG:
6106         default:
6107                 return -EOPNOTSUPP;
6108         }
6109         return 0;
6110 }
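
/*
 * Illustrative sketch (not part of the driver): a user-space application can
 * reach the handler above through the standard MII ioctls.  The interface
 * name "eth0" and the helper name are assumptions for the example only; fd is
 * any socket, e.g. socket(AF_INET, SOCK_DGRAM, 0).
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <net/if.h>
 *	#include <linux/mii.h>
 *	#include <linux/sockios.h>
 *
 *	int read_phy_reg(int fd, unsigned int reg, unsigned int *val)
 *	{
 *		struct ifreq ifr;
 *		struct mii_ioctl_data *mii =
 *			(struct mii_ioctl_data *)&ifr.ifr_data;
 *
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *
 *		if (ioctl(fd, SIOCGMIIPHY, &ifr) < 0)	// fills mii->phy_id
 *			return -1;
 *
 *		mii->reg_num = reg;
 *		if (ioctl(fd, SIOCGMIIREG, &ifr) < 0)
 *			return -1;
 *
 *		*val = mii->val_out;
 *		return 0;
 *	}
 *
 * Note that this driver only accepts SIOCGMIIPHY and SIOCGMIIREG;
 * SIOCSMIIREG returns -EOPNOTSUPP.
 */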
6111
6112 /**
6113  * igb_hwtstamp_ioctl - control hardware time stamping
6114  * @netdev: network interface device structure
6115  * @ifr: interface request structure carrying a struct hwtstamp_config
6116  * @cmd: ioctl command (SIOCSHWTSTAMP)
6117  *
6118  * Outgoing time stamping can be enabled and disabled. Play nice and
6119  * disable it when requested, although it shouldn't cause any overhead
6120  * when no packet needs it. At most one packet in the queue may be
6121  * marked for time stamping, otherwise it would be impossible to tell
6122  * for sure to which packet the hardware time stamp belongs.
6123  *
6124  * Incoming time stamping has to be configured via the hardware
6125  * filters. Not all combinations are supported, in particular event
6126  * type has to be specified. Matching the kind of event packet is
6127  * not supported, with the exception of "all V2 events regardless of
6128  * level 2 or 4".
6129  *
6130  **/
6131 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6132                               struct ifreq *ifr, int cmd)
6133 {
6134         struct igb_adapter *adapter = netdev_priv(netdev);
6135         struct e1000_hw *hw = &adapter->hw;
6136         struct hwtstamp_config config;
6137         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6138         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6139         u32 tsync_rx_cfg = 0;
6140         bool is_l4 = false;
6141         bool is_l2 = false;
6142         u32 regval;
6143
6144         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6145                 return -EFAULT;
6146
6147         /* reserved for future extensions */
6148         if (config.flags)
6149                 return -EINVAL;
6150
6151         switch (config.tx_type) {
6152         case HWTSTAMP_TX_OFF:
6153                 tsync_tx_ctl = 0;
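                /* fall through */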
6154         case HWTSTAMP_TX_ON:
6155                 break;
6156         default:
6157                 return -ERANGE;
6158         }
6159
6160         switch (config.rx_filter) {
6161         case HWTSTAMP_FILTER_NONE:
6162                 tsync_rx_ctl = 0;
6163                 break;
6164         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6165         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6166         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6167         case HWTSTAMP_FILTER_ALL:
6168                 /*
6169                  * register TSYNCRXCFG must be set, therefore it is not
6170                  * possible to time stamp both Sync and Delay_Req messages
6171                  * => fall back to time stamping all packets
6172                  */
6173                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6174                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6175                 break;
6176         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6177                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6178                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6179                 is_l4 = true;
6180                 break;
6181         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6182                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6183                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6184                 is_l4 = true;
6185                 break;
6186         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6187         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6188                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6189                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6190                 is_l2 = true;
6191                 is_l4 = true;
6192                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6193                 break;
6194         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6195         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6196                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6197                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6198                 is_l2 = true;
6199                 is_l4 = true;
6200                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6201                 break;
6202         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6203         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6204         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6205                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6206                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6207                 is_l2 = true;
6208                 break;
6209         default:
6210                 return -ERANGE;
6211         }
6212
6213         if (hw->mac.type == e1000_82575) {
6214                 if (tsync_rx_ctl || tsync_tx_ctl)
6215                         return -EINVAL;
6216                 return 0;
6217         }
6218
6219         /*
6220          * Per-packet timestamping only works if all packets are
6221          * timestamped, so enable timestamping in all packets as
6222          * long as one rx filter was configured.
6223          */
6224         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6225                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6226                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6227         }
6228
6229         /* enable/disable TX */
6230         regval = rd32(E1000_TSYNCTXCTL);
6231         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6232         regval |= tsync_tx_ctl;
6233         wr32(E1000_TSYNCTXCTL, regval);
6234
6235         /* enable/disable RX */
6236         regval = rd32(E1000_TSYNCRXCTL);
6237         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6238         regval |= tsync_rx_ctl;
6239         wr32(E1000_TSYNCRXCTL, regval);
6240
6241         /* define which PTP packets are time stamped */
6242         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6243
6244         /* define ethertype filter for timestamped packets */
6245         if (is_l2)
6246                 wr32(E1000_ETQF(3),
6247                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6248                                  E1000_ETQF_1588 | /* enable timestamping */
6249                                  ETH_P_1588));     /* 1588 eth protocol type */
6250         else
6251                 wr32(E1000_ETQF(3), 0);
6252
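/* IEEE 1588 (PTP) event messages use UDP destination port 319 */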
6253 #define PTP_PORT 319
6254         /* L4 Queue Filter[3]: filter by destination port and protocol */
6255         if (is_l4) {
6256                 u32 ftqf = (IPPROTO_UDP /* UDP */
6257                         | E1000_FTQF_VF_BP /* VF not compared */
6258                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6259                         | E1000_FTQF_MASK); /* mask all inputs */
6260                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6261
6262                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6263                 wr32(E1000_IMIREXT(3),
6264                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6265                 if (hw->mac.type == e1000_82576) {
6266                         /* enable source port check */
6267                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6268                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6269                 }
6270                 wr32(E1000_FTQF(3), ftqf);
6271         } else {
6272                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6273         }
6274         wrfl();
6275
6276         adapter->hwtstamp_config = config;
6277
6278         /* clear TX/RX time stamp registers, just to be sure */
6279         regval = rd32(E1000_TXSTMPH);
6280         regval = rd32(E1000_RXSTMPH);
6281
6282         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6283                 -EFAULT : 0;
6284 }
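
/*
 * Illustrative sketch (not part of the driver): how a user-space application
 * might request the time stamping configured by the handler above.  The
 * interface name "eth0" and the helper name are assumptions for the example
 * only; fd is any socket, e.g. socket(AF_INET, SOCK_DGRAM, 0).
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <net/if.h>
 *	#include <linux/sockios.h>
 *	#include <linux/net_tstamp.h>
 *
 *	int enable_hw_timestamping(int fd)
 *	{
 *		struct hwtstamp_config cfg;
 *		struct ifreq ifr;
 *
 *		memset(&cfg, 0, sizeof(cfg));
 *		cfg.tx_type = HWTSTAMP_TX_ON;
 *		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *		ifr.ifr_data = (void *)&cfg;
 *
 *		return ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *	}
 *
 * On return the driver may have rewritten cfg.rx_filter to reflect the
 * filtering it actually programmed, as described in the comment above.
 */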
6285
6286 /**
6287  * igb_ioctl - handle device-specific ioctls
6288  * @netdev: network interface device structure
6289  * @ifr: interface request structure
6290  * @cmd: ioctl command
6291  **/
6292 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6293 {
6294         switch (cmd) {
6295         case SIOCGMIIPHY:
6296         case SIOCGMIIREG:
6297         case SIOCSMIIREG:
6298                 return igb_mii_ioctl(netdev, ifr, cmd);
6299         case SIOCSHWTSTAMP:
6300                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6301         default:
6302                 return -EOPNOTSUPP;
6303         }
6304 }
6305
6306 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6307 {
6308         struct igb_adapter *adapter = hw->back;
6309         u16 cap_offset;
6310
6311         cap_offset = adapter->pdev->pcie_cap;
6312         if (!cap_offset)
6313                 return -E1000_ERR_CONFIG;
6314
6315         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6316
6317         return 0;
6318 }
6319
6320 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6321 {
6322         struct igb_adapter *adapter = hw->back;
6323         u16 cap_offset;
6324
6325         cap_offset = adapter->pdev->pcie_cap;
6326         if (!cap_offset)
6327                 return -E1000_ERR_CONFIG;
6328
6329         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6330
6331         return 0;
6332 }
6333
6334 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6335 {
6336         struct igb_adapter *adapter = netdev_priv(netdev);
6337         struct e1000_hw *hw = &adapter->hw;
6338         u32 ctrl, rctl;
6339
6340         igb_irq_disable(adapter);
6341
6342         if (features & NETIF_F_HW_VLAN_RX) {
6343                 /* enable VLAN tag insert/strip */
6344                 ctrl = rd32(E1000_CTRL);
6345                 ctrl |= E1000_CTRL_VME;
6346                 wr32(E1000_CTRL, ctrl);
6347
6348                 /* Disable CFI check */
6349                 rctl = rd32(E1000_RCTL);
6350                 rctl &= ~E1000_RCTL_CFIEN;
6351                 wr32(E1000_RCTL, rctl);
6352         } else {
6353                 /* disable VLAN tag insert/strip */
6354                 ctrl = rd32(E1000_CTRL);
6355                 ctrl &= ~E1000_CTRL_VME;
6356                 wr32(E1000_CTRL, ctrl);
6357         }
6358
6359         igb_rlpml_set(adapter);
6360
6361         if (!test_bit(__IGB_DOWN, &adapter->state))
6362                 igb_irq_enable(adapter);
6363 }
6364
6365 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6366 {
6367         struct igb_adapter *adapter = netdev_priv(netdev);
6368         struct e1000_hw *hw = &adapter->hw;
6369         int pf_id = adapter->vfs_allocated_count;
6370
6371         /* attempt to add filter to vlvf array */
6372         igb_vlvf_set(adapter, vid, true, pf_id);
6373
6374         /* add the filter since PF can receive vlans w/o entry in vlvf */
6375         igb_vfta_set(hw, vid, true);
6376
6377         set_bit(vid, adapter->active_vlans);
6378 }
6379
6380 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6381 {
6382         struct igb_adapter *adapter = netdev_priv(netdev);
6383         struct e1000_hw *hw = &adapter->hw;
6384         int pf_id = adapter->vfs_allocated_count;
6385         s32 err;
6386
6387         igb_irq_disable(adapter);
6388
6389         if (!test_bit(__IGB_DOWN, &adapter->state))
6390                 igb_irq_enable(adapter);
6391
6392         /* remove vlan from VLVF table array */
6393         err = igb_vlvf_set(adapter, vid, false, pf_id);
6394
6395         /* if vid was not present in VLVF just remove it from table */
6396         if (err)
6397                 igb_vfta_set(hw, vid, false);
6398
6399         clear_bit(vid, adapter->active_vlans);
6400 }
6401
6402 static void igb_restore_vlan(struct igb_adapter *adapter)
6403 {
6404         u16 vid;
6405
6406         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6407                 igb_vlan_rx_add_vid(adapter->netdev, vid);
6408 }
6409
6410 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6411 {
6412         struct pci_dev *pdev = adapter->pdev;
6413         struct e1000_mac_info *mac = &adapter->hw.mac;
6414
6415         mac->autoneg = 0;
6416
6417         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6418          * for the switch() below to work */
6419         if ((spd & 1) || (dplx & ~1))
6420                 goto err_inval;
6421
6422         /* Fiber NICs only allow 1000 Mbps full duplex */
6423         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6424             (spd != SPEED_1000 ||
6425              dplx != DUPLEX_FULL))
6426                 goto err_inval;
6427
6428         switch (spd + dplx) {
6429         case SPEED_10 + DUPLEX_HALF:
6430                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6431                 break;
6432         case SPEED_10 + DUPLEX_FULL:
6433                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6434                 break;
6435         case SPEED_100 + DUPLEX_HALF:
6436                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6437                 break;
6438         case SPEED_100 + DUPLEX_FULL:
6439                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6440                 break;
6441         case SPEED_1000 + DUPLEX_FULL:
6442                 mac->autoneg = 1;
6443                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6444                 break;
6445         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6446         default:
6447                 goto err_inval;
6448         }
6449         return 0;
6450
6451 err_inval:
6452         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6453         return -EINVAL;
6454 }
6455
6456 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6457 {
6458         struct net_device *netdev = pci_get_drvdata(pdev);
6459         struct igb_adapter *adapter = netdev_priv(netdev);
6460         struct e1000_hw *hw = &adapter->hw;
6461         u32 ctrl, rctl, status;
6462         u32 wufc = adapter->wol;
6463 #ifdef CONFIG_PM
6464         int retval = 0;
6465 #endif
6466
6467         netif_device_detach(netdev);
6468
6469         if (netif_running(netdev))
6470                 igb_close(netdev);
6471
6472         igb_clear_interrupt_scheme(adapter);
6473
6474 #ifdef CONFIG_PM
6475         retval = pci_save_state(pdev);
6476         if (retval)
6477                 return retval;
6478 #endif
6479
6480         status = rd32(E1000_STATUS);
6481         if (status & E1000_STATUS_LU)
6482                 wufc &= ~E1000_WUFC_LNKC;
6483
6484         if (wufc) {
6485                 igb_setup_rctl(adapter);
6486                 igb_set_rx_mode(netdev);
6487
6488                 /* turn on all-multi mode if wake on multicast is enabled */
6489                 if (wufc & E1000_WUFC_MC) {
6490                         rctl = rd32(E1000_RCTL);
6491                         rctl |= E1000_RCTL_MPE;
6492                         wr32(E1000_RCTL, rctl);
6493                 }
6494
6495                 ctrl = rd32(E1000_CTRL);
6496                 /* advertise wake from D3Cold */
6497                 #define E1000_CTRL_ADVD3WUC 0x00100000
6498                 /* phy power management enable */
6499                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6500                 ctrl |= E1000_CTRL_ADVD3WUC;
6501                 wr32(E1000_CTRL, ctrl);
6502
6503                 /* Allow time for pending master requests to run */
6504                 igb_disable_pcie_master(hw);
6505
6506                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6507                 wr32(E1000_WUFC, wufc);
6508         } else {
6509                 wr32(E1000_WUC, 0);
6510                 wr32(E1000_WUFC, 0);
6511         }
6512
6513         *enable_wake = wufc || adapter->en_mng_pt;
6514         if (!*enable_wake)
6515                 igb_power_down_link(adapter);
6516         else
6517                 igb_power_up_link(adapter);
6518
6519         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6520          * would have already happened in close and is redundant. */
6521         igb_release_hw_control(adapter);
6522
6523         pci_disable_device(pdev);
6524
6525         return 0;
6526 }
6527
6528 #ifdef CONFIG_PM
6529 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6530 {
6531         int retval;
6532         bool wake;
6533
6534         retval = __igb_shutdown(pdev, &wake);
6535         if (retval)
6536                 return retval;
6537
6538         if (wake) {
6539                 pci_prepare_to_sleep(pdev);
6540         } else {
6541                 pci_wake_from_d3(pdev, false);
6542                 pci_set_power_state(pdev, PCI_D3hot);
6543         }
6544
6545         return 0;
6546 }
6547
6548 static int igb_resume(struct pci_dev *pdev)
6549 {
6550         struct net_device *netdev = pci_get_drvdata(pdev);
6551         struct igb_adapter *adapter = netdev_priv(netdev);
6552         struct e1000_hw *hw = &adapter->hw;
6553         u32 err;
6554
6555         pci_set_power_state(pdev, PCI_D0);
6556         pci_restore_state(pdev);
6557         pci_save_state(pdev);
6558
6559         err = pci_enable_device_mem(pdev);
6560         if (err) {
6561                 dev_err(&pdev->dev,
6562                         "igb: Cannot enable PCI device from suspend\n");
6563                 return err;
6564         }
6565         pci_set_master(pdev);
6566
6567         pci_enable_wake(pdev, PCI_D3hot, 0);
6568         pci_enable_wake(pdev, PCI_D3cold, 0);
6569
6570         if (igb_init_interrupt_scheme(adapter)) {
6571                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6572                 return -ENOMEM;
6573         }
6574
6575         igb_reset(adapter);
6576
6577         /* let the f/w know that the h/w is now under the control of the
6578          * driver. */
6579         igb_get_hw_control(adapter);
6580
6581         wr32(E1000_WUS, ~0);
6582
6583         if (netif_running(netdev)) {
6584                 err = igb_open(netdev);
6585                 if (err)
6586                         return err;
6587         }
6588
6589         netif_device_attach(netdev);
6590
6591         return 0;
6592 }
6593 #endif
6594
6595 static void igb_shutdown(struct pci_dev *pdev)
6596 {
6597         bool wake;
6598
6599         __igb_shutdown(pdev, &wake);
6600
6601         if (system_state == SYSTEM_POWER_OFF) {
6602                 pci_wake_from_d3(pdev, wake);
6603                 pci_set_power_state(pdev, PCI_D3hot);
6604         }
6605 }
6606
6607 #ifdef CONFIG_NET_POLL_CONTROLLER
6608 /*
6609  * Polling 'interrupt' - used by things like netconsole to send skbs
6610  * without having to re-enable interrupts. It's not called while
6611  * the interrupt routine is executing.
6612  */
6613 static void igb_netpoll(struct net_device *netdev)
6614 {
6615         struct igb_adapter *adapter = netdev_priv(netdev);
6616         struct e1000_hw *hw = &adapter->hw;
6617         int i;
6618
6619         if (!adapter->msix_entries) {
6620                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6621                 igb_irq_disable(adapter);
6622                 napi_schedule(&q_vector->napi);
6623                 return;
6624         }
6625
6626         for (i = 0; i < adapter->num_q_vectors; i++) {
6627                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6628                 wr32(E1000_EIMC, q_vector->eims_value);
6629                 napi_schedule(&q_vector->napi);
6630         }
6631 }
6632 #endif /* CONFIG_NET_POLL_CONTROLLER */
6633
6634 /**
6635  * igb_io_error_detected - called when PCI error is detected
6636  * @pdev: Pointer to PCI device
6637  * @state: The current pci connection state
6638  *
6639  * This function is called after a PCI bus error affecting
6640  * this device has been detected.
6641  */
6642 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6643                                               pci_channel_state_t state)
6644 {
6645         struct net_device *netdev = pci_get_drvdata(pdev);
6646         struct igb_adapter *adapter = netdev_priv(netdev);
6647
6648         netif_device_detach(netdev);
6649
6650         if (state == pci_channel_io_perm_failure)
6651                 return PCI_ERS_RESULT_DISCONNECT;
6652
6653         if (netif_running(netdev))
6654                 igb_down(adapter);
6655         pci_disable_device(pdev);
6656
6657         /* Request a slot reset. */
6658         return PCI_ERS_RESULT_NEED_RESET;
6659 }
6660
6661 /**
6662  * igb_io_slot_reset - called after the pci bus has been reset.
6663  * @pdev: Pointer to PCI device
6664  *
6665  * Restart the card from scratch, as if from a cold-boot. Implementation
6666  * resembles the first-half of the igb_resume routine.
6667  */
6668 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6669 {
6670         struct net_device *netdev = pci_get_drvdata(pdev);
6671         struct igb_adapter *adapter = netdev_priv(netdev);
6672         struct e1000_hw *hw = &adapter->hw;
6673         pci_ers_result_t result;
6674         int err;
6675
6676         if (pci_enable_device_mem(pdev)) {
6677                 dev_err(&pdev->dev,
6678                         "Cannot re-enable PCI device after reset.\n");
6679                 result = PCI_ERS_RESULT_DISCONNECT;
6680         } else {
6681                 pci_set_master(pdev);
6682                 pci_restore_state(pdev);
6683                 pci_save_state(pdev);
6684
6685                 pci_enable_wake(pdev, PCI_D3hot, 0);
6686                 pci_enable_wake(pdev, PCI_D3cold, 0);
6687
6688                 igb_reset(adapter);
6689                 wr32(E1000_WUS, ~0);
6690                 result = PCI_ERS_RESULT_RECOVERED;
6691         }
6692
6693         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6694         if (err) {
6695                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6696                         "failed 0x%0x\n", err);
6697                 /* non-fatal, continue */
6698         }
6699
6700         return result;
6701 }
6702
6703 /**
6704  * igb_io_resume - called when traffic can start flowing again.
6705  * @pdev: Pointer to PCI device
6706  *
6707  * This callback is called when the error recovery driver tells us that
6708  * it's OK to resume normal operation. Implementation resembles the
6709  * second-half of the igb_resume routine.
6710  */
6711 static void igb_io_resume(struct pci_dev *pdev)
6712 {
6713         struct net_device *netdev = pci_get_drvdata(pdev);
6714         struct igb_adapter *adapter = netdev_priv(netdev);
6715
6716         if (netif_running(netdev)) {
6717                 if (igb_up(adapter)) {
6718                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6719                         return;
6720                 }
6721         }
6722
6723         netif_device_attach(netdev);
6724
6725         /* let the f/w know that the h/w is now under the control of the
6726          * driver. */
6727         igb_get_hw_control(adapter);
6728 }
6729
6730 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6731                              u8 qsel)
6732 {
6733         u32 rar_low, rar_high;
6734         struct e1000_hw *hw = &adapter->hw;
6735
6736         /* HW expects these in little endian so we reverse the byte order
6737          * from network order (big endian) to little endian
6738          */
6739         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6740                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6741         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6742
6743         /* Indicate to hardware the Address is Valid. */
6744         rar_high |= E1000_RAH_AV;
6745
6746         if (hw->mac.type == e1000_82575)
6747                 rar_high |= E1000_RAH_POOL_1 * qsel;
6748         else
6749                 rar_high |= E1000_RAH_POOL_1 << qsel;
6750
6751         wr32(E1000_RAL(index), rar_low);
6752         wrfl();
6753         wr32(E1000_RAH(index), rar_high);
6754         wrfl();
6755 }
6756
6757 static int igb_set_vf_mac(struct igb_adapter *adapter,
6758                           int vf, unsigned char *mac_addr)
6759 {
6760         struct e1000_hw *hw = &adapter->hw;
6761         /* VF MAC addresses start at the end of the receive addresses and
6762          * move towards the first; as a result a collision should not be possible */
6763         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6764
6765         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6766
6767         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6768
6769         return 0;
6770 }
6771
6772 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6773 {
6774         struct igb_adapter *adapter = netdev_priv(netdev);
6775         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6776                 return -EINVAL;
6777         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6778         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6779         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6780                                       " change effective.");
6781         if (test_bit(__IGB_DOWN, &adapter->state)) {
6782                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6783                          " but the PF device is not up.\n");
6784                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6785                          " attempting to use the VF device.\n");
6786         }
6787         return igb_set_vf_mac(adapter, vf, mac);
6788 }
6789
6790 static int igb_link_mbps(int internal_link_speed)
6791 {
6792         switch (internal_link_speed) {
6793         case SPEED_100:
6794                 return 100;
6795         case SPEED_1000:
6796                 return 1000;
6797         default:
6798                 return 0;
6799         }
6800 }
6801
6802 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6803                                   int link_speed)
6804 {
6805         int rf_dec, rf_int;
6806         u32 bcnrc_val;
6807
6808         if (tx_rate != 0) {
6809                 /* Calculate the rate factor values to set */
6810                 rf_int = link_speed / tx_rate;
6811                 rf_dec = (link_speed - (rf_int * tx_rate));
6812                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
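                /*
                 * e.g. link_speed 1000, tx_rate 300: rf_int = 3 and rf_dec
                 * holds the remaining 100/300 as a fixed-point fraction
                 * scaled by 2^E1000_RTTBCNRC_RF_INT_SHIFT, so the hardware
                 * divides the link rate by roughly 3.33 for this VF.
                 */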
6813
6814                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6815                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6816                                E1000_RTTBCNRC_RF_INT_MASK);
6817                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6818         } else {
6819                 bcnrc_val = 0;
6820         }
6821
6822         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6823         wr32(E1000_RTTBCNRC, bcnrc_val);
6824 }
6825
6826 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6827 {
6828         int actual_link_speed, i;
6829         bool reset_rate = false;
6830
6831         /* VF TX rate limit was not set or not supported */
6832         if ((adapter->vf_rate_link_speed == 0) ||
6833             (adapter->hw.mac.type != e1000_82576))
6834                 return;
6835
6836         actual_link_speed = igb_link_mbps(adapter->link_speed);
6837         if (actual_link_speed != adapter->vf_rate_link_speed) {
6838                 reset_rate = true;
6839                 adapter->vf_rate_link_speed = 0;
6840                 dev_info(&adapter->pdev->dev,
6841                          "Link speed has been changed. VF Transmit "
6842                          "rate is disabled\n");
6843         }
6844
6845         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6846                 if (reset_rate)
6847                         adapter->vf_data[i].tx_rate = 0;
6848
6849                 igb_set_vf_rate_limit(&adapter->hw, i,
6850                                       adapter->vf_data[i].tx_rate,
6851                                       actual_link_speed);
6852         }
6853 }
6854
6855 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6856 {
6857         struct igb_adapter *adapter = netdev_priv(netdev);
6858         struct e1000_hw *hw = &adapter->hw;
6859         int actual_link_speed;
6860
6861         if (hw->mac.type != e1000_82576)
6862                 return -EOPNOTSUPP;
6863
6864         actual_link_speed = igb_link_mbps(adapter->link_speed);
6865         if ((vf >= adapter->vfs_allocated_count) ||
6866             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6867             (tx_rate < 0) || (tx_rate > actual_link_speed))
6868                 return -EINVAL;
6869
6870         adapter->vf_rate_link_speed = actual_link_speed;
6871         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6872         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6873
6874         return 0;
6875 }
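
/*
 * For illustration: an administrator typically reaches igb_ndo_set_vf_bw()
 * through iproute2, e.g. "ip link set eth0 vf 0 rate 300" (interface name,
 * VF index and rate in Mbps are placeholders for the example).
 */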
6876
6877 static int igb_ndo_get_vf_config(struct net_device *netdev,
6878                                  int vf, struct ifla_vf_info *ivi)
6879 {
6880         struct igb_adapter *adapter = netdev_priv(netdev);
6881         if (vf >= adapter->vfs_allocated_count)
6882                 return -EINVAL;
6883         ivi->vf = vf;
6884         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6885         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6886         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6887         ivi->qos = adapter->vf_data[vf].pf_qos;
6888         return 0;
6889 }
6890
6891 static void igb_vmm_control(struct igb_adapter *adapter)
6892 {
6893         struct e1000_hw *hw = &adapter->hw;
6894         u32 reg;
6895
6896         switch (hw->mac.type) {
6897         case e1000_82575:
6898         default:
6899                 /* replication is not supported for 82575 */
6900                 return;
6901         case e1000_82576:
6902                 /* notify HW that the MAC is adding vlan tags */
6903                 reg = rd32(E1000_DTXCTL);
6904                 reg |= E1000_DTXCTL_VLAN_ADDED;
6905                 wr32(E1000_DTXCTL, reg);
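                /* fall through */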
6906         case e1000_82580:
6907                 /* enable replication vlan tag stripping */
6908                 reg = rd32(E1000_RPLOLR);
6909                 reg |= E1000_RPLOLR_STRVLAN;
6910                 wr32(E1000_RPLOLR, reg);
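                /* fall through */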
6911         case e1000_i350:
6912                 /* none of the above registers are supported by i350 */
6913                 break;
6914         }
6915
6916         if (adapter->vfs_allocated_count) {
6917                 igb_vmdq_set_loopback_pf(hw, true);
6918                 igb_vmdq_set_replication_pf(hw, true);
6919                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6920                                                 adapter->vfs_allocated_count);
6921         } else {
6922                 igb_vmdq_set_loopback_pf(hw, false);
6923                 igb_vmdq_set_replication_pf(hw, false);
6924         }
6925 }
6926
6927 /* igb_main.c */