igb: Move ITR related data into work container within the q_vector
[firefly-linux-kernel-4.4.55.git] drivers/net/ethernet/intel/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2011 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
42 #include <linux/if.h>
43 #include <linux/if_vlan.h>
44 #include <linux/pci.h>
45 #include <linux/pci-aspm.h>
46 #include <linux/delay.h>
47 #include <linux/interrupt.h>
48 #include <linux/ip.h>
49 #include <linux/tcp.h>
50 #include <linux/sctp.h>
51 #include <linux/if_ether.h>
52 #include <linux/aer.h>
53 #include <linux/prefetch.h>
54 #ifdef CONFIG_IGB_DCA
55 #include <linux/dca.h>
56 #endif
57 #include "igb.h"
58
59 #define MAJ 3
60 #define MIN 0
61 #define BUILD 6
62 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
63 __stringify(BUILD) "-k"
64 char igb_driver_name[] = "igb";
65 char igb_driver_version[] = DRV_VERSION;
66 static const char igb_driver_string[] =
67                                 "Intel(R) Gigabit Ethernet Network Driver";
68 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
69
70 static const struct e1000_info *igb_info_tbl[] = {
71         [board_82575] = &e1000_82575_info,
72 };
73
74 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
90         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
91         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
92         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
93         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
94         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
95         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
96         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
97         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
98         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
99         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
100         /* required last entry */
101         {0, }
102 };
103
104 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
105
106 void igb_reset(struct igb_adapter *);
107 static int igb_setup_all_tx_resources(struct igb_adapter *);
108 static int igb_setup_all_rx_resources(struct igb_adapter *);
109 static void igb_free_all_tx_resources(struct igb_adapter *);
110 static void igb_free_all_rx_resources(struct igb_adapter *);
111 static void igb_setup_mrqc(struct igb_adapter *);
112 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
113 static void __devexit igb_remove(struct pci_dev *pdev);
114 static void igb_init_hw_timer(struct igb_adapter *adapter);
115 static int igb_sw_init(struct igb_adapter *);
116 static int igb_open(struct net_device *);
117 static int igb_close(struct net_device *);
118 static void igb_configure_tx(struct igb_adapter *);
119 static void igb_configure_rx(struct igb_adapter *);
120 static void igb_clean_all_tx_rings(struct igb_adapter *);
121 static void igb_clean_all_rx_rings(struct igb_adapter *);
122 static void igb_clean_tx_ring(struct igb_ring *);
123 static void igb_clean_rx_ring(struct igb_ring *);
124 static void igb_set_rx_mode(struct net_device *);
125 static void igb_update_phy_info(unsigned long);
126 static void igb_watchdog(unsigned long);
127 static void igb_watchdog_task(struct work_struct *);
128 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
129 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
130                                                  struct rtnl_link_stats64 *stats);
131 static int igb_change_mtu(struct net_device *, int);
132 static int igb_set_mac(struct net_device *, void *);
133 static void igb_set_uta(struct igb_adapter *adapter);
134 static irqreturn_t igb_intr(int irq, void *);
135 static irqreturn_t igb_intr_msi(int irq, void *);
136 static irqreturn_t igb_msix_other(int irq, void *);
137 static irqreturn_t igb_msix_ring(int irq, void *);
138 #ifdef CONFIG_IGB_DCA
139 static void igb_update_dca(struct igb_q_vector *);
140 static void igb_setup_dca(struct igb_adapter *);
141 #endif /* CONFIG_IGB_DCA */
142 static int igb_poll(struct napi_struct *, int);
143 static bool igb_clean_tx_irq(struct igb_q_vector *);
144 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
145 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
146 static void igb_tx_timeout(struct net_device *);
147 static void igb_reset_task(struct work_struct *);
148 static void igb_vlan_mode(struct net_device *netdev, u32 features);
149 static void igb_vlan_rx_add_vid(struct net_device *, u16);
150 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
151 static void igb_restore_vlan(struct igb_adapter *);
152 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
153 static void igb_ping_all_vfs(struct igb_adapter *);
154 static void igb_msg_task(struct igb_adapter *);
155 static void igb_vmm_control(struct igb_adapter *);
156 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
157 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
158 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
159 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
160                                int vf, u16 vlan, u8 qos);
161 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
162 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
163                                  struct ifla_vf_info *ivi);
164 static void igb_check_vf_rate_limit(struct igb_adapter *);
165
166 #ifdef CONFIG_PM
167 static int igb_suspend(struct pci_dev *, pm_message_t);
168 static int igb_resume(struct pci_dev *);
169 #endif
170 static void igb_shutdown(struct pci_dev *);
171 #ifdef CONFIG_IGB_DCA
172 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
173 static struct notifier_block dca_notifier = {
174         .notifier_call  = igb_notify_dca,
175         .next           = NULL,
176         .priority       = 0
177 };
178 #endif
179 #ifdef CONFIG_NET_POLL_CONTROLLER
180 /* for netdump / net console */
181 static void igb_netpoll(struct net_device *);
182 #endif
183 #ifdef CONFIG_PCI_IOV
184 static unsigned int max_vfs = 0;
185 module_param(max_vfs, uint, 0);
186 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
187                  "per physical function");
188 #endif /* CONFIG_PCI_IOV */
189
190 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
191                      pci_channel_state_t);
192 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
193 static void igb_io_resume(struct pci_dev *);
194
195 static struct pci_error_handlers igb_err_handler = {
196         .error_detected = igb_io_error_detected,
197         .slot_reset = igb_io_slot_reset,
198         .resume = igb_io_resume,
199 };
200
201
202 static struct pci_driver igb_driver = {
203         .name     = igb_driver_name,
204         .id_table = igb_pci_tbl,
205         .probe    = igb_probe,
206         .remove   = __devexit_p(igb_remove),
207 #ifdef CONFIG_PM
208         /* Power Management Hooks */
209         .suspend  = igb_suspend,
210         .resume   = igb_resume,
211 #endif
212         .shutdown = igb_shutdown,
213         .err_handler = &igb_err_handler
214 };
215
216 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
217 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
218 MODULE_LICENSE("GPL");
219 MODULE_VERSION(DRV_VERSION);
220
221 struct igb_reg_info {
222         u32 ofs;
223         char *name;
224 };
225
226 static const struct igb_reg_info igb_reg_info_tbl[] = {
227
228         /* General Registers */
229         {E1000_CTRL, "CTRL"},
230         {E1000_STATUS, "STATUS"},
231         {E1000_CTRL_EXT, "CTRL_EXT"},
232
233         /* Interrupt Registers */
234         {E1000_ICR, "ICR"},
235
236         /* RX Registers */
237         {E1000_RCTL, "RCTL"},
238         {E1000_RDLEN(0), "RDLEN"},
239         {E1000_RDH(0), "RDH"},
240         {E1000_RDT(0), "RDT"},
241         {E1000_RXDCTL(0), "RXDCTL"},
242         {E1000_RDBAL(0), "RDBAL"},
243         {E1000_RDBAH(0), "RDBAH"},
244
245         /* TX Registers */
246         {E1000_TCTL, "TCTL"},
247         {E1000_TDBAL(0), "TDBAL"},
248         {E1000_TDBAH(0), "TDBAH"},
249         {E1000_TDLEN(0), "TDLEN"},
250         {E1000_TDH(0), "TDH"},
251         {E1000_TDT(0), "TDT"},
252         {E1000_TXDCTL(0), "TXDCTL"},
253         {E1000_TDFH, "TDFH"},
254         {E1000_TDFT, "TDFT"},
255         {E1000_TDFHS, "TDFHS"},
256         {E1000_TDFPC, "TDFPC"},
257
258         /* List Terminator */
259         {}
260 };
261
262 /*
263  * igb_regdump - register printout routine
264  */
265 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
266 {
267         int n = 0;
268         char rname[16];
269         u32 regs[8];
270
271         switch (reginfo->ofs) {
272         case E1000_RDLEN(0):
273                 for (n = 0; n < 4; n++)
274                         regs[n] = rd32(E1000_RDLEN(n));
275                 break;
276         case E1000_RDH(0):
277                 for (n = 0; n < 4; n++)
278                         regs[n] = rd32(E1000_RDH(n));
279                 break;
280         case E1000_RDT(0):
281                 for (n = 0; n < 4; n++)
282                         regs[n] = rd32(E1000_RDT(n));
283                 break;
284         case E1000_RXDCTL(0):
285                 for (n = 0; n < 4; n++)
286                         regs[n] = rd32(E1000_RXDCTL(n));
287                 break;
288         case E1000_RDBAL(0):
289                 for (n = 0; n < 4; n++)
290                         regs[n] = rd32(E1000_RDBAL(n));
291                 break;
292         case E1000_RDBAH(0):
293                 for (n = 0; n < 4; n++)
294                         regs[n] = rd32(E1000_RDBAH(n));
295                 break;
296         case E1000_TDBAL(0):
297                 for (n = 0; n < 4; n++)
298                         regs[n] = rd32(E1000_TDBAL(n));
299                 break;
300         case E1000_TDBAH(0):
301                 for (n = 0; n < 4; n++)
302                         regs[n] = rd32(E1000_TDBAH(n));
303                 break;
304         case E1000_TDLEN(0):
305                 for (n = 0; n < 4; n++)
306                         regs[n] = rd32(E1000_TDLEN(n));
307                 break;
308         case E1000_TDH(0):
309                 for (n = 0; n < 4; n++)
310                         regs[n] = rd32(E1000_TDH(n));
311                 break;
312         case E1000_TDT(0):
313                 for (n = 0; n < 4; n++)
314                         regs[n] = rd32(E1000_TDT(n));
315                 break;
316         case E1000_TXDCTL(0):
317                 for (n = 0; n < 4; n++)
318                         regs[n] = rd32(E1000_TXDCTL(n));
319                 break;
320         default:
321                 printk(KERN_INFO "%-15s %08x\n",
322                         reginfo->name, rd32(reginfo->ofs));
323                 return;
324         }
325
326         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
327         printk(KERN_INFO "%-15s ", rname);
328         for (n = 0; n < 4; n++)
329                 printk(KERN_CONT "%08x ", regs[n]);
330         printk(KERN_CONT "\n");
331 }
332
333 /*
334  * igb_dump - Print registers, tx-rings and rx-rings
335  */
336 static void igb_dump(struct igb_adapter *adapter)
337 {
338         struct net_device *netdev = adapter->netdev;
339         struct e1000_hw *hw = &adapter->hw;
340         struct igb_reg_info *reginfo;
341         struct igb_ring *tx_ring;
342         union e1000_adv_tx_desc *tx_desc;
343         struct my_u0 { u64 a; u64 b; } *u0;
344         struct igb_ring *rx_ring;
345         union e1000_adv_rx_desc *rx_desc;
346         u32 staterr;
347         u16 i, n;
348
349         if (!netif_msg_hw(adapter))
350                 return;
351
352         /* Print netdevice Info */
353         if (netdev) {
354                 dev_info(&adapter->pdev->dev, "Net device Info\n");
355                 printk(KERN_INFO "Device Name     state            "
356                         "trans_start      last_rx\n");
357                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
358                 netdev->name,
359                 netdev->state,
360                 netdev->trans_start,
361                 netdev->last_rx);
362         }
363
364         /* Print Registers */
365         dev_info(&adapter->pdev->dev, "Register Dump\n");
366         printk(KERN_INFO " Register Name   Value\n");
367         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
368              reginfo->name; reginfo++) {
369                 igb_regdump(hw, reginfo);
370         }
371
372         /* Print TX Ring Summary */
373         if (!netdev || !netif_running(netdev))
374                 goto exit;
375
376         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
377         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
378                 " leng ntw timestamp\n");
379         for (n = 0; n < adapter->num_tx_queues; n++) {
380                 struct igb_tx_buffer *buffer_info;
381                 tx_ring = adapter->tx_ring[n];
382                 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
383                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
384                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
385                            (u64)buffer_info->dma,
386                            buffer_info->length,
387                            buffer_info->next_to_watch,
388                            (u64)buffer_info->time_stamp);
389         }
390
391         /* Print TX Rings */
392         if (!netif_msg_tx_done(adapter))
393                 goto rx_ring_summary;
394
395         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
396
397         /* Transmit Descriptor Formats
398          *
399          * Advanced Transmit Descriptor
400          *   +--------------------------------------------------------------+
401          * 0 |         Buffer Address [63:0]                                |
402          *   +--------------------------------------------------------------+
403          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
404          *   +--------------------------------------------------------------+
405          *   63      46 45    40 39 38 36 35 32 31   24             15       0
406          */
407
408         for (n = 0; n < adapter->num_tx_queues; n++) {
409                 tx_ring = adapter->tx_ring[n];
410                 printk(KERN_INFO "------------------------------------\n");
411                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
412                 printk(KERN_INFO "------------------------------------\n");
413                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
414                         "[PlPOCIStDDM Ln] [bi->dma       ] "
415                         "leng  ntw timestamp        bi->skb\n");
416
417                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
418                         struct igb_tx_buffer *buffer_info;
419                         tx_desc = IGB_TX_DESC(tx_ring, i);
420                         buffer_info = &tx_ring->tx_buffer_info[i];
421                         u0 = (struct my_u0 *)tx_desc;
422                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
423                                 " %04X  %p %016llX %p", i,
424                                 le64_to_cpu(u0->a),
425                                 le64_to_cpu(u0->b),
426                                 (u64)buffer_info->dma,
427                                 buffer_info->length,
428                                 buffer_info->next_to_watch,
429                                 (u64)buffer_info->time_stamp,
430                                 buffer_info->skb);
431                         if (i == tx_ring->next_to_use &&
432                                 i == tx_ring->next_to_clean)
433                                 printk(KERN_CONT " NTC/U\n");
434                         else if (i == tx_ring->next_to_use)
435                                 printk(KERN_CONT " NTU\n");
436                         else if (i == tx_ring->next_to_clean)
437                                 printk(KERN_CONT " NTC\n");
438                         else
439                                 printk(KERN_CONT "\n");
440
441                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
442                                 print_hex_dump(KERN_INFO, "",
443                                         DUMP_PREFIX_ADDRESS,
444                                         16, 1, phys_to_virt(buffer_info->dma),
445                                         buffer_info->length, true);
446                 }
447         }
448
449         /* Print RX Rings Summary */
450 rx_ring_summary:
451         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
452         printk(KERN_INFO "Queue [NTU] [NTC]\n");
453         for (n = 0; n < adapter->num_rx_queues; n++) {
454                 rx_ring = adapter->rx_ring[n];
455                 printk(KERN_INFO " %5d %5X %5X\n", n,
456                            rx_ring->next_to_use, rx_ring->next_to_clean);
457         }
458
459         /* Print RX Rings */
460         if (!netif_msg_rx_status(adapter))
461                 goto exit;
462
463         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
464
465         /* Advanced Receive Descriptor (Read) Format
466          *    63                                           1        0
467          *    +-----------------------------------------------------+
468          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
469          *    +----------------------------------------------+------+
470          *  8 |       Header Buffer Address [63:1]           |  DD  |
471          *    +-----------------------------------------------------+
472          *
473          *
474          * Advanced Receive Descriptor (Write-Back) Format
475          *
476          *   63       48 47    32 31  30      21 20 17 16   4 3     0
477          *   +------------------------------------------------------+
478          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
479          *   | Checksum   Ident  |   |           |    | Type | Type |
480          *   +------------------------------------------------------+
481          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
482          *   +------------------------------------------------------+
483          *   63       48 47    32 31            20 19               0
484          */
485
486         for (n = 0; n < adapter->num_rx_queues; n++) {
487                 rx_ring = adapter->rx_ring[n];
488                 printk(KERN_INFO "------------------------------------\n");
489                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
490                 printk(KERN_INFO "------------------------------------\n");
491                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
492                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
493                         "<-- Adv Rx Read format\n");
494                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
495                         "[vl er S cks ln] ---------------- [bi->skb] "
496                         "<-- Adv Rx Write-Back format\n");
497
498                 for (i = 0; i < rx_ring->count; i++) {
499                         struct igb_rx_buffer *buffer_info;
500                         buffer_info = &rx_ring->rx_buffer_info[i];
501                         rx_desc = IGB_RX_DESC(rx_ring, i);
502                         u0 = (struct my_u0 *)rx_desc;
503                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
504                         if (staterr & E1000_RXD_STAT_DD) {
505                                 /* Descriptor Done */
506                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
507                                         "%016llX ---------------- %p", i,
508                                         le64_to_cpu(u0->a),
509                                         le64_to_cpu(u0->b),
510                                         buffer_info->skb);
511                         } else {
512                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
513                                         "%016llX %016llX %p", i,
514                                         le64_to_cpu(u0->a),
515                                         le64_to_cpu(u0->b),
516                                         (u64)buffer_info->dma,
517                                         buffer_info->skb);
518
519                                 if (netif_msg_pktdata(adapter)) {
520                                         print_hex_dump(KERN_INFO, "",
521                                                 DUMP_PREFIX_ADDRESS,
522                                                 16, 1,
523                                                 phys_to_virt(buffer_info->dma),
524                                                 IGB_RX_HDR_LEN, true);
525                                         print_hex_dump(KERN_INFO, "",
526                                           DUMP_PREFIX_ADDRESS,
527                                           16, 1,
528                                           phys_to_virt(
529                                             buffer_info->page_dma +
530                                             buffer_info->page_offset),
531                                           PAGE_SIZE/2, true);
532                                 }
533                         }
534
535                         if (i == rx_ring->next_to_use)
536                                 printk(KERN_CONT " NTU\n");
537                         else if (i == rx_ring->next_to_clean)
538                                 printk(KERN_CONT " NTC\n");
539                         else
540                                 printk(KERN_CONT "\n");
541
542                 }
543         }
544
545 exit:
546         return;
547 }
548
549
550 /**
551  * igb_read_clock - read raw cycle counter (to be used by time counter)
552  */
553 static cycle_t igb_read_clock(const struct cyclecounter *tc)
554 {
555         struct igb_adapter *adapter =
556                 container_of(tc, struct igb_adapter, cycles);
557         struct e1000_hw *hw = &adapter->hw;
558         u64 stamp = 0;
559         int shift = 0;
560
561         /*
562          * The timestamp latches on lowest register read. For the 82580
563          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
564          * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
565          */
566         if (hw->mac.type == e1000_82580) {
567                 stamp = rd32(E1000_SYSTIMR) >> 8;
568                 shift = IGB_82580_TSYNC_SHIFT;
569         }
570
571         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
572         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
573         return stamp;
574 }
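
/*
 * Illustrative sketch (not part of the driver): how igb_read_clock() above
 * assembles the 64-bit cycle value.  On 82580 the SYSTIMR read latches the
 * SYSTIML/SYSTIMH pair (its residue lands in the low bits but reads as zero
 * because TIMINCA was never adjusted), and the shift makes room for it.
 * The helper below is hypothetical and only restates the bit arithmetic.
 */
#if 0	/* example only, never compiled */
static u64 igb_example_systim(u32 systimr, u32 systiml, u32 systimh, int shift)
{
	u64 stamp = 0;

	if (shift)			/* 82580-style layout */
		stamp = systimr >> 8;	/* effectively zero, see above */
	stamp |= (u64)systiml << shift;
	stamp |= (u64)systimh << (shift + 32);
	return stamp;
}
#endif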
575
576 /**
577  * igb_get_hw_dev - return device
578  * used by hardware layer to print debugging information
579  **/
580 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
581 {
582         struct igb_adapter *adapter = hw->back;
583         return adapter->netdev;
584 }
585
586 /**
587  * igb_init_module - Driver Registration Routine
588  *
589  * igb_init_module is the first routine called when the driver is
590  * loaded. All it does is register with the PCI subsystem.
591  **/
592 static int __init igb_init_module(void)
593 {
594         int ret;
595         printk(KERN_INFO "%s - version %s\n",
596                igb_driver_string, igb_driver_version);
597
598         printk(KERN_INFO "%s\n", igb_copyright);
599
600 #ifdef CONFIG_IGB_DCA
601         dca_register_notify(&dca_notifier);
602 #endif
603         ret = pci_register_driver(&igb_driver);
604         return ret;
605 }
606
607 module_init(igb_init_module);
608
609 /**
610  * igb_exit_module - Driver Exit Cleanup Routine
611  *
612  * igb_exit_module is called just before the driver is removed
613  * from memory.
614  **/
615 static void __exit igb_exit_module(void)
616 {
617 #ifdef CONFIG_IGB_DCA
618         dca_unregister_notify(&dca_notifier);
619 #endif
620         pci_unregister_driver(&igb_driver);
621 }
622
623 module_exit(igb_exit_module);
624
625 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
626 /**
627  * igb_cache_ring_register - Descriptor ring to register mapping
628  * @adapter: board private structure to initialize
629  *
630  * Once we know the feature-set enabled for the device, we'll cache
631  * the register offset the descriptor ring is assigned to.
632  **/
633 static void igb_cache_ring_register(struct igb_adapter *adapter)
634 {
635         int i = 0, j = 0;
636         u32 rbase_offset = adapter->vfs_allocated_count;
637
638         switch (adapter->hw.mac.type) {
639         case e1000_82576:
640                 /* The queues are allocated for virtualization such that VF 0
641                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
642                  * In order to avoid collision we start at the first free queue
643                  * and continue consuming queues in the same sequence
644                  */
645                 if (adapter->vfs_allocated_count) {
646                         for (; i < adapter->rss_queues; i++)
647                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
648                                                                Q_IDX_82576(i);
649                 }
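                /* Fall through - the default case below maps any remaining
                 * rx rings and all tx rings 1:1 starting at rbase_offset */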
650         case e1000_82575:
651         case e1000_82580:
652         case e1000_i350:
653         default:
654                 for (; i < adapter->num_rx_queues; i++)
655                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
656                 for (; j < adapter->num_tx_queues; j++)
657                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
658                 break;
659         }
660 }
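
/*
 * Illustrative sketch (not part of the driver): the interleaving performed by
 * Q_IDX_82576() in the 82576-with-VFs case above.  Bit 0 of the software
 * queue index selects the upper bank of hardware queues (8..15) and the
 * remaining bits select the slot, giving the sequence 0, 8, 1, 9, 2, 10, ...
 * The PF then adds rbase_offset (the number of allocated VFs) on top of this
 * so it never collides with the queue pairs owned by the VFs.
 */
#if 0	/* example only, never compiled */
static void igb_example_q_idx_82576(void)
{
	unsigned int i;

	for (i = 0; i < 8; i++)
		pr_info("sw queue %u -> Q_IDX_82576 %u\n", i, Q_IDX_82576(i));
	/* prints 0->0, 1->8, 2->1, 3->9, 4->2, 5->10, 6->3, 7->11 */
}
#endif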
661
662 static void igb_free_queues(struct igb_adapter *adapter)
663 {
664         int i;
665
666         for (i = 0; i < adapter->num_tx_queues; i++) {
667                 kfree(adapter->tx_ring[i]);
668                 adapter->tx_ring[i] = NULL;
669         }
670         for (i = 0; i < adapter->num_rx_queues; i++) {
671                 kfree(adapter->rx_ring[i]);
672                 adapter->rx_ring[i] = NULL;
673         }
674         adapter->num_rx_queues = 0;
675         adapter->num_tx_queues = 0;
676 }
677
678 /**
679  * igb_alloc_queues - Allocate memory for all rings
680  * @adapter: board private structure to initialize
681  *
682  * We allocate one ring per queue at run-time since we don't know the
683  * number of queues at compile-time.
684  **/
685 static int igb_alloc_queues(struct igb_adapter *adapter)
686 {
687         struct igb_ring *ring;
688         int i;
689         int orig_node = adapter->node;
690
691         for (i = 0; i < adapter->num_tx_queues; i++) {
692                 if (orig_node == -1) {
693                         int cur_node = next_online_node(adapter->node);
694                         if (cur_node == MAX_NUMNODES)
695                                 cur_node = first_online_node;
696                         adapter->node = cur_node;
697                 }
698                 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
699                                     adapter->node);
700                 if (!ring)
701                         ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702                 if (!ring)
703                         goto err;
704                 ring->count = adapter->tx_ring_count;
705                 ring->queue_index = i;
706                 ring->dev = &adapter->pdev->dev;
707                 ring->netdev = adapter->netdev;
708                 ring->numa_node = adapter->node;
709                 /* For 82575, context index must be unique per ring. */
710                 if (adapter->hw.mac.type == e1000_82575)
711                         set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
712                 adapter->tx_ring[i] = ring;
713         }
714         /* Restore the adapter's original node */
715         adapter->node = orig_node;
716
717         for (i = 0; i < adapter->num_rx_queues; i++) {
718                 if (orig_node == -1) {
719                         int cur_node = next_online_node(adapter->node);
720                         if (cur_node == MAX_NUMNODES)
721                                 cur_node = first_online_node;
722                         adapter->node = cur_node;
723                 }
724                 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
725                                     adapter->node);
726                 if (!ring)
727                         ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
728                 if (!ring)
729                         goto err;
730                 ring->count = adapter->rx_ring_count;
731                 ring->queue_index = i;
732                 ring->dev = &adapter->pdev->dev;
733                 ring->netdev = adapter->netdev;
734                 ring->numa_node = adapter->node;
735                 /* enable rx checksum */
736                 set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags);
737                 /* set flag indicating ring supports SCTP checksum offload */
738                 if (adapter->hw.mac.type >= e1000_82576)
739                         set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
740                 adapter->rx_ring[i] = ring;
741         }
742         /* Restore the adapter's original node */
743         adapter->node = orig_node;
744
745         igb_cache_ring_register(adapter);
746
747         return 0;
748
749 err:
750         /* Restore the adapter's original node */
751         adapter->node = orig_node;
752         igb_free_queues(adapter);
753
754         return -ENOMEM;
755 }
756
757 #define IGB_N0_QUEUE -1
758 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
759 {
760         u32 msixbm = 0;
761         struct igb_adapter *adapter = q_vector->adapter;
762         struct e1000_hw *hw = &adapter->hw;
763         u32 ivar, index;
764         int rx_queue = IGB_N0_QUEUE;
765         int tx_queue = IGB_N0_QUEUE;
766
767         if (q_vector->rx.ring)
768                 rx_queue = q_vector->rx.ring->reg_idx;
769         if (q_vector->tx.ring)
770                 tx_queue = q_vector->tx.ring->reg_idx;
771
772         switch (hw->mac.type) {
773         case e1000_82575:
774                 /* The 82575 assigns vectors using a bitmask, which matches the
775                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
776                    or more queues to a vector, we write the appropriate bits
777                    into the MSIXBM register for that vector. */
778                 if (rx_queue > IGB_N0_QUEUE)
779                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
780                 if (tx_queue > IGB_N0_QUEUE)
781                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
782                 if (!adapter->msix_entries && msix_vector == 0)
783                         msixbm |= E1000_EIMS_OTHER;
784                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
785                 q_vector->eims_value = msixbm;
786                 break;
787         case e1000_82576:
788                 /* 82576 uses a table-based method for assigning vectors.
789                    Each queue has a single entry in the table to which we write
790                    a vector number along with a "valid" bit.  Sadly, the layout
791                    of the table is somewhat counterintuitive. */
792                 if (rx_queue > IGB_N0_QUEUE) {
793                         index = (rx_queue & 0x7);
794                         ivar = array_rd32(E1000_IVAR0, index);
795                         if (rx_queue < 8) {
796                                 /* vector goes into low byte of register */
797                                 ivar = ivar & 0xFFFFFF00;
798                                 ivar |= msix_vector | E1000_IVAR_VALID;
799                         } else {
800                                 /* vector goes into third byte of register */
801                                 ivar = ivar & 0xFF00FFFF;
802                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
803                         }
804                         array_wr32(E1000_IVAR0, index, ivar);
805                 }
806                 if (tx_queue > IGB_N0_QUEUE) {
807                         index = (tx_queue & 0x7);
808                         ivar = array_rd32(E1000_IVAR0, index);
809                         if (tx_queue < 8) {
810                                 /* vector goes into second byte of register */
811                                 ivar = ivar & 0xFFFF00FF;
812                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
813                         } else {
814                                 /* vector goes into high byte of register */
815                                 ivar = ivar & 0x00FFFFFF;
816                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
817                         }
818                         array_wr32(E1000_IVAR0, index, ivar);
819                 }
820                 q_vector->eims_value = 1 << msix_vector;
821                 break;
822         case e1000_82580:
823         case e1000_i350:
824                 /* 82580 uses the same table-based approach as 82576 but has fewer
825                    entries; as a result, we carry over for queues greater than 4. */
826                 if (rx_queue > IGB_N0_QUEUE) {
827                         index = (rx_queue >> 1);
828                         ivar = array_rd32(E1000_IVAR0, index);
829                         if (rx_queue & 0x1) {
830                                 /* vector goes into third byte of register */
831                                 ivar = ivar & 0xFF00FFFF;
832                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
833                         } else {
834                                 /* vector goes into low byte of register */
835                                 ivar = ivar & 0xFFFFFF00;
836                                 ivar |= msix_vector | E1000_IVAR_VALID;
837                         }
838                         array_wr32(E1000_IVAR0, index, ivar);
839                 }
840                 if (tx_queue > IGB_N0_QUEUE) {
841                         index = (tx_queue >> 1);
842                         ivar = array_rd32(E1000_IVAR0, index);
843                         if (tx_queue & 0x1) {
844                                 /* vector goes into high byte of register */
845                                 ivar = ivar & 0x00FFFFFF;
846                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
847                         } else {
848                                 /* vector goes into second byte of register */
849                                 ivar = ivar & 0xFFFF00FF;
850                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
851                         }
852                         array_wr32(E1000_IVAR0, index, ivar);
853                 }
854                 q_vector->eims_value = 1 << msix_vector;
855                 break;
856         default:
857                 BUG();
858                 break;
859         }
860
861         /* add q_vector eims value to global eims_enable_mask */
862         adapter->eims_enable_mask |= q_vector->eims_value;
863
864         /* configure q_vector to set itr on first interrupt */
865         q_vector->set_itr = 1;
866 }
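
/*
 * Illustrative sketch (not part of the driver): where a queue's MSI-X vector
 * lands in the 82576 IVAR table, per the switch above.  IVAR register
 * (queue & 0x7) holds four byte lanes: rx queue q in byte 0 (or byte 2 for
 * q >= 8) and tx queue q in byte 1 (or byte 3 for q >= 8).  The helper below
 * is hypothetical and only demonstrates the index/lane arithmetic.
 */
#if 0	/* example only, never compiled */
static void igb_example_ivar_82576(int queue, bool is_tx,
				   u32 *index, u32 *byte_lane)
{
	*index = queue & 0x7;				/* which IVAR register */
	*byte_lane = (queue < 8 ? 0 : 2) + (is_tx ? 1 : 0);
	/* (vector | E1000_IVAR_VALID) is then written to bits
	 * (8 * byte_lane) .. (8 * byte_lane + 7) of that register */
}
#endif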
867
868 /**
869  * igb_configure_msix - Configure MSI-X hardware
870  *
871  * igb_configure_msix sets up the hardware to properly
872  * generate MSI-X interrupts.
873  **/
874 static void igb_configure_msix(struct igb_adapter *adapter)
875 {
876         u32 tmp;
877         int i, vector = 0;
878         struct e1000_hw *hw = &adapter->hw;
879
880         adapter->eims_enable_mask = 0;
881
882         /* set vector for other causes, i.e. link changes */
883         switch (hw->mac.type) {
884         case e1000_82575:
885                 tmp = rd32(E1000_CTRL_EXT);
886                 /* enable MSI-X PBA support*/
887                 tmp |= E1000_CTRL_EXT_PBA_CLR;
888
889                 /* Auto-Mask interrupts upon ICR read. */
890                 tmp |= E1000_CTRL_EXT_EIAME;
891                 tmp |= E1000_CTRL_EXT_IRCA;
892
893                 wr32(E1000_CTRL_EXT, tmp);
894
895                 /* enable msix_other interrupt */
896                 array_wr32(E1000_MSIXBM(0), vector++,
897                                       E1000_EIMS_OTHER);
898                 adapter->eims_other = E1000_EIMS_OTHER;
899
900                 break;
901
902         case e1000_82576:
903         case e1000_82580:
904         case e1000_i350:
905                 /* Turn on MSI-X capability first, or our settings
906                  * won't stick.  And it will take days to debug. */
907                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
908                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
909                                 E1000_GPIE_NSICR);
910
911                 /* enable msix_other interrupt */
912                 adapter->eims_other = 1 << vector;
913                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
914
915                 wr32(E1000_IVAR_MISC, tmp);
916                 break;
917         default:
918                 /* do nothing, since nothing else supports MSI-X */
919                 break;
920         } /* switch (hw->mac.type) */
921
922         adapter->eims_enable_mask |= adapter->eims_other;
923
924         for (i = 0; i < adapter->num_q_vectors; i++)
925                 igb_assign_vector(adapter->q_vector[i], vector++);
926
927         wrfl();
928 }
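
/*
 * Illustrative sketch (not part of the driver): how eims_enable_mask ends up
 * composed on 82576/82580/i350, where each vector owns a single EIMS bit.
 * Vector 0 is the "other causes" (link/mailbox) vector and each q_vector
 * takes the next bit, so four q_vectors give a mask of 0x1f.  On 82575 the
 * bits are the per-queue EICR bits instead, so this sketch does not apply.
 */
#if 0	/* example only, never compiled */
static u32 igb_example_eims_mask(unsigned int num_q_vectors)
{
	u32 mask = 1;		/* bit 0: eims_other (link/mailbox vector) */
	unsigned int v;

	for (v = 0; v < num_q_vectors; v++)
		mask |= 1 << (v + 1);	/* one bit per queue vector */
	return mask;		/* e.g. 0x1f for four q_vectors */
}
#endif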
929
930 /**
931  * igb_request_msix - Initialize MSI-X interrupts
932  *
933  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
934  * kernel.
935  **/
936 static int igb_request_msix(struct igb_adapter *adapter)
937 {
938         struct net_device *netdev = adapter->netdev;
939         struct e1000_hw *hw = &adapter->hw;
940         int i, err = 0, vector = 0;
941
942         err = request_irq(adapter->msix_entries[vector].vector,
943                           igb_msix_other, 0, netdev->name, adapter);
944         if (err)
945                 goto out;
946         vector++;
947
948         for (i = 0; i < adapter->num_q_vectors; i++) {
949                 struct igb_q_vector *q_vector = adapter->q_vector[i];
950
951                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
952
953                 if (q_vector->rx.ring && q_vector->tx.ring)
954                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
955                                 q_vector->rx.ring->queue_index);
956                 else if (q_vector->tx.ring)
957                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
958                                 q_vector->tx.ring->queue_index);
959                 else if (q_vector->rx.ring)
960                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
961                                 q_vector->rx.ring->queue_index);
962                 else
963                         sprintf(q_vector->name, "%s-unused", netdev->name);
964
965                 err = request_irq(adapter->msix_entries[vector].vector,
966                                   igb_msix_ring, 0, q_vector->name,
967                                   q_vector);
968                 if (err)
969                         goto out;
970                 vector++;
971         }
972
973         igb_configure_msix(adapter);
974         return 0;
975 out:
976         return err;
977 }
978
979 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
980 {
981         if (adapter->msix_entries) {
982                 pci_disable_msix(adapter->pdev);
983                 kfree(adapter->msix_entries);
984                 adapter->msix_entries = NULL;
985         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
986                 pci_disable_msi(adapter->pdev);
987         }
988 }
989
990 /**
991  * igb_free_q_vectors - Free memory allocated for interrupt vectors
992  * @adapter: board private structure to initialize
993  *
994  * This function frees the memory allocated to the q_vectors.  In addition if
995  * NAPI is enabled it will delete any references to the NAPI struct prior
996  * to freeing the q_vector.
997  **/
998 static void igb_free_q_vectors(struct igb_adapter *adapter)
999 {
1000         int v_idx;
1001
1002         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1003                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1004                 adapter->q_vector[v_idx] = NULL;
1005                 if (!q_vector)
1006                         continue;
1007                 netif_napi_del(&q_vector->napi);
1008                 kfree(q_vector);
1009         }
1010         adapter->num_q_vectors = 0;
1011 }
1012
1013 /**
1014  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1015  *
1016  * This function resets the device so that it has 0 rx queues, tx queues, and
1017  * MSI-X interrupts allocated.
1018  */
1019 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1020 {
1021         igb_free_queues(adapter);
1022         igb_free_q_vectors(adapter);
1023         igb_reset_interrupt_capability(adapter);
1024 }
1025
1026 /**
1027  * igb_set_interrupt_capability - set MSI or MSI-X if supported
1028  *
1029  * Attempt to configure interrupts using the best available
1030  * capabilities of the hardware and kernel.
1031  **/
1032 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1033 {
1034         int err;
1035         int numvecs, i;
1036
1037         /* Number of supported queues. */
1038         adapter->num_rx_queues = adapter->rss_queues;
1039         if (adapter->vfs_allocated_count)
1040                 adapter->num_tx_queues = 1;
1041         else
1042                 adapter->num_tx_queues = adapter->rss_queues;
1043
1044         /* start with one vector for every rx queue */
1045         numvecs = adapter->num_rx_queues;
1046
1047         /* if tx handler is separate add 1 for every tx queue */
1048         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1049                 numvecs += adapter->num_tx_queues;
1050
1051         /* store the number of vectors reserved for queues */
1052         adapter->num_q_vectors = numvecs;
1053
1054         /* add 1 vector for link status interrupts */
1055         numvecs++;
1056         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1057                                         GFP_KERNEL);
1058         if (!adapter->msix_entries)
1059                 goto msi_only;
1060
1061         for (i = 0; i < numvecs; i++)
1062                 adapter->msix_entries[i].entry = i;
1063
1064         err = pci_enable_msix(adapter->pdev,
1065                               adapter->msix_entries,
1066                               numvecs);
1067         if (err == 0)
1068                 goto out;
1069
1070         igb_reset_interrupt_capability(adapter);
1071
1072         /* If we can't do MSI-X, try MSI */
1073 msi_only:
1074 #ifdef CONFIG_PCI_IOV
1075         /* disable SR-IOV for non MSI-X configurations */
1076         if (adapter->vf_data) {
1077                 struct e1000_hw *hw = &adapter->hw;
1078                 /* disable iov and allow time for transactions to clear */
1079                 pci_disable_sriov(adapter->pdev);
1080                 msleep(500);
1081
1082                 kfree(adapter->vf_data);
1083                 adapter->vf_data = NULL;
1084                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1085                 wrfl();
1086                 msleep(100);
1087                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1088         }
1089 #endif
1090         adapter->vfs_allocated_count = 0;
1091         adapter->rss_queues = 1;
1092         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1093         adapter->num_rx_queues = 1;
1094         adapter->num_tx_queues = 1;
1095         adapter->num_q_vectors = 1;
1096         if (!pci_enable_msi(adapter->pdev))
1097                 adapter->flags |= IGB_FLAG_HAS_MSI;
1098 out:
1099         /* Notify the stack of the (possibly) reduced queue counts. */
1100         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1101         return netif_set_real_num_rx_queues(adapter->netdev,
1102                                             adapter->num_rx_queues);
1103 }
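
/*
 * Worked example (not part of the driver) of the vector budget computed
 * above: with rss_queues = 4 and IGB_FLAG_QUEUE_PAIRS clear, the request is
 * 4 rx + 4 tx + 1 link = 9 MSI-X vectors; with queue pairs enabled the tx
 * rings share the rx vectors, so it drops to 4 + 1 = 5.  If pci_enable_msix()
 * refuses, everything collapses to a single queue pair with MSI (or, failing
 * that, legacy INTx).
 */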
1104
1105 /**
1106  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1107  * @adapter: board private structure to initialize
1108  *
1109  * We allocate one q_vector per queue interrupt.  If allocation fails we
1110  * return -ENOMEM.
1111  **/
1112 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1113 {
1114         struct igb_q_vector *q_vector;
1115         struct e1000_hw *hw = &adapter->hw;
1116         int v_idx;
1117         int orig_node = adapter->node;
1118
1119         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1120                 if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1121                                                 adapter->num_tx_queues)) &&
1122                     (adapter->num_rx_queues == v_idx))
1123                         adapter->node = orig_node;
1124                 if (orig_node == -1) {
1125                         int cur_node = next_online_node(adapter->node);
1126                         if (cur_node == MAX_NUMNODES)
1127                                 cur_node = first_online_node;
1128                         adapter->node = cur_node;
1129                 }
1130                 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1131                                         adapter->node);
1132                 if (!q_vector)
1133                         q_vector = kzalloc(sizeof(struct igb_q_vector),
1134                                            GFP_KERNEL);
1135                 if (!q_vector)
1136                         goto err_out;
1137                 q_vector->adapter = adapter;
1138                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1139                 q_vector->itr_val = IGB_START_ITR;
1140                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1141                 adapter->q_vector[v_idx] = q_vector;
1142         }
1143         /* Restore the adapter's original node */
1144         adapter->node = orig_node;
1145
1146         return 0;
1147
1148 err_out:
1149         /* Restore the adapter's original node */
1150         adapter->node = orig_node;
1151         igb_free_q_vectors(adapter);
1152         return -ENOMEM;
1153 }
1154
1155 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1156                                       int ring_idx, int v_idx)
1157 {
1158         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1159
1160         q_vector->rx.ring = adapter->rx_ring[ring_idx];
1161         q_vector->rx.ring->q_vector = q_vector;
1162         q_vector->rx.count++;
1163         q_vector->itr_val = adapter->rx_itr_setting;
1164         if (q_vector->itr_val && q_vector->itr_val <= 3)
1165                 q_vector->itr_val = IGB_START_ITR;
1166 }
1167
1168 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1169                                       int ring_idx, int v_idx)
1170 {
1171         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1172
1173         q_vector->tx.ring = adapter->tx_ring[ring_idx];
1174         q_vector->tx.ring->q_vector = q_vector;
1175         q_vector->tx.count++;
1176         q_vector->itr_val = adapter->tx_itr_setting;
1177         q_vector->tx.work_limit = adapter->tx_work_limit;
1178         if (q_vector->itr_val && q_vector->itr_val <= 3)
1179                 q_vector->itr_val = IGB_START_ITR;
1180 }
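
/*
 * Note on the "itr_val <= 3" checks above (an editorial aside, not from the
 * original source): the rx_itr_setting/tx_itr_setting values of 3 or less
 * are mode codes coming from ethtool (0 turns moderation off, the others
 * request dynamic moderation) rather than literal EITR intervals, so when
 * such a code is configured the vector starts out at IGB_START_ITR and is
 * adjusted at runtime.
 */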
1181
1182 /**
1183  * igb_map_ring_to_vector - maps allocated queues to vectors
1184  *
1185  * This function maps the recently allocated queues to vectors.
1186  **/
1187 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1188 {
1189         int i;
1190         int v_idx = 0;
1191
1192         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1193             (adapter->num_q_vectors < adapter->num_tx_queues))
1194                 return -ENOMEM;
1195
1196         if (adapter->num_q_vectors >=
1197             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1198                 for (i = 0; i < adapter->num_rx_queues; i++)
1199                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1200                 for (i = 0; i < adapter->num_tx_queues; i++)
1201                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1202         } else {
1203                 for (i = 0; i < adapter->num_rx_queues; i++) {
1204                         if (i < adapter->num_tx_queues)
1205                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1206                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1207                 }
1208                 for (; i < adapter->num_tx_queues; i++)
1209                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1210         }
1211         return 0;
1212 }
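
/*
 * Worked example (not part of the driver) of the mapping above with four rx
 * and four tx rings: given eight q_vectors, every ring gets its own vector
 * (rx0->v0 .. rx3->v3, tx0->v4 .. tx3->v7); given only four q_vectors the
 * rings are paired, so vector i handles both rx_ring[i] and tx_ring[i].
 */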
1213
1214 /**
1215  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1216  *
1217  * This function initializes the interrupts and allocates all of the queues.
1218  **/
1219 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1220 {
1221         struct pci_dev *pdev = adapter->pdev;
1222         int err;
1223
1224         err = igb_set_interrupt_capability(adapter);
1225         if (err)
1226                 return err;
1227
1228         err = igb_alloc_q_vectors(adapter);
1229         if (err) {
1230                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1231                 goto err_alloc_q_vectors;
1232         }
1233
1234         err = igb_alloc_queues(adapter);
1235         if (err) {
1236                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1237                 goto err_alloc_queues;
1238         }
1239
1240         err = igb_map_ring_to_vector(adapter);
1241         if (err) {
1242                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1243                 goto err_map_queues;
1244         }
1245
1246
1247         return 0;
1248 err_map_queues:
1249         igb_free_queues(adapter);
1250 err_alloc_queues:
1251         igb_free_q_vectors(adapter);
1252 err_alloc_q_vectors:
1253         igb_reset_interrupt_capability(adapter);
1254         return err;
1255 }
1256
1257 /**
1258  * igb_request_irq - initialize interrupts
1259  *
1260  * Attempts to configure interrupts using the best available
1261  * capabilities of the hardware and kernel.
1262  **/
1263 static int igb_request_irq(struct igb_adapter *adapter)
1264 {
1265         struct net_device *netdev = adapter->netdev;
1266         struct pci_dev *pdev = adapter->pdev;
1267         int err = 0;
1268
1269         if (adapter->msix_entries) {
1270                 err = igb_request_msix(adapter);
1271                 if (!err)
1272                         goto request_done;
1273                 /* fall back to MSI */
1274                 igb_clear_interrupt_scheme(adapter);
1275                 if (!pci_enable_msi(adapter->pdev))
1276                         adapter->flags |= IGB_FLAG_HAS_MSI;
1277                 igb_free_all_tx_resources(adapter);
1278                 igb_free_all_rx_resources(adapter);
1279                 adapter->num_tx_queues = 1;
1280                 adapter->num_rx_queues = 1;
1281                 adapter->num_q_vectors = 1;
1282                 err = igb_alloc_q_vectors(adapter);
1283                 if (err) {
1284                         dev_err(&pdev->dev,
1285                                 "Unable to allocate memory for vectors\n");
1286                         goto request_done;
1287                 }
1288                 err = igb_alloc_queues(adapter);
1289                 if (err) {
1290                         dev_err(&pdev->dev,
1291                                 "Unable to allocate memory for queues\n");
1292                         igb_free_q_vectors(adapter);
1293                         goto request_done;
1294                 }
1295                 igb_setup_all_tx_resources(adapter);
1296                 igb_setup_all_rx_resources(adapter);
1297         } else {
1298                 igb_assign_vector(adapter->q_vector[0], 0);
1299         }
1300
1301         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1302                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1303                                   netdev->name, adapter);
1304                 if (!err)
1305                         goto request_done;
1306
1307                 /* fall back to legacy interrupts */
1308                 igb_reset_interrupt_capability(adapter);
1309                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1310         }
1311
1312         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1313                           netdev->name, adapter);
1314
1315         if (err)
1316                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1317                         err);
1318
1319 request_done:
1320         return err;
1321 }
1322
1323 static void igb_free_irq(struct igb_adapter *adapter)
1324 {
1325         if (adapter->msix_entries) {
1326                 int vector = 0, i;
1327
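                     /* vector 0 is the "other" interrupt (link changes and
                      * similar non-queue events) registered against the
                      * adapter; the remaining vectors were requested one per
                      * q_vector */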
1328                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1329
1330                 for (i = 0; i < adapter->num_q_vectors; i++) {
1331                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1332                         free_irq(adapter->msix_entries[vector++].vector,
1333                                  q_vector);
1334                 }
1335         } else {
1336                 free_irq(adapter->pdev->irq, adapter);
1337         }
1338 }
1339
1340 /**
1341  * igb_irq_disable - Mask off interrupt generation on the NIC
1342  * @adapter: board private structure
1343  **/
1344 static void igb_irq_disable(struct igb_adapter *adapter)
1345 {
1346         struct e1000_hw *hw = &adapter->hw;
1347
1348         /*
1349          * we need to be careful when disabling interrupts.  The VFs are also
1350          * mapped into these registers, so clearing the bits can cause
1351          * issues for the VF drivers; only clear the bits that we set.
1352          */
1353         if (adapter->msix_entries) {
1354                 u32 regval = rd32(E1000_EIAM);
1355                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1356                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1357                 regval = rd32(E1000_EIAC);
1358                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1359         }
1360
1361         wr32(E1000_IAM, 0);
1362         wr32(E1000_IMC, ~0);
1363         wrfl();
1364         if (adapter->msix_entries) {
1365                 int i;
1366                 for (i = 0; i < adapter->num_q_vectors; i++)
1367                         synchronize_irq(adapter->msix_entries[i].vector);
1368         } else {
1369                 synchronize_irq(adapter->pdev->irq);
1370         }
1371 }
1372
1373 /**
1374  * igb_irq_enable - Enable default interrupt generation settings
1375  * @adapter: board private structure
1376  **/
1377 static void igb_irq_enable(struct igb_adapter *adapter)
1378 {
1379         struct e1000_hw *hw = &adapter->hw;
1380
1381         if (adapter->msix_entries) {
1382                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
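                     /* enable auto-clear (EIAC) and auto-mask (EIAM) for the
                      * queue vectors, then unmask them via EIMS */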
1383                 u32 regval = rd32(E1000_EIAC);
1384                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1385                 regval = rd32(E1000_EIAM);
1386                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1387                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1388                 if (adapter->vfs_allocated_count) {
1389                         wr32(E1000_MBVFIMR, 0xFF);
1390                         ims |= E1000_IMS_VMMB;
1391                 }
1392                 if (adapter->hw.mac.type == e1000_82580)
1393                         ims |= E1000_IMS_DRSTA;
1394
1395                 wr32(E1000_IMS, ims);
1396         } else {
1397                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1398                                 E1000_IMS_DRSTA);
1399                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1400                                 E1000_IMS_DRSTA);
1401         }
1402 }
1403
1404 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1405 {
1406         struct e1000_hw *hw = &adapter->hw;
1407         u16 vid = adapter->hw.mng_cookie.vlan_id;
1408         u16 old_vid = adapter->mng_vlan_id;
1409
1410         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1411                 /* add VID to filter table */
1412                 igb_vfta_set(hw, vid, true);
1413                 adapter->mng_vlan_id = vid;
1414         } else {
1415                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1416         }
1417
1418         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1419             (vid != old_vid) &&
1420             !test_bit(old_vid, adapter->active_vlans)) {
1421                 /* remove VID from filter table */
1422                 igb_vfta_set(hw, old_vid, false);
1423         }
1424 }
1425
1426 /**
1427  * igb_release_hw_control - release control of the h/w to f/w
1428  * @adapter: address of board private structure
1429  *
1430  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1431  * For ASF and Pass Through versions of f/w this means that the
1432  * driver is no longer loaded.
1433  *
1434  **/
1435 static void igb_release_hw_control(struct igb_adapter *adapter)
1436 {
1437         struct e1000_hw *hw = &adapter->hw;
1438         u32 ctrl_ext;
1439
1440         /* Let firmware take over control of h/w */
1441         ctrl_ext = rd32(E1000_CTRL_EXT);
1442         wr32(E1000_CTRL_EXT,
1443                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1444 }
1445
1446 /**
1447  * igb_get_hw_control - get control of the h/w from f/w
1448  * @adapter: address of board private structure
1449  *
1450  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1451  * For ASF and Pass Through versions of f/w this means that
1452  * the driver is loaded.
1453  *
1454  **/
1455 static void igb_get_hw_control(struct igb_adapter *adapter)
1456 {
1457         struct e1000_hw *hw = &adapter->hw;
1458         u32 ctrl_ext;
1459
1460         /* Let firmware know the driver has taken over */
1461         ctrl_ext = rd32(E1000_CTRL_EXT);
1462         wr32(E1000_CTRL_EXT,
1463                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1464 }
1465
1466 /**
1467  * igb_configure - configure the hardware for RX and TX
1468  * @adapter: private board structure
1469  **/
1470 static void igb_configure(struct igb_adapter *adapter)
1471 {
1472         struct net_device *netdev = adapter->netdev;
1473         int i;
1474
1475         igb_get_hw_control(adapter);
1476         igb_set_rx_mode(netdev);
1477
1478         igb_restore_vlan(adapter);
1479
1480         igb_setup_tctl(adapter);
1481         igb_setup_mrqc(adapter);
1482         igb_setup_rctl(adapter);
1483
1484         igb_configure_tx(adapter);
1485         igb_configure_rx(adapter);
1486
1487         igb_rx_fifo_flush_82575(&adapter->hw);
1488
1489         /* call igb_desc_unused which always leaves
1490          * at least 1 descriptor unused to make sure
1491          * next_to_use != next_to_clean */
1492         for (i = 0; i < adapter->num_rx_queues; i++) {
1493                 struct igb_ring *ring = adapter->rx_ring[i];
1494                 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1495         }
1496 }
1497
1498 /**
1499  * igb_power_up_link - Power up the phy/serdes link
1500  * @adapter: address of board private structure
1501  **/
1502 void igb_power_up_link(struct igb_adapter *adapter)
1503 {
1504         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1505                 igb_power_up_phy_copper(&adapter->hw);
1506         else
1507                 igb_power_up_serdes_link_82575(&adapter->hw);
1508 }
1509
1510 /**
1511  * igb_power_down_link - Power down the phy/serdes link
1512  * @adapter: address of board private structure
1513  */
1514 static void igb_power_down_link(struct igb_adapter *adapter)
1515 {
1516         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1517                 igb_power_down_phy_copper_82575(&adapter->hw);
1518         else
1519                 igb_shutdown_serdes_link_82575(&adapter->hw);
1520 }
1521
1522 /**
1523  * igb_up - Open the interface and prepare it to handle traffic
1524  * @adapter: board private structure
1525  **/
1526 int igb_up(struct igb_adapter *adapter)
1527 {
1528         struct e1000_hw *hw = &adapter->hw;
1529         int i;
1530
1531         /* hardware has been reset, we need to reload some things */
1532         igb_configure(adapter);
1533
1534         clear_bit(__IGB_DOWN, &adapter->state);
1535
1536         for (i = 0; i < adapter->num_q_vectors; i++) {
1537                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1538                 napi_enable(&q_vector->napi);
1539         }
1540         if (adapter->msix_entries)
1541                 igb_configure_msix(adapter);
1542         else
1543                 igb_assign_vector(adapter->q_vector[0], 0);
1544
1545         /* Clear any pending interrupts. */
1546         rd32(E1000_ICR);
1547         igb_irq_enable(adapter);
1548
1549         /* notify VFs that reset has been completed */
1550         if (adapter->vfs_allocated_count) {
1551                 u32 reg_data = rd32(E1000_CTRL_EXT);
1552                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1553                 wr32(E1000_CTRL_EXT, reg_data);
1554         }
1555
1556         netif_tx_start_all_queues(adapter->netdev);
1557
1558         /* start the watchdog. */
1559         hw->mac.get_link_status = 1;
1560         schedule_work(&adapter->watchdog_task);
1561
1562         return 0;
1563 }
1564
1565 void igb_down(struct igb_adapter *adapter)
1566 {
1567         struct net_device *netdev = adapter->netdev;
1568         struct e1000_hw *hw = &adapter->hw;
1569         u32 tctl, rctl;
1570         int i;
1571
1572         /* signal that we're down so the interrupt handler does not
1573          * reschedule our watchdog timer */
1574         set_bit(__IGB_DOWN, &adapter->state);
1575
1576         /* disable receives in the hardware */
1577         rctl = rd32(E1000_RCTL);
1578         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1579         /* flush and sleep below */
1580
1581         netif_tx_stop_all_queues(netdev);
1582
1583         /* disable transmits in the hardware */
1584         tctl = rd32(E1000_TCTL);
1585         tctl &= ~E1000_TCTL_EN;
1586         wr32(E1000_TCTL, tctl);
1587         /* flush both disables and wait for them to finish */
1588         wrfl();
1589         msleep(10);
1590
1591         for (i = 0; i < adapter->num_q_vectors; i++) {
1592                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1593                 napi_disable(&q_vector->napi);
1594         }
1595
1596         igb_irq_disable(adapter);
1597
1598         del_timer_sync(&adapter->watchdog_timer);
1599         del_timer_sync(&adapter->phy_info_timer);
1600
1601         netif_carrier_off(netdev);
1602
1603         /* record the stats before reset*/
1604         spin_lock(&adapter->stats64_lock);
1605         igb_update_stats(adapter, &adapter->stats64);
1606         spin_unlock(&adapter->stats64_lock);
1607
1608         adapter->link_speed = 0;
1609         adapter->link_duplex = 0;
1610
1611         if (!pci_channel_offline(adapter->pdev))
1612                 igb_reset(adapter);
1613         igb_clean_all_tx_rings(adapter);
1614         igb_clean_all_rx_rings(adapter);
1615 #ifdef CONFIG_IGB_DCA
1616
1617         /* since we reset the hardware, DCA settings were cleared */
1618         igb_setup_dca(adapter);
1619 #endif
1620 }
1621
1622 void igb_reinit_locked(struct igb_adapter *adapter)
1623 {
1624         WARN_ON(in_interrupt());
1625         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1626                 msleep(1);
1627         igb_down(adapter);
1628         igb_up(adapter);
1629         clear_bit(__IGB_RESETTING, &adapter->state);
1630 }
1631
1632 void igb_reset(struct igb_adapter *adapter)
1633 {
1634         struct pci_dev *pdev = adapter->pdev;
1635         struct e1000_hw *hw = &adapter->hw;
1636         struct e1000_mac_info *mac = &hw->mac;
1637         struct e1000_fc_info *fc = &hw->fc;
1638         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1639         u16 hwm;
1640
1641         /* Repartition the PBA for MTUs greater than 9K.
1642          * CTRL.RST is required for the change to take effect.
1643          */
1644         switch (mac->type) {
1645         case e1000_i350:
1646         case e1000_82580:
1647                 pba = rd32(E1000_RXPBS);
1648                 pba = igb_rxpbs_adjust_82580(pba);
1649                 break;
1650         case e1000_82576:
1651                 pba = rd32(E1000_RXPBS);
1652                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1653                 break;
1654         case e1000_82575:
1655         default:
1656                 pba = E1000_PBA_34K;
1657                 break;
1658         }
1659
1660         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1661             (mac->type < e1000_82576)) {
1662                 /* adjust PBA for jumbo frames */
1663                 wr32(E1000_PBA, pba);
1664
1665                 /* To maintain wire speed transmits, the Tx FIFO should be
1666                  * large enough to accommodate two full transmit packets,
1667                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1668                  * the Rx FIFO should be large enough to accommodate at least
1669                  * one full receive packet and is similarly rounded up and
1670                  * expressed in KB. */
1671                 pba = rd32(E1000_PBA);
1672                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1673                 tx_space = pba >> 16;
1674                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1675                 pba &= 0xffff;
1676                 /* the Tx FIFO also stores 16 bytes of information about the Tx
1677                  * packet, but don't include the Ethernet FCS because hardware appends it */
1678                 min_tx_space = (adapter->max_frame_size +
1679                                 sizeof(union e1000_adv_tx_desc) -
1680                                 ETH_FCS_LEN) * 2;
1681                 min_tx_space = ALIGN(min_tx_space, 1024);
1682                 min_tx_space >>= 10;
1683                 /* software strips receive CRC, so leave room for it */
1684                 min_rx_space = adapter->max_frame_size;
1685                 min_rx_space = ALIGN(min_rx_space, 1024);
1686                 min_rx_space >>= 10;
1687
1688                 /* If current Tx allocation is less than the min Tx FIFO size,
1689                  * and the min Tx FIFO size is less than the current Rx FIFO
1690                  * allocation, take space away from current Rx allocation */
1691                 if (tx_space < min_tx_space &&
1692                     ((min_tx_space - tx_space) < pba)) {
1693                         pba = pba - (min_tx_space - tx_space);
1694
1695                         /* if short on rx space, rx wins and must trump tx
1696                          * adjustment */
1697                         if (pba < min_rx_space)
1698                                 pba = min_rx_space;
1699                 }
1700                 wr32(E1000_PBA, pba);
1701         }
1702
1703         /* flow control settings */
1704         /* The high water mark must be low enough to fit one full frame
1705          * (or the size used for early receive) above it in the Rx FIFO.
1706          * Set it to the lower of:
1707          * - 90% of the Rx FIFO size, or
1708          * - the full Rx FIFO size minus one full frame */
1709         hwm = min(((pba << 10) * 9 / 10),
1710                         ((pba << 10) - 2 * adapter->max_frame_size));
1711
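             /* For example, with the 82575 default 34KB PBA and a 1522 byte
              * max frame this is min(31334, 31772) = 31334, which the masking
              * below rounds down to 31328. */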
1712         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1713         fc->low_water = fc->high_water - 16;
1714         fc->pause_time = 0xFFFF;
1715         fc->send_xon = 1;
1716         fc->current_mode = fc->requested_mode;
1717
1718         /* disable receive for all VFs and wait one second */
1719         if (adapter->vfs_allocated_count) {
1720                 int i;
1721                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1722                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1723
1724                 /* ping all the active vfs to let them know we are going down */
1725                 igb_ping_all_vfs(adapter);
1726
1727                 /* disable transmits and receives */
1728                 wr32(E1000_VFRE, 0);
1729                 wr32(E1000_VFTE, 0);
1730         }
1731
1732         /* Allow time for pending master requests to run */
1733         hw->mac.ops.reset_hw(hw);
1734         wr32(E1000_WUC, 0);
1735
1736         if (hw->mac.ops.init_hw(hw))
1737                 dev_err(&pdev->dev, "Hardware Error\n");
1738         if (hw->mac.type > e1000_82580) {
1739                 if (adapter->flags & IGB_FLAG_DMAC) {
1740                         u32 reg;
1741
1742                         /*
1743                          * DMA Coalescing high water mark needs to be higher
1744                          * than the Rx threshold.  The Rx threshold is
1745                          * currently pba - 6, so we should use a high water
1746                          * mark of pba - 4. */
1747                         hwm = (pba - 4) << 10;
1748
1749                         reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1750                                & E1000_DMACR_DMACTHR_MASK);
1751
1752                         /* transition to L0s or L1 if available */
1753                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1754
1755                         /* watchdog timer = ~1000 usec, expressed in 32 usec intervals */
1756                         reg |= (1000 >> 5);
1757                         wr32(E1000_DMACR, reg);
1758
1759                         /* no lower threshold to disable coalescing (smart FIFO)
1760                          * - UTRESH = 0 */
1761                         wr32(E1000_DMCRTRH, 0);
1762
1763                         /* write the DMA coalescing Rx high water mark computed above */
1764                         wr32(E1000_FCRTC, hwm);
1765
1766                         /*
1767                          * This sets the time to wait before requesting a
1768                          * transition to a low power state to the number of usecs
1769                          * needed to receive one 512-byte frame at gigabit line rate.
1770                          */
1771                         reg = rd32(E1000_DMCTLX);
1772                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
1773
1774                         /* Delay 255 usec before entering Lx state. */
1775                         reg |= 0xFF;
1776                         wr32(E1000_DMCTLX, reg);
1777
1778                         /* free space in Tx packet buffer to wake from DMAC */
1779                         wr32(E1000_DMCTXTH,
1780                              (IGB_MIN_TXPBSIZE -
1781                              (IGB_TX_BUF_4096 + adapter->max_frame_size))
1782                              >> 6);
1783
1784                         /* make low power state decision controlled by DMAC */
1785                         reg = rd32(E1000_PCIEMISC);
1786                         reg |= E1000_PCIEMISC_LX_DECISION;
1787                         wr32(E1000_PCIEMISC, reg);
1788                 } /* end if IGB_FLAG_DMAC set */
1789         }
1790         if (hw->mac.type == e1000_82580) {
1791                 u32 reg = rd32(E1000_PCIEMISC);
1792                 wr32(E1000_PCIEMISC,
1793                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1794         }
1795         if (!netif_running(adapter->netdev))
1796                 igb_power_down_link(adapter);
1797
1798         igb_update_mng_vlan(adapter);
1799
1800         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1801         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1802
1803         igb_get_phy_info(hw);
1804 }
1805
1806 static u32 igb_fix_features(struct net_device *netdev, u32 features)
1807 {
1808         /*
1809          * Since there is no support for separate rx/tx vlan accel
1810          * enable/disable, make sure the Tx flag is always in the same state as Rx.
1811          */
1812         if (features & NETIF_F_HW_VLAN_RX)
1813                 features |= NETIF_F_HW_VLAN_TX;
1814         else
1815                 features &= ~NETIF_F_HW_VLAN_TX;
1816
1817         return features;
1818 }
1819
1820 static int igb_set_features(struct net_device *netdev, u32 features)
1821 {
1822         struct igb_adapter *adapter = netdev_priv(netdev);
1823         int i;
1824         u32 changed = netdev->features ^ features;
1825
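             /* propagate the RXCSUM setting into each Rx ring's flags */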
1826         for (i = 0; i < adapter->num_rx_queues; i++) {
1827                 if (features & NETIF_F_RXCSUM)
1828                         set_bit(IGB_RING_FLAG_RX_CSUM,
1829                                 &adapter->rx_ring[i]->flags);
1830                 else
1831                         clear_bit(IGB_RING_FLAG_RX_CSUM,
1832                                   &adapter->rx_ring[i]->flags);
1833         }
1834
1835         if (changed & NETIF_F_HW_VLAN_RX)
1836                 igb_vlan_mode(netdev, features);
1837
1838         return 0;
1839 }
1840
1841 static const struct net_device_ops igb_netdev_ops = {
1842         .ndo_open               = igb_open,
1843         .ndo_stop               = igb_close,
1844         .ndo_start_xmit         = igb_xmit_frame,
1845         .ndo_get_stats64        = igb_get_stats64,
1846         .ndo_set_rx_mode        = igb_set_rx_mode,
1847         .ndo_set_mac_address    = igb_set_mac,
1848         .ndo_change_mtu         = igb_change_mtu,
1849         .ndo_do_ioctl           = igb_ioctl,
1850         .ndo_tx_timeout         = igb_tx_timeout,
1851         .ndo_validate_addr      = eth_validate_addr,
1852         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1853         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1854         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1855         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1856         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1857         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1858 #ifdef CONFIG_NET_POLL_CONTROLLER
1859         .ndo_poll_controller    = igb_netpoll,
1860 #endif
1861         .ndo_fix_features       = igb_fix_features,
1862         .ndo_set_features       = igb_set_features,
1863 };
1864
1865 /**
1866  * igb_probe - Device Initialization Routine
1867  * @pdev: PCI device information struct
1868  * @ent: entry in igb_pci_tbl
1869  *
1870  * Returns 0 on success, negative on failure
1871  *
1872  * igb_probe initializes an adapter identified by a pci_dev structure.
1873  * The OS initialization, configuring of the adapter private structure,
1874  * and a hardware reset occur.
1875  **/
1876 static int __devinit igb_probe(struct pci_dev *pdev,
1877                                const struct pci_device_id *ent)
1878 {
1879         struct net_device *netdev;
1880         struct igb_adapter *adapter;
1881         struct e1000_hw *hw;
1882         u16 eeprom_data = 0;
1883         s32 ret_val;
1884         static int global_quad_port_a; /* global quad port a indication */
1885         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1886         unsigned long mmio_start, mmio_len;
1887         int err, pci_using_dac;
1888         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1889         u8 part_str[E1000_PBANUM_LENGTH];
1890
1891         /* Catch broken hardware that put the wrong VF device ID in
1892          * the PCIe SR-IOV capability.
1893          */
1894         if (pdev->is_virtfn) {
1895                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1896                      pci_name(pdev), pdev->vendor, pdev->device);
1897                 return -EINVAL;
1898         }
1899
1900         err = pci_enable_device_mem(pdev);
1901         if (err)
1902                 return err;
1903
1904         pci_using_dac = 0;
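             /* try 64-bit DMA first and fall back to a 32-bit mask if the
              * platform cannot support it */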
1905         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1906         if (!err) {
1907                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1908                 if (!err)
1909                         pci_using_dac = 1;
1910         } else {
1911                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1912                 if (err) {
1913                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1914                         if (err) {
1915                                 dev_err(&pdev->dev, "No usable DMA "
1916                                         "configuration, aborting\n");
1917                                 goto err_dma;
1918                         }
1919                 }
1920         }
1921
1922         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1923                                            IORESOURCE_MEM),
1924                                            igb_driver_name);
1925         if (err)
1926                 goto err_pci_reg;
1927
1928         pci_enable_pcie_error_reporting(pdev);
1929
1930         pci_set_master(pdev);
1931         pci_save_state(pdev);
1932
1933         err = -ENOMEM;
1934         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1935                                    IGB_MAX_TX_QUEUES);
1936         if (!netdev)
1937                 goto err_alloc_etherdev;
1938
1939         SET_NETDEV_DEV(netdev, &pdev->dev);
1940
1941         pci_set_drvdata(pdev, netdev);
1942         adapter = netdev_priv(netdev);
1943         adapter->netdev = netdev;
1944         adapter->pdev = pdev;
1945         hw = &adapter->hw;
1946         hw->back = adapter;
1947         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1948
1949         mmio_start = pci_resource_start(pdev, 0);
1950         mmio_len = pci_resource_len(pdev, 0);
1951
1952         err = -EIO;
1953         hw->hw_addr = ioremap(mmio_start, mmio_len);
1954         if (!hw->hw_addr)
1955                 goto err_ioremap;
1956
1957         netdev->netdev_ops = &igb_netdev_ops;
1958         igb_set_ethtool_ops(netdev);
1959         netdev->watchdog_timeo = 5 * HZ;
1960
1961         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1962
1963         netdev->mem_start = mmio_start;
1964         netdev->mem_end = mmio_start + mmio_len;
1965
1966         /* PCI config space info */
1967         hw->vendor_id = pdev->vendor;
1968         hw->device_id = pdev->device;
1969         hw->revision_id = pdev->revision;
1970         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1971         hw->subsystem_device_id = pdev->subsystem_device;
1972
1973         /* Copy the default MAC, PHY and NVM function pointers */
1974         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1975         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1976         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1977         /* Initialize skew-specific constants */
1978         err = ei->get_invariants(hw);
1979         if (err)
1980                 goto err_sw_init;
1981
1982         /* setup the private structure */
1983         err = igb_sw_init(adapter);
1984         if (err)
1985                 goto err_sw_init;
1986
1987         igb_get_bus_info_pcie(hw);
1988
1989         hw->phy.autoneg_wait_to_complete = false;
1990
1991         /* Copper options */
1992         if (hw->phy.media_type == e1000_media_type_copper) {
1993                 hw->phy.mdix = AUTO_ALL_MODES;
1994                 hw->phy.disable_polarity_correction = false;
1995                 hw->phy.ms_type = e1000_ms_hw_default;
1996         }
1997
1998         if (igb_check_reset_block(hw))
1999                 dev_info(&pdev->dev,
2000                         "PHY reset is blocked due to SOL/IDER session.\n");
2001
2002         netdev->hw_features = NETIF_F_SG |
2003                            NETIF_F_IP_CSUM |
2004                            NETIF_F_IPV6_CSUM |
2005                            NETIF_F_TSO |
2006                            NETIF_F_TSO6 |
2007                            NETIF_F_RXCSUM |
2008                            NETIF_F_HW_VLAN_RX;
2009
2010         netdev->features = netdev->hw_features |
2011                            NETIF_F_HW_VLAN_TX |
2012                            NETIF_F_HW_VLAN_FILTER;
2013
2014         netdev->vlan_features |= NETIF_F_TSO;
2015         netdev->vlan_features |= NETIF_F_TSO6;
2016         netdev->vlan_features |= NETIF_F_IP_CSUM;
2017         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
2018         netdev->vlan_features |= NETIF_F_SG;
2019
2020         if (pci_using_dac) {
2021                 netdev->features |= NETIF_F_HIGHDMA;
2022                 netdev->vlan_features |= NETIF_F_HIGHDMA;
2023         }
2024
2025         if (hw->mac.type >= e1000_82576) {
2026                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
2027                 netdev->features |= NETIF_F_SCTP_CSUM;
2028         }
2029
2030         netdev->priv_flags |= IFF_UNICAST_FLT;
2031
2032         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2033
2034         /* before reading the NVM, reset the controller to put the device in a
2035          * known good starting state */
2036         hw->mac.ops.reset_hw(hw);
2037
2038         /* make sure the NVM is good */
2039         if (hw->nvm.ops.validate(hw) < 0) {
2040                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2041                 err = -EIO;
2042                 goto err_eeprom;
2043         }
2044
2045         /* copy the MAC address out of the NVM */
2046         if (hw->mac.ops.read_mac_addr(hw))
2047                 dev_err(&pdev->dev, "NVM Read Error\n");
2048
2049         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2050         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2051
2052         if (!is_valid_ether_addr(netdev->perm_addr)) {
2053                 dev_err(&pdev->dev, "Invalid MAC Address\n");
2054                 err = -EIO;
2055                 goto err_eeprom;
2056         }
2057
2058         setup_timer(&adapter->watchdog_timer, igb_watchdog,
2059                     (unsigned long) adapter);
2060         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2061                     (unsigned long) adapter);
2062
2063         INIT_WORK(&adapter->reset_task, igb_reset_task);
2064         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2065
2066         /* Initialize link properties that are user-changeable */
2067         adapter->fc_autoneg = true;
2068         hw->mac.autoneg = true;
2069         hw->phy.autoneg_advertised = 0x2f;
2070
2071         hw->fc.requested_mode = e1000_fc_default;
2072         hw->fc.current_mode = e1000_fc_default;
2073
2074         igb_validate_mdi_setting(hw);
2075
2076         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2077          * enable the ACPI Magic Packet filter
2078          */
2079
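             /* the APM enable bit lives in a different NVM word depending on
              * which port (PCI function) this is */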
2080         if (hw->bus.func == 0)
2081                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2082         else if (hw->mac.type >= e1000_82580)
2083                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2084                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2085                                  &eeprom_data);
2086         else if (hw->bus.func == 1)
2087                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2088
2089         if (eeprom_data & eeprom_apme_mask)
2090                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2091
2092         /* now that we have the eeprom settings, apply the special cases where
2093          * the eeprom may be wrong or the board simply won't support wake on
2094          * lan on a particular port */
2095         switch (pdev->device) {
2096         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2097                 adapter->eeprom_wol = 0;
2098                 break;
2099         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2100         case E1000_DEV_ID_82576_FIBER:
2101         case E1000_DEV_ID_82576_SERDES:
2102                 /* Wake events only supported on port A for dual fiber
2103                  * regardless of eeprom setting */
2104                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2105                         adapter->eeprom_wol = 0;
2106                 break;
2107         case E1000_DEV_ID_82576_QUAD_COPPER:
2108         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2109                 /* if quad port adapter, disable WoL on all but port A */
2110                 if (global_quad_port_a != 0)
2111                         adapter->eeprom_wol = 0;
2112                 else
2113                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2114                 /* Reset for multiple quad port adapters */
2115                 if (++global_quad_port_a == 4)
2116                         global_quad_port_a = 0;
2117                 break;
2118         }
2119
2120         /* initialize the wol settings based on the eeprom settings */
2121         adapter->wol = adapter->eeprom_wol;
2122         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2123
2124         /* reset the hardware with the new settings */
2125         igb_reset(adapter);
2126
2127         /* let the f/w know that the h/w is now under the control of the
2128          * driver. */
2129         igb_get_hw_control(adapter);
2130
2131         strcpy(netdev->name, "eth%d");
2132         err = register_netdev(netdev);
2133         if (err)
2134                 goto err_register;
2135
2136         igb_vlan_mode(netdev, netdev->features);
2137
2138         /* carrier off reporting is important to ethtool even BEFORE open */
2139         netif_carrier_off(netdev);
2140
2141 #ifdef CONFIG_IGB_DCA
2142         if (dca_add_requester(&pdev->dev) == 0) {
2143                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2144                 dev_info(&pdev->dev, "DCA enabled\n");
2145                 igb_setup_dca(adapter);
2146         }
2147
2148 #endif
2149         /* do hw tstamp init after resetting */
2150         igb_init_hw_timer(adapter);
2151
2152         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2153         /* print bus type/speed/width info */
2154         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2155                  netdev->name,
2156                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2157                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2158                                                             "unknown"),
2159                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2160                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2161                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2162                    "unknown"),
2163                  netdev->dev_addr);
2164
2165         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2166         if (ret_val)
2167                 strcpy(part_str, "Unknown");
2168         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2169         dev_info(&pdev->dev,
2170                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2171                 adapter->msix_entries ? "MSI-X" :
2172                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2173                 adapter->num_rx_queues, adapter->num_tx_queues);
2174         switch (hw->mac.type) {
2175         case e1000_i350:
2176                 igb_set_eee_i350(hw);
2177                 break;
2178         default:
2179                 break;
2180         }
2181         return 0;
2182
2183 err_register:
2184         igb_release_hw_control(adapter);
2185 err_eeprom:
2186         if (!igb_check_reset_block(hw))
2187                 igb_reset_phy(hw);
2188
2189         if (hw->flash_address)
2190                 iounmap(hw->flash_address);
2191 err_sw_init:
2192         igb_clear_interrupt_scheme(adapter);
2193         iounmap(hw->hw_addr);
2194 err_ioremap:
2195         free_netdev(netdev);
2196 err_alloc_etherdev:
2197         pci_release_selected_regions(pdev,
2198                                      pci_select_bars(pdev, IORESOURCE_MEM));
2199 err_pci_reg:
2200 err_dma:
2201         pci_disable_device(pdev);
2202         return err;
2203 }
2204
2205 /**
2206  * igb_remove - Device Removal Routine
2207  * @pdev: PCI device information struct
2208  *
2209  * igb_remove is called by the PCI subsystem to alert the driver
2210  * that it should release a PCI device.  This could be caused by a
2211  * Hot-Plug event, or because the driver is going to be removed from
2212  * memory.
2213  **/
2214 static void __devexit igb_remove(struct pci_dev *pdev)
2215 {
2216         struct net_device *netdev = pci_get_drvdata(pdev);
2217         struct igb_adapter *adapter = netdev_priv(netdev);
2218         struct e1000_hw *hw = &adapter->hw;
2219
2220         /*
2221          * The watchdog timer may be rescheduled, so explicitly
2222          * disable it from being rescheduled.
2223          */
2224         set_bit(__IGB_DOWN, &adapter->state);
2225         del_timer_sync(&adapter->watchdog_timer);
2226         del_timer_sync(&adapter->phy_info_timer);
2227
2228         cancel_work_sync(&adapter->reset_task);
2229         cancel_work_sync(&adapter->watchdog_task);
2230
2231 #ifdef CONFIG_IGB_DCA
2232         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2233                 dev_info(&pdev->dev, "DCA disabled\n");
2234                 dca_remove_requester(&pdev->dev);
2235                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2236                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2237         }
2238 #endif
2239
2240         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2241          * would have already happened in close and is redundant. */
2242         igb_release_hw_control(adapter);
2243
2244         unregister_netdev(netdev);
2245
2246         igb_clear_interrupt_scheme(adapter);
2247
2248 #ifdef CONFIG_PCI_IOV
2249         /* reclaim resources allocated to VFs */
2250         if (adapter->vf_data) {
2251                 /* disable iov and allow time for transactions to clear */
2252                 pci_disable_sriov(pdev);
2253                 msleep(500);
2254
2255                 kfree(adapter->vf_data);
2256                 adapter->vf_data = NULL;
2257                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2258                 wrfl();
2259                 msleep(100);
2260                 dev_info(&pdev->dev, "IOV Disabled\n");
2261         }
2262 #endif
2263
2264         iounmap(hw->hw_addr);
2265         if (hw->flash_address)
2266                 iounmap(hw->flash_address);
2267         pci_release_selected_regions(pdev,
2268                                      pci_select_bars(pdev, IORESOURCE_MEM));
2269
2270         free_netdev(netdev);
2271
2272         pci_disable_pcie_error_reporting(pdev);
2273
2274         pci_disable_device(pdev);
2275 }
2276
2277 /**
2278  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2279  * @adapter: board private structure to initialize
2280  *
2281  * This function initializes the vf specific data storage and then attempts to
2282  * allocate the VFs.  The reason for ordering it this way is because it is much
2283  * allocate the VFs.  It is ordered this way because it is much more
2284  * expensive time-wise to disable SR-IOV than it is to allocate and free
2285  **/
2286 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2287 {
2288 #ifdef CONFIG_PCI_IOV
2289         struct pci_dev *pdev = adapter->pdev;
2290
2291         if (adapter->vfs_allocated_count) {
2292                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2293                                            sizeof(struct vf_data_storage),
2294                                            GFP_KERNEL);
2295                 /* if allocation failed then we do not support SR-IOV */
2296                 if (!adapter->vf_data) {
2297                         adapter->vfs_allocated_count = 0;
2298                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2299                                 "Data Storage\n");
2300                 }
2301         }
2302
2303         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2304                 kfree(adapter->vf_data);
2305                 adapter->vf_data = NULL;
2306 #endif /* CONFIG_PCI_IOV */
2307                 adapter->vfs_allocated_count = 0;
2308 #ifdef CONFIG_PCI_IOV
2309         } else {
2310                 unsigned char mac_addr[ETH_ALEN];
2311                 int i;
2312                 dev_info(&pdev->dev, "%d vfs allocated\n",
2313                          adapter->vfs_allocated_count);
2314                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2315                         random_ether_addr(mac_addr);
2316                         igb_set_vf_mac(adapter, i, mac_addr);
2317                 }
2318                 /* DMA Coalescing is not supported in IOV mode. */
2319                 if (adapter->flags & IGB_FLAG_DMAC)
2320                         adapter->flags &= ~IGB_FLAG_DMAC;
2321         }
2322 #endif /* CONFIG_PCI_IOV */
2323 }
2324
2325
2326 /**
2327  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2328  * @adapter: board private structure to initialize
2329  *
2330  * igb_init_hw_timer initializes the function pointer and values for the hw
2331  * timer found in hardware.
2332  **/
2333 static void igb_init_hw_timer(struct igb_adapter *adapter)
2334 {
2335         struct e1000_hw *hw = &adapter->hw;
2336
2337         switch (hw->mac.type) {
2338         case e1000_i350:
2339         case e1000_82580:
2340                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2341                 adapter->cycles.read = igb_read_clock;
2342                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2343                 adapter->cycles.mult = 1;
2344                 /*
2345                  * The 82580 timesync updates the system timer every 8ns by 8ns
2346                  * and the value cannot be shifted.  Instead we need to shift
2347                  * the registers to generate a 64bit timer value.  As a result
2348                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2349                  * 24 in order to generate a larger value for synchronization.
2350                  */
2351                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2352                 /* disable system timer temporarily by setting bit 31 */
2353                 wr32(E1000_TSAUXC, 0x80000000);
2354                 wrfl();
2355
2356                 /* Set registers so that rollover occurs soon to test this. */
2357                 wr32(E1000_SYSTIMR, 0x00000000);
2358                 wr32(E1000_SYSTIML, 0x80000000);
2359                 wr32(E1000_SYSTIMH, 0x000000FF);
2360                 wrfl();
2361
2362                 /* enable system timer by clearing bit 31 */
2363                 wr32(E1000_TSAUXC, 0x0);
2364                 wrfl();
2365
2366                 timecounter_init(&adapter->clock,
2367                                  &adapter->cycles,
2368                                  ktime_to_ns(ktime_get_real()));
2369                 /*
2370                  * Synchronize our NIC clock against system wall clock. NIC
2371                  * time stamp reading requires ~3us per sample, each sample
2372                  * was pretty stable even under load => only require 10
2373                  * samples for each offset comparison.
2374                  */
2375                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2376                 adapter->compare.source = &adapter->clock;
2377                 adapter->compare.target = ktime_get_real;
2378                 adapter->compare.num_samples = 10;
2379                 timecompare_update(&adapter->compare, 0);
2380                 break;
2381         case e1000_82576:
2382                 /*
2383                  * Initialize hardware timer: we keep it running just in case
2384                  * some program needs it later on.
2385                  */
2386                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2387                 adapter->cycles.read = igb_read_clock;
2388                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2389                 adapter->cycles.mult = 1;
2390                 /*
2391                  * Scale the NIC clock cycle by a large factor so that
2392                  * relatively small clock corrections can be added or
2393                  * subtracted at each clock tick. The drawbacks of a large
2394                  * factor are a) that the clock register overflows more quickly
2395                  * (not such a big deal) and b) that the increment per tick has
2396                  * to fit into 24 bits.  As a result we need to use a shift of
2397                  * 19 so we can fit a value of 16 into the TIMINCA register.
2398                  */
2399                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2400                 wr32(E1000_TIMINCA,
2401                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2402                                 (16 << IGB_82576_TSYNC_SHIFT));
2403
2404                 /* Set registers so that rollover occurs soon to test this. */
2405                 wr32(E1000_SYSTIML, 0x00000000);
2406                 wr32(E1000_SYSTIMH, 0xFF800000);
2407                 wrfl();
2408
2409                 timecounter_init(&adapter->clock,
2410                                  &adapter->cycles,
2411                                  ktime_to_ns(ktime_get_real()));
2412                 /*
2413                  * Synchronize our NIC clock against system wall clock. NIC
2414                  * time stamp reading requires ~3us per sample, each sample
2415                  * was pretty stable even under load => only require 10
2416                  * samples for each offset comparison.
2417                  */
2418                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2419                 adapter->compare.source = &adapter->clock;
2420                 adapter->compare.target = ktime_get_real;
2421                 adapter->compare.num_samples = 10;
2422                 timecompare_update(&adapter->compare, 0);
2423                 break;
2424         case e1000_82575:
2425                 /* 82575 does not support timesync */
2426         default:
2427                 break;
2428         }
2429
2430 }
2431
2432 /**
2433  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2434  * @adapter: board private structure to initialize
2435  *
2436  * igb_sw_init initializes the Adapter private data structure.
2437  * Fields are initialized based on PCI device information and
2438  * OS network device settings (MTU size).
2439  **/
2440 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2441 {
2442         struct e1000_hw *hw = &adapter->hw;
2443         struct net_device *netdev = adapter->netdev;
2444         struct pci_dev *pdev = adapter->pdev;
2445
2446         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2447
2448         /* set default ring sizes */
2449         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2450         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2451
2452         /* set default ITR values */
2453         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2454         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2455
2456         /* set default work limits */
2457         adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2458
2459         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2460                                   VLAN_HLEN;
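             /* e.g. 1500 + 14 + 4 + 4 = 1522 bytes with the default 1500 byte MTU */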
2461         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2462
2463         adapter->node = -1;
2464
2465         spin_lock_init(&adapter->stats64_lock);
2466 #ifdef CONFIG_PCI_IOV
2467         switch (hw->mac.type) {
2468         case e1000_82576:
2469         case e1000_i350:
2470                 if (max_vfs > 7) {
2471                         dev_warn(&pdev->dev,
2472                                  "Maximum of 7 VFs per PF, using max\n");
2473                         adapter->vfs_allocated_count = 7;
2474                 } else
2475                         adapter->vfs_allocated_count = max_vfs;
2476                 break;
2477         default:
2478                 break;
2479         }
2480 #endif /* CONFIG_PCI_IOV */
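             /* default to one RSS queue per online CPU, capped at the
              * hardware maximum */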
2481         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2482         /* i350 cannot do RSS and SR-IOV at the same time */
2483         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2484                 adapter->rss_queues = 1;
2485
2486         /*
2487          * If rss_queues > 4, or if VFs are going to be allocated with multiple
2488          * RSS queues in use, then we should combine the queues into a queue
2489          * pair in order to conserve interrupts due to the limited supply.
2490          */
2491         if ((adapter->rss_queues > 4) ||
2492             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2493                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2494
2495         /* This call may decrease the number of queues */
2496         if (igb_init_interrupt_scheme(adapter)) {
2497                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2498                 return -ENOMEM;
2499         }
2500
2501         igb_probe_vfs(adapter);
2502
2503         /* Explicitly disable IRQ since the NIC can be in any state. */
2504         igb_irq_disable(adapter);
2505
2506         if (hw->mac.type == e1000_i350)
2507                 adapter->flags &= ~IGB_FLAG_DMAC;
2508
2509         set_bit(__IGB_DOWN, &adapter->state);
2510         return 0;
2511 }
2512
2513 /**
2514  * igb_open - Called when a network interface is made active
2515  * @netdev: network interface device structure
2516  *
2517  * Returns 0 on success, negative value on failure
2518  *
2519  * The open entry point is called when a network interface is made
2520  * active by the system (IFF_UP).  At this point all resources needed
2521  * for transmit and receive operations are allocated, the interrupt
2522  * handler is registered with the OS, the watchdog timer is started,
2523  * and the stack is notified that the interface is ready.
2524  **/
2525 static int igb_open(struct net_device *netdev)
2526 {
2527         struct igb_adapter *adapter = netdev_priv(netdev);
2528         struct e1000_hw *hw = &adapter->hw;
2529         int err;
2530         int i;
2531
2532         /* disallow open during test */
2533         if (test_bit(__IGB_TESTING, &adapter->state))
2534                 return -EBUSY;
2535
2536         netif_carrier_off(netdev);
2537
2538         /* allocate transmit descriptors */
2539         err = igb_setup_all_tx_resources(adapter);
2540         if (err)
2541                 goto err_setup_tx;
2542
2543         /* allocate receive descriptors */
2544         err = igb_setup_all_rx_resources(adapter);
2545         if (err)
2546                 goto err_setup_rx;
2547
2548         igb_power_up_link(adapter);
2549
2550         /* before we allocate an interrupt, we must be ready to handle it.
2551          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2552          * as soon as we call request_irq, so we have to set up our
2553          * clean_rx handler before we do so.  */
2554         igb_configure(adapter);
2555
2556         err = igb_request_irq(adapter);
2557         if (err)
2558                 goto err_req_irq;
2559
2560         /* From here on the code is the same as igb_up() */
2561         clear_bit(__IGB_DOWN, &adapter->state);
2562
2563         for (i = 0; i < adapter->num_q_vectors; i++) {
2564                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2565                 napi_enable(&q_vector->napi);
2566         }
2567
2568         /* Clear any pending interrupts. */
2569         rd32(E1000_ICR);
2570
2571         igb_irq_enable(adapter);
2572
2573         /* notify VFs that reset has been completed */
2574         if (adapter->vfs_allocated_count) {
2575                 u32 reg_data = rd32(E1000_CTRL_EXT);
2576                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2577                 wr32(E1000_CTRL_EXT, reg_data);
2578         }
2579
2580         netif_tx_start_all_queues(netdev);
2581
2582         /* start the watchdog. */
2583         hw->mac.get_link_status = 1;
2584         schedule_work(&adapter->watchdog_task);
2585
2586         return 0;
2587
2588 err_req_irq:
2589         igb_release_hw_control(adapter);
2590         igb_power_down_link(adapter);
2591         igb_free_all_rx_resources(adapter);
2592 err_setup_rx:
2593         igb_free_all_tx_resources(adapter);
2594 err_setup_tx:
2595         igb_reset(adapter);
2596
2597         return err;
2598 }
2599
2600 /**
2601  * igb_close - Disables a network interface
2602  * @netdev: network interface device structure
2603  *
2604  * Returns 0, this is not allowed to fail
2605  *
2606  * The close entry point is called when an interface is de-activated
2607  * by the OS.  The hardware is still under the driver's control, but
2608  * needs to be disabled.  A global MAC reset is issued to stop the
2609  * hardware, and all transmit and receive resources are freed.
2610  **/
2611 static int igb_close(struct net_device *netdev)
2612 {
2613         struct igb_adapter *adapter = netdev_priv(netdev);
2614
2615         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2616         igb_down(adapter);
2617
2618         igb_free_irq(adapter);
2619
2620         igb_free_all_tx_resources(adapter);
2621         igb_free_all_rx_resources(adapter);
2622
2623         return 0;
2624 }
2625
2626 /**
2627  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2628  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2629  *
2630  * Return 0 on success, negative on failure
2631  **/
2632 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2633 {
2634         struct device *dev = tx_ring->dev;
2635         int orig_node = dev_to_node(dev);
2636         int size;
2637
2638         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
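             /* prefer an allocation on the ring's NUMA node, but fall back to
              * any node rather than failing outright */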
2639         tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2640         if (!tx_ring->tx_buffer_info)
2641                 tx_ring->tx_buffer_info = vzalloc(size);
2642         if (!tx_ring->tx_buffer_info)
2643                 goto err;
2644
2645         /* round up to nearest 4K */
2646         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2647         tx_ring->size = ALIGN(tx_ring->size, 4096);
2648
2649         set_dev_node(dev, tx_ring->numa_node);
2650         tx_ring->desc = dma_alloc_coherent(dev,
2651                                            tx_ring->size,
2652                                            &tx_ring->dma,
2653                                            GFP_KERNEL);
2654         set_dev_node(dev, orig_node);
2655         if (!tx_ring->desc)
2656                 tx_ring->desc = dma_alloc_coherent(dev,
2657                                                    tx_ring->size,
2658                                                    &tx_ring->dma,
2659                                                    GFP_KERNEL);
2660
2661         if (!tx_ring->desc)
2662                 goto err;
2663
2664         tx_ring->next_to_use = 0;
2665         tx_ring->next_to_clean = 0;
2666
2667         return 0;
2668
2669 err:
2670         vfree(tx_ring->tx_buffer_info);
2671         dev_err(dev,
2672                 "Unable to allocate memory for the transmit descriptor ring\n");
2673         return -ENOMEM;
2674 }
2675
2676 /**
2677  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2678  *                                (Descriptors) for all queues
2679  * @adapter: board private structure
2680  *
2681  * Return 0 on success, negative on failure
2682  **/
2683 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2684 {
2685         struct pci_dev *pdev = adapter->pdev;
2686         int i, err = 0;
2687
2688         for (i = 0; i < adapter->num_tx_queues; i++) {
2689                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2690                 if (err) {
2691                         dev_err(&pdev->dev,
2692                                 "Allocation for Tx Queue %u failed\n", i);
2693                         for (i--; i >= 0; i--)
2694                                 igb_free_tx_resources(adapter->tx_ring[i]);
2695                         break;
2696                 }
2697         }
2698
2699         return err;
2700 }
2701
2702 /**
2703  * igb_setup_tctl - configure the transmit control registers
2704  * @adapter: Board private structure
2705  **/
2706 void igb_setup_tctl(struct igb_adapter *adapter)
2707 {
2708         struct e1000_hw *hw = &adapter->hw;
2709         u32 tctl;
2710
2711         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2712         wr32(E1000_TXDCTL(0), 0);
2713
2714         /* Program the Transmit Control Register */
2715         tctl = rd32(E1000_TCTL);
2716         tctl &= ~E1000_TCTL_CT;
2717         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2718                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2719
2720         igb_config_collision_dist(hw);
2721
2722         /* Enable transmits */
2723         tctl |= E1000_TCTL_EN;
2724
2725         wr32(E1000_TCTL, tctl);
2726 }
2727
2728 /**
2729  * igb_configure_tx_ring - Configure transmit ring after Reset
2730  * @adapter: board private structure
2731  * @ring: tx ring to configure
2732  *
2733  * Configure a transmit ring after a reset.
2734  **/
2735 void igb_configure_tx_ring(struct igb_adapter *adapter,
2736                            struct igb_ring *ring)
2737 {
2738         struct e1000_hw *hw = &adapter->hw;
2739         u32 txdctl = 0;
2740         u64 tdba = ring->dma;
2741         int reg_idx = ring->reg_idx;
2742
2743         /* disable the queue */
2744         wr32(E1000_TXDCTL(reg_idx), 0);
2745         wrfl();
2746         mdelay(10);
2747
2748         wr32(E1000_TDLEN(reg_idx),
2749                         ring->count * sizeof(union e1000_adv_tx_desc));
2750         wr32(E1000_TDBAL(reg_idx),
2751                         tdba & 0x00000000ffffffffULL);
2752         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2753
2754         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2755         wr32(E1000_TDH(reg_idx), 0);
2756         writel(0, ring->tail);
2757
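             /*
              * Pack the prefetch (PTHRESH), host (HTHRESH) and write-back
              * (WTHRESH) thresholds into TXDCTL at bit offsets 0, 8 and 16.
              */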
2758         txdctl |= IGB_TX_PTHRESH;
2759         txdctl |= IGB_TX_HTHRESH << 8;
2760         txdctl |= IGB_TX_WTHRESH << 16;
2761
2762         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2763         wr32(E1000_TXDCTL(reg_idx), txdctl);
2764 }
2765
2766 /**
2767  * igb_configure_tx - Configure transmit Unit after Reset
2768  * @adapter: board private structure
2769  *
2770  * Configure the Tx unit of the MAC after a reset.
2771  **/
2772 static void igb_configure_tx(struct igb_adapter *adapter)
2773 {
2774         int i;
2775
2776         for (i = 0; i < adapter->num_tx_queues; i++)
2777                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2778 }
2779
2780 /**
2781  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2782  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2783  *
2784  * Returns 0 on success, negative on failure
2785  **/
2786 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2787 {
2788         struct device *dev = rx_ring->dev;
2789         int orig_node = dev_to_node(dev);
2790         int size, desc_len;
2791
2792         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2793         rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2794         if (!rx_ring->rx_buffer_info)
2795                 rx_ring->rx_buffer_info = vzalloc(size);
2796         if (!rx_ring->rx_buffer_info)
2797                 goto err;
2798
2799         desc_len = sizeof(union e1000_adv_rx_desc);
2800
2801         /* Round up to nearest 4K */
2802         rx_ring->size = rx_ring->count * desc_len;
2803         rx_ring->size = ALIGN(rx_ring->size, 4096);
2804
2805         set_dev_node(dev, rx_ring->numa_node);
2806         rx_ring->desc = dma_alloc_coherent(dev,
2807                                            rx_ring->size,
2808                                            &rx_ring->dma,
2809                                            GFP_KERNEL);
2810         set_dev_node(dev, orig_node);
2811         if (!rx_ring->desc)
2812                 rx_ring->desc = dma_alloc_coherent(dev,
2813                                                    rx_ring->size,
2814                                                    &rx_ring->dma,
2815                                                    GFP_KERNEL);
2816
2817         if (!rx_ring->desc)
2818                 goto err;
2819
2820         rx_ring->next_to_clean = 0;
2821         rx_ring->next_to_use = 0;
2822
2823         return 0;
2824
2825 err:
2826         vfree(rx_ring->rx_buffer_info);
2827         rx_ring->rx_buffer_info = NULL;
2828         dev_err(dev,
2829                 "Unable to allocate memory for the receive descriptor ring\n");
2830         return -ENOMEM;
2831 }
2832
2833 /**
2834  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2835  *                                (Descriptors) for all queues
2836  * @adapter: board private structure
2837  *
2838  * Return 0 on success, negative on failure
2839  **/
2840 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2841 {
2842         struct pci_dev *pdev = adapter->pdev;
2843         int i, err = 0;
2844
2845         for (i = 0; i < adapter->num_rx_queues; i++) {
2846                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2847                 if (err) {
2848                         dev_err(&pdev->dev,
2849                                 "Allocation for Rx Queue %u failed\n", i);
2850                         for (i--; i >= 0; i--)
2851                                 igb_free_rx_resources(adapter->rx_ring[i]);
2852                         break;
2853                 }
2854         }
2855
2856         return err;
2857 }
2858
2859 /**
2860  * igb_setup_mrqc - configure the multiple receive queue control registers
2861  * @adapter: Board private structure
2862  **/
2863 static void igb_setup_mrqc(struct igb_adapter *adapter)
2864 {
2865         struct e1000_hw *hw = &adapter->hw;
2866         u32 mrqc, rxcsum;
2867         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2868         union e1000_reta {
2869                 u32 dword;
2870                 u8  bytes[4];
2871         } reta;
2872         static const u8 rsshash[40] = {
2873                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2874                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2875                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2876                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2877
2878         /* Fill out hash function seeds */
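             /*
              * The 40-byte key fills ten 32-bit RSSRK registers, written four
              * bytes at a time with the lowest-numbered byte in the low bits.
              */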
2879         for (j = 0; j < 10; j++) {
2880                 u32 rsskey = rsshash[(j * 4)];
2881                 rsskey |= rsshash[(j * 4) + 1] << 8;
2882                 rsskey |= rsshash[(j * 4) + 2] << 16;
2883                 rsskey |= rsshash[(j * 4) + 3] << 24;
2884                 array_wr32(E1000_RSSRK(0), j, rsskey);
2885         }
2886
2887         num_rx_queues = adapter->rss_queues;
2888
2889         if (adapter->vfs_allocated_count) {
2890                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2891                 switch (hw->mac.type) {
2892                 case e1000_i350:
2893                 case e1000_82580:
2894                         num_rx_queues = 1;
2895                         shift = 0;
2896                         break;
2897                 case e1000_82576:
2898                         shift = 3;
2899                         num_rx_queues = 2;
2900                         break;
2901                 case e1000_82575:
2902                         shift = 2;
2903                         shift2 = 6;
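                             /* fall through */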
2904                 default:
2905                         break;
2906                 }
2907         } else {
2908                 if (hw->mac.type == e1000_82575)
2909                         shift = 6;
2910         }
2911
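             /*
              * Fill the 128-entry redirection table (32 RETA registers, four
              * one-byte entries each); a dword is written out once all four
              * of its bytes have been assembled.
              */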
2912         for (j = 0; j < (32 * 4); j++) {
2913                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2914                 if (shift2)
2915                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2916                 if ((j & 3) == 3)
2917                         wr32(E1000_RETA(j >> 2), reta.dword);
2918         }
2919
2920         /*
2921          * Disable raw packet checksumming so that RSS hash is placed in
2922          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2923          * offloads as they are enabled by default
2924          */
2925         rxcsum = rd32(E1000_RXCSUM);
2926         rxcsum |= E1000_RXCSUM_PCSD;
2927
2928         if (adapter->hw.mac.type >= e1000_82576)
2929                 /* Enable Receive Checksum Offload for SCTP */
2930                 rxcsum |= E1000_RXCSUM_CRCOFL;
2931
2932         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2933         wr32(E1000_RXCSUM, rxcsum);
2934
2935         /* If VMDq is enabled then we set the appropriate mode for that, else
2936          * we default to RSS so that an RSS hash is calculated per packet even
2937          * if we are only using one queue */
2938         if (adapter->vfs_allocated_count) {
2939                 if (hw->mac.type > e1000_82575) {
2940                         /* Set the default pool for the PF's first queue */
2941                         u32 vtctl = rd32(E1000_VT_CTL);
2942                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2943                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2944                         vtctl |= adapter->vfs_allocated_count <<
2945                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2946                         wr32(E1000_VT_CTL, vtctl);
2947                 }
2948                 if (adapter->rss_queues > 1)
2949                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2950                 else
2951                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2952         } else {
2953                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2954         }
2955         igb_vmm_control(adapter);
2956
2957         /*
2958          * Generate RSS hash based on TCP port numbers and/or
2959          * IPv4/v6 src and dst addresses since UDP cannot be
2960          * hashed reliably due to IP fragmentation
2961          */
2962         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2963                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2964                 E1000_MRQC_RSS_FIELD_IPV6 |
2965                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2966                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2967
2968         wr32(E1000_MRQC, mrqc);
2969 }
2970
2971 /**
2972  * igb_setup_rctl - configure the receive control registers
2973  * @adapter: Board private structure
2974  **/
2975 void igb_setup_rctl(struct igb_adapter *adapter)
2976 {
2977         struct e1000_hw *hw = &adapter->hw;
2978         u32 rctl;
2979
2980         rctl = rd32(E1000_RCTL);
2981
2982         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2983         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2984
2985         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2986                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2987
2988         /*
2989          * enable stripping of CRC. It's unlikely this will break BMC
2990          * redirection as it did with e1000. Newer features require
2991          * that the HW strips the CRC.
2992          */
2993         rctl |= E1000_RCTL_SECRC;
2994
2995         /* disable store bad packets and clear size bits. */
2996         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2997
2998         /* enable LPE to prevent packets larger than max_frame_size */
2999         rctl |= E1000_RCTL_LPE;
3000
3001         /* disable queue 0 to prevent tail write w/o re-config */
3002         wr32(E1000_RXDCTL(0), 0);
3003
3004         /* Attention!!!  For SR-IOV PF driver operations you must enable
3005          * queue drop for all VF and PF queues to prevent head of line blocking
3006          * if an un-trusted VF does not provide descriptors to hardware.
3007          */
3008         if (adapter->vfs_allocated_count) {
3009                 /* set all queue drop enable bits */
3010                 wr32(E1000_QDE, ALL_QUEUES);
3011         }
3012
3013         wr32(E1000_RCTL, rctl);
3014 }
3015
3016 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3017                                    int vfn)
3018 {
3019         struct e1000_hw *hw = &adapter->hw;
3020         u32 vmolr;
3021
3022         /* if this is a VF (not the PF) and that VF has VLANs enabled,
3023          * increase the size to allow for the VLAN tag */
3024         if (vfn < adapter->vfs_allocated_count &&
3025             adapter->vf_data[vfn].vlans_enabled)
3026                 size += VLAN_TAG_SIZE;
3027
3028         vmolr = rd32(E1000_VMOLR(vfn));
3029         vmolr &= ~E1000_VMOLR_RLPML_MASK;
3030         vmolr |= size | E1000_VMOLR_LPE;
3031         wr32(E1000_VMOLR(vfn), vmolr);
3032
3033         return 0;
3034 }
3035
3036 /**
3037  * igb_rlpml_set - set maximum receive packet size
3038  * @adapter: board private structure
3039  *
3040  * Configure maximum receivable packet size.
3041  **/
3042 static void igb_rlpml_set(struct igb_adapter *adapter)
3043 {
3044         u32 max_frame_size = adapter->max_frame_size;
3045         struct e1000_hw *hw = &adapter->hw;
3046         u16 pf_id = adapter->vfs_allocated_count;
3047
3048         if (pf_id) {
3049                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3050                 /*
3051                  * If we're in VMDQ or SR-IOV mode, then set global RLPML
3052                  * to our max jumbo frame size, in case we need to enable
3053                  * jumbo frames on one of the rings later.
3054                  * This will not pass over-length frames into the default
3055                  * queue because it's gated by the VMOLR.RLPML.
3056                  */
3057                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3058         }
3059
3060         wr32(E1000_RLPML, max_frame_size);
3061 }
3062
3063 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3064                                  int vfn, bool aupe)
3065 {
3066         struct e1000_hw *hw = &adapter->hw;
3067         u32 vmolr;
3068
3069         /*
3070          * This register exists only on 82576 and newer so if we are older then
3071          * we should exit and do nothing
3072          */
3073         if (hw->mac.type < e1000_82576)
3074                 return;
3075
3076         vmolr = rd32(E1000_VMOLR(vfn));
3077         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3078         if (aupe)
3079                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3080         else
3081                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3082
3083         /* clear all bits that might not be set */
3084         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3085
3086         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3087                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3088         /*
3089          * for VMDq only allow the VFs and pool 0 to accept broadcast and
3090          * multicast packets
3091          */
3092         if (vfn <= adapter->vfs_allocated_count)
3093                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3094
3095         wr32(E1000_VMOLR(vfn), vmolr);
3096 }
3097
3098 /**
3099  * igb_configure_rx_ring - Configure a receive ring after Reset
3100  * @adapter: board private structure
3101  * @ring: receive ring to be configured
3102  *
3103  * Configure the Rx unit of the MAC after a reset.
3104  **/
3105 void igb_configure_rx_ring(struct igb_adapter *adapter,
3106                            struct igb_ring *ring)
3107 {
3108         struct e1000_hw *hw = &adapter->hw;
3109         u64 rdba = ring->dma;
3110         int reg_idx = ring->reg_idx;
3111         u32 srrctl = 0, rxdctl = 0;
3112
3113         /* disable the queue */
3114         wr32(E1000_RXDCTL(reg_idx), 0);
3115
3116         /* Set DMA base address registers */
3117         wr32(E1000_RDBAL(reg_idx),
3118              rdba & 0x00000000ffffffffULL);
3119         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3120         wr32(E1000_RDLEN(reg_idx),
3121                        ring->count * sizeof(union e1000_adv_rx_desc));
3122
3123         /* initialize head and tail */
3124         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3125         wr32(E1000_RDH(reg_idx), 0);
3126         writel(0, ring->tail);
3127
3128         /* set descriptor configuration */
3129         srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
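             /*
              * The packet buffer is half a page so two Rx buffers can share a
              * page, capped at 16KB on architectures with very large pages.
              */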
3130 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3131         srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3132 #else
3133         srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3134 #endif
3135         srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3136         if (hw->mac.type == e1000_82580)
3137                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3138         /* Only set Drop Enable if we are supporting multiple queues */
3139         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3140                 srrctl |= E1000_SRRCTL_DROP_EN;
3141
3142         wr32(E1000_SRRCTL(reg_idx), srrctl);
3143
3144         /* set filtering for VMDQ pools */
3145         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3146
3147         rxdctl |= IGB_RX_PTHRESH;
3148         rxdctl |= IGB_RX_HTHRESH << 8;
3149         rxdctl |= IGB_RX_WTHRESH << 16;
3150
3151         /* enable receive descriptor fetching */
3152         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3153         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3154 }
3155
3156 /**
3157  * igb_configure_rx - Configure receive Unit after Reset
3158  * @adapter: board private structure
3159  *
3160  * Configure the Rx unit of the MAC after a reset.
3161  **/
3162 static void igb_configure_rx(struct igb_adapter *adapter)
3163 {
3164         int i;
3165
3166         /* set UTA to appropriate mode */
3167         igb_set_uta(adapter);
3168
3169         /* set the correct pool for the PF default MAC address in entry 0 */
3170         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3171                          adapter->vfs_allocated_count);
3172
3173         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3174          * the Base and Length of the Rx Descriptor Ring */
3175         for (i = 0; i < adapter->num_rx_queues; i++)
3176                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3177 }
3178
3179 /**
3180  * igb_free_tx_resources - Free Tx Resources per Queue
3181  * @tx_ring: Tx descriptor ring for a specific queue
3182  *
3183  * Free all transmit software resources
3184  **/
3185 void igb_free_tx_resources(struct igb_ring *tx_ring)
3186 {
3187         igb_clean_tx_ring(tx_ring);
3188
3189         vfree(tx_ring->tx_buffer_info);
3190         tx_ring->tx_buffer_info = NULL;
3191
3192         /* if not set, then don't free */
3193         if (!tx_ring->desc)
3194                 return;
3195
3196         dma_free_coherent(tx_ring->dev, tx_ring->size,
3197                           tx_ring->desc, tx_ring->dma);
3198
3199         tx_ring->desc = NULL;
3200 }
3201
3202 /**
3203  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3204  * @adapter: board private structure
3205  *
3206  * Free all transmit software resources
3207  **/
3208 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3209 {
3210         int i;
3211
3212         for (i = 0; i < adapter->num_tx_queues; i++)
3213                 igb_free_tx_resources(adapter->tx_ring[i]);
3214 }
3215
3216 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3217                                     struct igb_tx_buffer *tx_buffer)
3218 {
3219         if (tx_buffer->skb) {
3220                 dev_kfree_skb_any(tx_buffer->skb);
3221                 if (tx_buffer->dma)
3222                         dma_unmap_single(ring->dev,
3223                                          tx_buffer->dma,
3224                                          tx_buffer->length,
3225                                          DMA_TO_DEVICE);
3226         } else if (tx_buffer->dma) {
3227                 dma_unmap_page(ring->dev,
3228                                tx_buffer->dma,
3229                                tx_buffer->length,
3230                                DMA_TO_DEVICE);
3231         }
3232         tx_buffer->next_to_watch = NULL;
3233         tx_buffer->skb = NULL;
3234         tx_buffer->dma = 0;
3235         /* buffer_info must be completely set up in the transmit path */
3236 }
3237
3238 /**
3239  * igb_clean_tx_ring - Free Tx Buffers
3240  * @tx_ring: ring to be cleaned
3241  **/
3242 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3243 {
3244         struct igb_tx_buffer *buffer_info;
3245         unsigned long size;
3246         u16 i;
3247
3248         if (!tx_ring->tx_buffer_info)
3249                 return;
3250         /* Free all the Tx ring sk_buffs */
3251
3252         for (i = 0; i < tx_ring->count; i++) {
3253                 buffer_info = &tx_ring->tx_buffer_info[i];
3254                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3255         }
3256
3257         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3258         memset(tx_ring->tx_buffer_info, 0, size);
3259
3260         /* Zero out the descriptor ring */
3261         memset(tx_ring->desc, 0, tx_ring->size);
3262
3263         tx_ring->next_to_use = 0;
3264         tx_ring->next_to_clean = 0;
3265 }
3266
3267 /**
3268  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3269  * @adapter: board private structure
3270  **/
3271 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3272 {
3273         int i;
3274
3275         for (i = 0; i < adapter->num_tx_queues; i++)
3276                 igb_clean_tx_ring(adapter->tx_ring[i]);
3277 }
3278
3279 /**
3280  * igb_free_rx_resources - Free Rx Resources
3281  * @rx_ring: ring to clean the resources from
3282  *
3283  * Free all receive software resources
3284  **/
3285 void igb_free_rx_resources(struct igb_ring *rx_ring)
3286 {
3287         igb_clean_rx_ring(rx_ring);
3288
3289         vfree(rx_ring->rx_buffer_info);
3290         rx_ring->rx_buffer_info = NULL;
3291
3292         /* if not set, then don't free */
3293         if (!rx_ring->desc)
3294                 return;
3295
3296         dma_free_coherent(rx_ring->dev, rx_ring->size,
3297                           rx_ring->desc, rx_ring->dma);
3298
3299         rx_ring->desc = NULL;
3300 }
3301
3302 /**
3303  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3304  * @adapter: board private structure
3305  *
3306  * Free all receive software resources
3307  **/
3308 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3309 {
3310         int i;
3311
3312         for (i = 0; i < adapter->num_rx_queues; i++)
3313                 igb_free_rx_resources(adapter->rx_ring[i]);
3314 }
3315
3316 /**
3317  * igb_clean_rx_ring - Free Rx Buffers per Queue
3318  * @rx_ring: ring to free buffers from
3319  **/
3320 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3321 {
3322         unsigned long size;
3323         u16 i;
3324
3325         if (!rx_ring->rx_buffer_info)
3326                 return;
3327
3328         /* Free all the Rx ring sk_buffs */
3329         for (i = 0; i < rx_ring->count; i++) {
3330                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3331                 if (buffer_info->dma) {
3332                         dma_unmap_single(rx_ring->dev,
3333                                          buffer_info->dma,
3334                                          IGB_RX_HDR_LEN,
3335                                          DMA_FROM_DEVICE);
3336                         buffer_info->dma = 0;
3337                 }
3338
3339                 if (buffer_info->skb) {
3340                         dev_kfree_skb(buffer_info->skb);
3341                         buffer_info->skb = NULL;
3342                 }
3343                 if (buffer_info->page_dma) {
3344                         dma_unmap_page(rx_ring->dev,
3345                                        buffer_info->page_dma,
3346                                        PAGE_SIZE / 2,
3347                                        DMA_FROM_DEVICE);
3348                         buffer_info->page_dma = 0;
3349                 }
3350                 if (buffer_info->page) {
3351                         put_page(buffer_info->page);
3352                         buffer_info->page = NULL;
3353                         buffer_info->page_offset = 0;
3354                 }
3355         }
3356
3357         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3358         memset(rx_ring->rx_buffer_info, 0, size);
3359
3360         /* Zero out the descriptor ring */
3361         memset(rx_ring->desc, 0, rx_ring->size);
3362
3363         rx_ring->next_to_clean = 0;
3364         rx_ring->next_to_use = 0;
3365 }
3366
3367 /**
3368  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3369  * @adapter: board private structure
3370  **/
3371 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3372 {
3373         int i;
3374
3375         for (i = 0; i < adapter->num_rx_queues; i++)
3376                 igb_clean_rx_ring(adapter->rx_ring[i]);
3377 }
3378
3379 /**
3380  * igb_set_mac - Change the Ethernet Address of the NIC
3381  * @netdev: network interface device structure
3382  * @p: pointer to an address structure
3383  *
3384  * Returns 0 on success, negative on failure
3385  **/
3386 static int igb_set_mac(struct net_device *netdev, void *p)
3387 {
3388         struct igb_adapter *adapter = netdev_priv(netdev);
3389         struct e1000_hw *hw = &adapter->hw;
3390         struct sockaddr *addr = p;
3391
3392         if (!is_valid_ether_addr(addr->sa_data))
3393                 return -EADDRNOTAVAIL;
3394
3395         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3396         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3397
3398         /* set the correct pool for the new PF MAC address in entry 0 */
3399         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3400                          adapter->vfs_allocated_count);
3401
3402         return 0;
3403 }
3404
3405 /**
3406  * igb_write_mc_addr_list - write multicast addresses to MTA
3407  * @netdev: network interface device structure
3408  *
3409  * Writes multicast address list to the MTA hash table.
3410  * Returns: -ENOMEM on failure
3411  *                0 on no addresses written
3412  *                X on writing X addresses to MTA
3413  **/
3414 static int igb_write_mc_addr_list(struct net_device *netdev)
3415 {
3416         struct igb_adapter *adapter = netdev_priv(netdev);
3417         struct e1000_hw *hw = &adapter->hw;
3418         struct netdev_hw_addr *ha;
3419         u8  *mta_list;
3420         int i;
3421
3422         if (netdev_mc_empty(netdev)) {
3423                 /* nothing to program, so clear mc list */
3424                 igb_update_mc_addr_list(hw, NULL, 0);
3425                 igb_restore_vf_multicasts(adapter);
3426                 return 0;
3427         }
3428
3429         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3430         if (!mta_list)
3431                 return -ENOMEM;
3432
3433         /* The shared function expects a packed array of only addresses. */
3434         i = 0;
3435         netdev_for_each_mc_addr(ha, netdev)
3436                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3437
3438         igb_update_mc_addr_list(hw, mta_list, i);
3439         kfree(mta_list);
3440
3441         return netdev_mc_count(netdev);
3442 }
3443
3444 /**
3445  * igb_write_uc_addr_list - write unicast addresses to RAR table
3446  * @netdev: network interface device structure
3447  *
3448  * Writes unicast address list to the RAR table.
3449  * Returns: -ENOMEM on failure/insufficient address space
3450  *                0 on no addresses written
3451  *                X on writing X addresses to the RAR table
3452  **/
3453 static int igb_write_uc_addr_list(struct net_device *netdev)
3454 {
3455         struct igb_adapter *adapter = netdev_priv(netdev);
3456         struct e1000_hw *hw = &adapter->hw;
3457         unsigned int vfn = adapter->vfs_allocated_count;
3458         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
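             /*
              * Entry 0 of the RAR table holds the PF default MAC and the top
              * vfn entries are reserved for VF MAC addresses, so only the
              * remaining slots are available for additional unicast filters.
              */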
3459         int count = 0;
3460
3461         /* return ENOMEM indicating insufficient memory for addresses */
3462         if (netdev_uc_count(netdev) > rar_entries)
3463                 return -ENOMEM;
3464
3465         if (!netdev_uc_empty(netdev) && rar_entries) {
3466                 struct netdev_hw_addr *ha;
3467
3468                 netdev_for_each_uc_addr(ha, netdev) {
3469                         if (!rar_entries)
3470                                 break;
3471                         igb_rar_set_qsel(adapter, ha->addr,
3472                                          rar_entries--,
3473                                          vfn);
3474                         count++;
3475                 }
3476         }
3477         /* write the addresses in reverse order to avoid write combining */
3478         for (; rar_entries > 0 ; rar_entries--) {
3479                 wr32(E1000_RAH(rar_entries), 0);
3480                 wr32(E1000_RAL(rar_entries), 0);
3481         }
3482         wrfl();
3483
3484         return count;
3485 }
3486
3487 /**
3488  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3489  * @netdev: network interface device structure
3490  *
3491  * The set_rx_mode entry point is called whenever the unicast or multicast
3492  * address lists or the network interface flags are updated.  This routine is
3493  * responsible for configuring the hardware for proper unicast, multicast,
3494  * promiscuous mode, and all-multi behavior.
3495  **/
3496 static void igb_set_rx_mode(struct net_device *netdev)
3497 {
3498         struct igb_adapter *adapter = netdev_priv(netdev);
3499         struct e1000_hw *hw = &adapter->hw;
3500         unsigned int vfn = adapter->vfs_allocated_count;
3501         u32 rctl, vmolr = 0;
3502         int count;
3503
3504         /* Check for Promiscuous and All Multicast modes */
3505         rctl = rd32(E1000_RCTL);
3506
3507         /* clear the effected bits */
3508         /* clear the affected bits */
3509
3510         if (netdev->flags & IFF_PROMISC) {
3511                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3512                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3513         } else {
3514                 if (netdev->flags & IFF_ALLMULTI) {
3515                         rctl |= E1000_RCTL_MPE;
3516                         vmolr |= E1000_VMOLR_MPME;
3517                 } else {
3518                         /*
3519                          * Write addresses to the MTA, if the attempt fails
3520                          * then we should just turn on promiscuous mode so
3521                          * then we should just turn on multicast promiscuous
3522                          * mode so that we can at least receive multicast traffic
3523                         count = igb_write_mc_addr_list(netdev);
3524                         if (count < 0) {
3525                                 rctl |= E1000_RCTL_MPE;
3526                                 vmolr |= E1000_VMOLR_MPME;
3527                         } else if (count) {
3528                                 vmolr |= E1000_VMOLR_ROMPE;
3529                         }
3530                 }
3531                 /*
3532                  * Write addresses to available RAR registers, if there is not
3533                  * sufficient space to store all the addresses then enable
3534                  * unicast promiscuous mode
3535                  */
3536                 count = igb_write_uc_addr_list(netdev);
3537                 if (count < 0) {
3538                         rctl |= E1000_RCTL_UPE;
3539                         vmolr |= E1000_VMOLR_ROPE;
3540                 }
3541                 rctl |= E1000_RCTL_VFE;
3542         }
3543         wr32(E1000_RCTL, rctl);
3544
3545         /*
3546          * In order to support SR-IOV and eventually VMDq it is necessary to set
3547          * the VMOLR to enable the appropriate modes.  Without this workaround
3548          * we will have issues with VLAN tag stripping not being done for frames
3549          * that are only arriving because we are the default pool
3550          */
3551         if (hw->mac.type < e1000_82576)
3552                 return;
3553
3554         vmolr |= rd32(E1000_VMOLR(vfn)) &
3555                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3556         wr32(E1000_VMOLR(vfn), vmolr);
3557         igb_restore_vf_multicasts(adapter);
3558 }
3559
3560 static void igb_check_wvbr(struct igb_adapter *adapter)
3561 {
3562         struct e1000_hw *hw = &adapter->hw;
3563         u32 wvbr = 0;
3564
3565         switch (hw->mac.type) {
3566         case e1000_82576:
3567         case e1000_i350:
3568                 wvbr = rd32(E1000_WVBR);
                     if (!wvbr)
3569                         return;
3570                 break;
3571         default:
3572                 break;
3573         }
3574
3575         adapter->wvbr |= wvbr;
3576 }
3577
3578 #define IGB_STAGGERED_QUEUE_OFFSET 8
3579
3580 static void igb_spoof_check(struct igb_adapter *adapter)
3581 {
3582         int j;
3583
3584         if (!adapter->wvbr)
3585                 return;
3586
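             /*
              * Each VF is reported via two bits in WVBR: bit j and bit
              * j + IGB_STAGGERED_QUEUE_OFFSET.
              */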
3587         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3588                 if (adapter->wvbr & (1 << j) ||
3589                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3590                         dev_warn(&adapter->pdev->dev,
3591                                 "Spoof event(s) detected on VF %d\n", j);
3592                         adapter->wvbr &=
3593                                 ~((1 << j) |
3594                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3595                 }
3596         }
3597 }
3598
3599 /* Need to wait a few seconds after link up to get diagnostic information from
3600  * the phy */
3601 static void igb_update_phy_info(unsigned long data)
3602 {
3603         struct igb_adapter *adapter = (struct igb_adapter *) data;
3604         igb_get_phy_info(&adapter->hw);
3605 }
3606
3607 /**
3608  * igb_has_link - check shared code for link and determine up/down
3609  * @adapter: pointer to driver private info
3610  **/
3611 bool igb_has_link(struct igb_adapter *adapter)
3612 {
3613         struct e1000_hw *hw = &adapter->hw;
3614         bool link_active = false;
3615         s32 ret_val = 0;
3616
3617         /* get_link_status is set on LSC (link status) interrupt or
3618          * rx sequence error interrupt.  get_link_status will stay
3619          * true until the e1000_check_for_link establishes link
3620          * for copper adapters ONLY
3621          */
3622         switch (hw->phy.media_type) {
3623         case e1000_media_type_copper:
3624                 if (hw->mac.get_link_status) {
3625                         ret_val = hw->mac.ops.check_for_link(hw);
3626                         link_active = !hw->mac.get_link_status;
3627                 } else {
3628                         link_active = true;
3629                 }
3630                 break;
3631         case e1000_media_type_internal_serdes:
3632                 ret_val = hw->mac.ops.check_for_link(hw);
3633                 link_active = hw->mac.serdes_has_link;
3634                 break;
3635         default:
3636         case e1000_media_type_unknown:
3637                 break;
3638         }
3639
3640         return link_active;
3641 }
3642
3643 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3644 {
3645         bool ret = false;
3646         u32 ctrl_ext, thstat;
3647
3648         /* check for thermal sensor event on i350, copper only */
3649         if (hw->mac.type == e1000_i350) {
3650                 thstat = rd32(E1000_THSTAT);
3651                 ctrl_ext = rd32(E1000_CTRL_EXT);
3652
3653                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3654                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3655                         ret = !!(thstat & event);
3656                 }
3657         }
3658
3659         return ret;
3660 }
3661
3662 /**
3663  * igb_watchdog - Timer Call-back
3664  * @data: pointer to adapter cast into an unsigned long
3665  **/
3666 static void igb_watchdog(unsigned long data)
3667 {
3668         struct igb_adapter *adapter = (struct igb_adapter *)data;
3669         /* Do the rest outside of interrupt context */
3670         schedule_work(&adapter->watchdog_task);
3671 }
3672
3673 static void igb_watchdog_task(struct work_struct *work)
3674 {
3675         struct igb_adapter *adapter = container_of(work,
3676                                                    struct igb_adapter,
3677                                                    watchdog_task);
3678         struct e1000_hw *hw = &adapter->hw;
3679         struct net_device *netdev = adapter->netdev;
3680         u32 link;
3681         int i;
3682
3683         link = igb_has_link(adapter);
3684         if (link) {
3685                 if (!netif_carrier_ok(netdev)) {
3686                         u32 ctrl;
3687                         hw->mac.ops.get_speed_and_duplex(hw,
3688                                                          &adapter->link_speed,
3689                                                          &adapter->link_duplex);
3690
3691                         ctrl = rd32(E1000_CTRL);
3692                         /* Link status message must follow this format */
3693                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3694                                  "Flow Control: %s\n",
3695                                netdev->name,
3696                                adapter->link_speed,
3697                                adapter->link_duplex == FULL_DUPLEX ?
3698                                  "Full Duplex" : "Half Duplex",
3699                                ((ctrl & E1000_CTRL_TFCE) &&
3700                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3701                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3702                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3703
3704                         /* check for thermal sensor event */
3705                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3706                                 printk(KERN_INFO "igb: %s The network adapter "
3707                                                  "link speed was downshifted "
3708                                                  "because it overheated.\n",
3709                                                  netdev->name);
3710                         }
3711
3712                         /* adjust timeout factor according to speed/duplex */
3713                         adapter->tx_timeout_factor = 1;
3714                         switch (adapter->link_speed) {
3715                         case SPEED_10:
3716                                 adapter->tx_timeout_factor = 14;
3717                                 break;
3718                         case SPEED_100:
3719                                 /* maybe add some timeout factor ? */
3720                                 break;
3721                         }
3722
3723                         netif_carrier_on(netdev);
3724
3725                         igb_ping_all_vfs(adapter);
3726                         igb_check_vf_rate_limit(adapter);
3727
3728                         /* link state has changed, schedule phy info update */
3729                         if (!test_bit(__IGB_DOWN, &adapter->state))
3730                                 mod_timer(&adapter->phy_info_timer,
3731                                           round_jiffies(jiffies + 2 * HZ));
3732                 }
3733         } else {
3734                 if (netif_carrier_ok(netdev)) {
3735                         adapter->link_speed = 0;
3736                         adapter->link_duplex = 0;
3737
3738                         /* check for thermal sensor event */
3739                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3740                                 printk(KERN_ERR "igb: %s The network adapter "
3741                                                 "was stopped because it "
3742                                                 "overheated.\n",
3743                                                 netdev->name);
3744                         }
3745
3746                         /* Link status message must follow this format */
3747                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3748                                netdev->name);
3749                         netif_carrier_off(netdev);
3750
3751                         igb_ping_all_vfs(adapter);
3752
3753                         /* link state has changed, schedule phy info update */
3754                         if (!test_bit(__IGB_DOWN, &adapter->state))
3755                                 mod_timer(&adapter->phy_info_timer,
3756                                           round_jiffies(jiffies + 2 * HZ));
3757                 }
3758         }
3759
3760         spin_lock(&adapter->stats64_lock);
3761         igb_update_stats(adapter, &adapter->stats64);
3762         spin_unlock(&adapter->stats64_lock);
3763
3764         for (i = 0; i < adapter->num_tx_queues; i++) {
3765                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3766                 if (!netif_carrier_ok(netdev)) {
3767                         /* We've lost link, so the controller stops DMA,
3768                          * but we've got queued Tx work that's never going
3769                          * to get done, so reset controller to flush Tx.
3770                          * (Do the reset outside of interrupt context). */
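                             /* a non-empty ring means Tx work is still queued
                              * with no link left to drain it */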
3771                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3772                                 adapter->tx_timeout_count++;
3773                                 schedule_work(&adapter->reset_task);
3774                                 /* return immediately since reset is imminent */
3775                                 return;
3776                         }
3777                 }
3778
3779                 /* Force detection of hung controller every watchdog period */
3780                 tx_ring->detect_tx_hung = true;
3781         }
3782
3783         /* Cause software interrupt to ensure rx ring is cleaned */
3784         if (adapter->msix_entries) {
3785                 u32 eics = 0;
3786                 for (i = 0; i < adapter->num_q_vectors; i++) {
3787                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3788                         eics |= q_vector->eims_value;
3789                 }
3790                 wr32(E1000_EICS, eics);
3791         } else {
3792                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3793         }
3794
3795         igb_spoof_check(adapter);
3796
3797         /* Reset the timer */
3798         if (!test_bit(__IGB_DOWN, &adapter->state))
3799                 mod_timer(&adapter->watchdog_timer,
3800                           round_jiffies(jiffies + 2 * HZ));
3801 }
3802
3803 enum latency_range {
3804         lowest_latency = 0,
3805         low_latency = 1,
3806         bulk_latency = 2,
3807         latency_invalid = 255
3808 };
3809
3810 /**
3811  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3812  *
3813  *      Stores a new ITR value based strictly on packet size.  This
3814  *      algorithm is less sophisticated than that used in igb_update_itr,
3815  *      due to the difficulty of synchronizing statistics across multiple
3816  *      receive rings.  The divisors and thresholds used by this function
3817  *      were determined based on theoretical maximum wire speed and testing
3818  *      data, in order to minimize response time while increasing bulk
3819  *      throughput.
3820  *      This functionality is controlled by the InterruptThrottleRate module
3821  *      parameter (see igb_param.c)
3822  *      NOTE:  This function is called only when operating in a multiqueue
3823  *             receive environment.
3824  * @q_vector: pointer to q_vector
3825  **/
3826 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3827 {
3828         int new_val = q_vector->itr_val;
3829         int avg_wire_size = 0;
3830         struct igb_adapter *adapter = q_vector->adapter;
3831         unsigned int packets;
3832
3833         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3834          * ints/sec (IGB_4K_ITR).
3835          */
3836         if (adapter->link_speed != SPEED_1000) {
3837                 new_val = IGB_4K_ITR;
3838                 goto set_itr_val;
3839         }
3840
3841         packets = q_vector->rx.total_packets;
3842         if (packets)
3843                 avg_wire_size = q_vector->rx.total_bytes / packets;
3844
3845         packets = q_vector->tx.total_packets;
3846         if (packets)
3847                 avg_wire_size = max_t(u32, avg_wire_size,
3848                                       q_vector->tx.total_bytes / packets);
3849
3850         /* if avg_wire_size isn't set no work was done */
3851         if (!avg_wire_size)
3852                 goto clear_counts;
3853
3854         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3855         avg_wire_size += 24;
3856
3857         /* Don't starve jumbo frames */
3858         avg_wire_size = min(avg_wire_size, 3000);
3859
3860         /* Give a little boost to mid-size frames */
3861         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3862                 new_val = avg_wire_size / 3;
3863         else
3864                 new_val = avg_wire_size / 2;
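             /*
              * e.g. 64-byte frames give (64 + 24) / 2 = 44, 600-byte frames
              * (600 + 24) / 3 = 208 and 1500-byte frames (1500 + 24) / 2 = 762;
              * smaller values mean shorter intervals and more interrupts/sec.
              */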
3865
3866         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3867         if (new_val < IGB_20K_ITR &&
3868             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3869              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3870                 new_val = IGB_20K_ITR;
3871
3872 set_itr_val:
3873         if (new_val != q_vector->itr_val) {
3874                 q_vector->itr_val = new_val;
3875                 q_vector->set_itr = 1;
3876         }
3877 clear_counts:
3878         q_vector->rx.total_bytes = 0;
3879         q_vector->rx.total_packets = 0;
3880         q_vector->tx.total_bytes = 0;
3881         q_vector->tx.total_packets = 0;
3882 }
3883
3884 /**
3885  * igb_update_itr - update the dynamic ITR value based on statistics
3886  *      Stores a new ITR value based on packets and byte
3887  *      counts during the last interrupt.  The advantage of per interrupt
3888  *      computation is faster updates and more accurate ITR for the current
3889  *      traffic pattern.  Constants in this function were computed
3890  *      based on theoretical maximum wire speed and thresholds were set based
3891  *      on testing data as well as attempting to minimize response time
3892  *      while increasing bulk throughput.
3893  *      This functionality is controlled by the InterruptThrottleRate module
3894  *      parameter (see igb_param.c)
3895  *      NOTE:  These calculations are only valid when operating in a single-
3896  *             queue environment.
3897  * @q_vector: pointer to q_vector
3898  * @ring_container: ring info to update the itr for
3899  **/
3900 static void igb_update_itr(struct igb_q_vector *q_vector,
3901                            struct igb_ring_container *ring_container)
3902 {
3903         unsigned int packets = ring_container->total_packets;
3904         unsigned int bytes = ring_container->total_bytes;
3905         u8 itrval = ring_container->itr;
3906
3907         /* no packets, exit with status unchanged */
3908         if (packets == 0)
3909                 return;
3910
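             /*
              * For example, starting from low_latency: an interrupt that
              * cleaned 40 packets totalling 4000 bytes stays in low_latency,
              * while 3 TSO sends totalling 30000 bytes (bytes/packets > 8000)
              * move the ring to bulk_latency.
              */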
3911         switch (itrval) {
3912         case lowest_latency:
3913                 /* handle TSO and jumbo frames */
3914                 if (bytes/packets > 8000)
3915                         itrval = bulk_latency;
3916                 else if ((packets < 5) && (bytes > 512))
3917                         itrval = low_latency;
3918                 break;
3919         case low_latency:  /* 50 usec aka 20000 ints/s */
3920                 if (bytes > 10000) {
3921                         /* this if handles the TSO accounting */
3922                         if (bytes/packets > 8000) {
3923                                 itrval = bulk_latency;
3924                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3925                                 itrval = bulk_latency;
3926                         } else if (packets > 35) {
3927                                 itrval = lowest_latency;
3928                         }
3929                 } else if (bytes/packets > 2000) {
3930                         itrval = bulk_latency;
3931                 } else if (packets <= 2 && bytes < 512) {
3932                         itrval = lowest_latency;
3933                 }
3934                 break;
3935         case bulk_latency: /* 250 usec aka 4000 ints/s */
3936                 if (bytes > 25000) {
3937                         if (packets > 35)
3938                                 itrval = low_latency;
3939                 } else if (bytes < 1500) {
3940                         itrval = low_latency;
3941                 }
3942                 break;
3943         }
3944
3945         /* clear work counters since we have the values we need */
3946         ring_container->total_bytes = 0;
3947         ring_container->total_packets = 0;
3948
3949         /* write updated itr to ring container */
3950         ring_container->itr = itrval;
3951 }
3952
3953 static void igb_set_itr(struct igb_q_vector *q_vector)
3954 {
3955         struct igb_adapter *adapter = q_vector->adapter;
3956         u32 new_itr = q_vector->itr_val;
3957         u8 current_itr = 0;
3958
3959         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3960         if (adapter->link_speed != SPEED_1000) {
3961                 current_itr = 0;
3962                 new_itr = IGB_4K_ITR;
3963                 goto set_itr_now;
3964         }
3965
3966         igb_update_itr(q_vector, &q_vector->tx);
3967         igb_update_itr(q_vector, &q_vector->rx);
3968
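             /*
              * bulk_latency > low_latency > lowest_latency, so taking the max
              * lets the more bulk-oriented (lower interrupt rate) of the Tx
              * and Rx classifications win.
              */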
3969         current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3970
3971         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3972         if (current_itr == lowest_latency &&
3973             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3974              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3975                 current_itr = low_latency;
3976
3977         switch (current_itr) {
3978         /* counts and packets in update_itr are dependent on these numbers */
3979         case lowest_latency:
3980                 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3981                 break;
3982         case low_latency:
3983                 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3984                 break;
3985         case bulk_latency:
3986                 new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3987                 break;
3988         default:
3989                 break;
3990         }
3991
3992 set_itr_now:
3993         if (new_itr != q_vector->itr_val) {
3994                 /* this attempts to bias the interrupt rate towards Bulk
3995                  * by adding intermediate steps when interrupt rate is
3996                  * increasing */
3997                 new_itr = new_itr > q_vector->itr_val ?
3998                              max((new_itr * q_vector->itr_val) /
3999                                  (new_itr + (q_vector->itr_val >> 2)),
4000                                  new_itr) :
4001                              new_itr;
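                     /*
                      * Note: when new_itr > itr_val the weighted term above is
                      * always smaller than new_itr, so the max() ends up
                      * selecting new_itr unchanged.
                      */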
4002                 /* Don't write the value here; it resets the adapter's
4003                  * internal timer, and causes us to delay far longer than
4004                  * we should between interrupts.  Instead, we write the ITR
4005                  * value at the beginning of the next interrupt so the timing
4006                  * ends up being correct.
4007                  */
4008                 q_vector->itr_val = new_itr;
4009                 q_vector->set_itr = 1;
4010         }
4011 }
4012
4013 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4014                      u32 type_tucmd, u32 mss_l4len_idx)
4015 {
4016         struct e1000_adv_tx_context_desc *context_desc;
4017         u16 i = tx_ring->next_to_use;
4018
4019         context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4020
4021         i++;
4022         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4023
4024         /* set bits to identify this as an advanced context descriptor */
4025         type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4026
4027         /* For 82575, context index must be unique per ring. */
4028         if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4029                 mss_l4len_idx |= tx_ring->reg_idx << 4;
4030
4031         context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
4032         context_desc->seqnum_seed       = 0;
4033         context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
4034         context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
4035 }
4036
4037 static int igb_tso(struct igb_ring *tx_ring,
4038                    struct igb_tx_buffer *first,
4039                    u8 *hdr_len)
4040 {
4041         struct sk_buff *skb = first->skb;
4042         u32 vlan_macip_lens, type_tucmd;
4043         u32 mss_l4len_idx, l4len;
4044
4045         if (!skb_is_gso(skb))
4046                 return 0;
4047
4048         if (skb_header_cloned(skb)) {
4049                 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4050                 if (err)
4051                         return err;
4052         }
4053
4054         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4055         type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4056
4057         if (first->protocol == __constant_htons(ETH_P_IP)) {
4058                 struct iphdr *iph = ip_hdr(skb);
4059                 iph->tot_len = 0;
4060                 iph->check = 0;
4061                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4062                                                          iph->daddr, 0,
4063                                                          IPPROTO_TCP,
4064                                                          0);
4065                 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4066                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4067                                    IGB_TX_FLAGS_CSUM |
4068                                    IGB_TX_FLAGS_IPV4;
4069         } else if (skb_is_gso_v6(skb)) {
4070                 ipv6_hdr(skb)->payload_len = 0;
4071                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4072                                                        &ipv6_hdr(skb)->daddr,
4073                                                        0, IPPROTO_TCP, 0);
4074                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4075                                    IGB_TX_FLAGS_CSUM;
4076         }
4077
4078         /* compute header lengths */
4079         l4len = tcp_hdrlen(skb);
4080         *hdr_len = skb_transport_offset(skb) + l4len;
4081
4082         /* update gso size and bytecount with header size */
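             /* every segment after the first repeats the headers on the wire,
              * so those extra header bytes are added to the byte count here */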
4083         first->gso_segs = skb_shinfo(skb)->gso_segs;
4084         first->bytecount += (first->gso_segs - 1) * *hdr_len;
4085
4086         /* MSS L4LEN IDX */
4087         mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4088         mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4089
4090         /* VLAN MACLEN IPLEN */
4091         vlan_macip_lens = skb_network_header_len(skb);
4092         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4093         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4094
4095         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4096
4097         return 1;
4098 }
4099
4100 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4101 {
4102         struct sk_buff *skb = first->skb;
4103         u32 vlan_macip_lens = 0;
4104         u32 mss_l4len_idx = 0;
4105         u32 type_tucmd = 0;
4106
4107         if (skb->ip_summed != CHECKSUM_PARTIAL) {
4108                 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4109                         return;
4110         } else {
4111                 u8 l4_hdr = 0;
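                     /*
                      * The first switch records the L3 header length and, for
                      * IPv4, sets the IPV4 bit; the second selects the L4
                      * checksum mode from the L4 protocol found there.
                      */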
4112                 switch (first->protocol) {
4113                 case __constant_htons(ETH_P_IP):
4114                         vlan_macip_lens |= skb_network_header_len(skb);
4115                         type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4116                         l4_hdr = ip_hdr(skb)->protocol;
4117                         break;
4118                 case __constant_htons(ETH_P_IPV6):
4119                         vlan_macip_lens |= skb_network_header_len(skb);
4120                         l4_hdr = ipv6_hdr(skb)->nexthdr;
4121                         break;
4122                 default:
4123                         if (unlikely(net_ratelimit())) {
4124                                 dev_warn(tx_ring->dev,
4125                                  "partial checksum but proto=%x!\n",
4126                                  first->protocol);
4127                         }
4128                         break;
4129                 }
4130
4131                 switch (l4_hdr) {
4132                 case IPPROTO_TCP:
4133                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4134                         mss_l4len_idx = tcp_hdrlen(skb) <<
4135                                         E1000_ADVTXD_L4LEN_SHIFT;
4136                         break;
4137                 case IPPROTO_SCTP:
4138                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4139                         mss_l4len_idx = sizeof(struct sctphdr) <<
4140                                         E1000_ADVTXD_L4LEN_SHIFT;
4141                         break;
4142                 case IPPROTO_UDP:
4143                         mss_l4len_idx = sizeof(struct udphdr) <<
4144                                         E1000_ADVTXD_L4LEN_SHIFT;
4145                         break;
4146                 default:
4147                         if (unlikely(net_ratelimit())) {
4148                                 dev_warn(tx_ring->dev,
4149                                  "partial checksum but l4 proto=%x!\n",
4150                                  l4_hdr);
4151                         }
4152                         break;
4153                 }
4154
4155                 /* update TX checksum flag */
4156                 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4157         }
4158
4159         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4160         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4161
4162         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4163 }
4164
4165 static __le32 igb_tx_cmd_type(u32 tx_flags)
4166 {
4167         /* set type for advanced descriptor with frame checksum insertion */
4168         __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4169                                       E1000_ADVTXD_DCMD_IFCS |
4170                                       E1000_ADVTXD_DCMD_DEXT);
4171
4172         /* set HW vlan bit if vlan is present */
4173         if (tx_flags & IGB_TX_FLAGS_VLAN)
4174                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4175
4176         /* set timestamp bit if present */
4177         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4178                 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4179
4180         /* set segmentation bits for TSO */
4181         if (tx_flags & IGB_TX_FLAGS_TSO)
4182                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4183
4184         return cmd_type;
4185 }
4186
4187 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4188                                  union e1000_adv_tx_desc *tx_desc,
4189                                  u32 tx_flags, unsigned int paylen)
4190 {
4191         u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4192
4193         /* 82575 requires a unique index per ring if any offload is enabled */
4194         if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4195             test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4196                 olinfo_status |= tx_ring->reg_idx << 4;
4197
4198         /* insert L4 checksum */
4199         if (tx_flags & IGB_TX_FLAGS_CSUM) {
4200                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4201
4202                 /* insert IPv4 checksum */
4203                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4204                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4205         }
4206
4207         tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4208 }
4209
4210 /*
4211  * The largest size we can write to the descriptor is 65535.  In order to
4212  * maintain a power of two alignment we have to limit ourselves to 32K.
4213  */
4214 #define IGB_MAX_TXD_PWR 15
4215 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
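     /* With IGB_MAX_TXD_PWR = 15 a single data descriptor covers at most
      * 1 << 15 = 32768 bytes; larger buffers are split across multiple
      * descriptors in igb_tx_map() below.
      */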
4216
4217 static void igb_tx_map(struct igb_ring *tx_ring,
4218                        struct igb_tx_buffer *first,
4219                        const u8 hdr_len)
4220 {
4221         struct sk_buff *skb = first->skb;
4222         struct igb_tx_buffer *tx_buffer_info;
4223         union e1000_adv_tx_desc *tx_desc;
4224         dma_addr_t dma;
4225         struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4226         unsigned int data_len = skb->data_len;
4227         unsigned int size = skb_headlen(skb);
4228         unsigned int paylen = skb->len - hdr_len;
4229         __le32 cmd_type;
4230         u32 tx_flags = first->tx_flags;
4231         u16 i = tx_ring->next_to_use;
4232
4233         tx_desc = IGB_TX_DESC(tx_ring, i);
4234
4235         igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4236         cmd_type = igb_tx_cmd_type(tx_flags);
4237
4238         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4239         if (dma_mapping_error(tx_ring->dev, dma))
4240                 goto dma_error;
4241
4242         /* record length, and DMA address */
4243         first->length = size;
4244         first->dma = dma;
4245         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4246
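             /* walk the skb: the inner loop splits any buffer larger than
              * IGB_MAX_DATA_PER_TXD across several descriptors, the outer
              * loop then maps each page fragment until data_len is consumed
              */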
4247         for (;;) {
4248                 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4249                         tx_desc->read.cmd_type_len =
4250                                 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4251
4252                         i++;
4253                         tx_desc++;
4254                         if (i == tx_ring->count) {
4255                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
4256                                 i = 0;
4257                         }
4258
4259                         dma += IGB_MAX_DATA_PER_TXD;
4260                         size -= IGB_MAX_DATA_PER_TXD;
4261
4262                         tx_desc->read.olinfo_status = 0;
4263                         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4264                 }
4265
4266                 if (likely(!data_len))
4267                         break;
4268
4269                 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4270
4271                 i++;
4272                 tx_desc++;
4273                 if (i == tx_ring->count) {
4274                         tx_desc = IGB_TX_DESC(tx_ring, 0);
4275                         i = 0;
4276                 }
4277
4278                 size = frag->size;
4279                 data_len -= size;
4280
4281                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4282                                    size, DMA_TO_DEVICE);
4283                 if (dma_mapping_error(tx_ring->dev, dma))
4284                         goto dma_error;
4285
4286                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4287                 tx_buffer_info->length = size;
4288                 tx_buffer_info->dma = dma;
4289
4290                 tx_desc->read.olinfo_status = 0;
4291                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4292
4293                 frag++;
4294         }
4295
4296         /* write last descriptor with RS and EOP bits */
4297         cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4298         tx_desc->read.cmd_type_len = cmd_type;
4299
4300         /* set the timestamp */
4301         first->time_stamp = jiffies;
4302
4303         /*
4304          * Force memory writes to complete before letting h/w know there
4305          * are new descriptors to fetch.  (Only applicable for weak-ordered
4306          * memory model archs, such as IA-64).
4307          *
4308          * We also need this memory barrier to make certain all of the
4309          * status bits have been updated before next_to_watch is written.
4310          */
4311         wmb();
4312
4313         /* set next_to_watch value indicating a packet is present */
4314         first->next_to_watch = tx_desc;
4315
4316         i++;
4317         if (i == tx_ring->count)
4318                 i = 0;
4319
4320         tx_ring->next_to_use = i;
4321
4322         writel(i, tx_ring->tail);
4323
4324         /* we need this if more than one processor can write to our tail
4325          * at a time, it synchronizes IO on IA64/Altix systems */
4326         mmiowb();
4327
4328         return;
4329
4330 dma_error:
4331         dev_err(tx_ring->dev, "TX DMA map failed\n");
4332
4333         /* clear dma mappings for failed tx_buffer_info map */
4334         for (;;) {
4335                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4336                 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4337                 if (tx_buffer_info == first)
4338                         break;
4339                 if (i == 0)
4340                         i = tx_ring->count;
4341                 i--;
4342         }
4343
4344         tx_ring->next_to_use = i;
4345 }
4346
4347 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4348 {
4349         struct net_device *netdev = tx_ring->netdev;
4350
4351         netif_stop_subqueue(netdev, tx_ring->queue_index);
4352
4353         /* Herbert's original patch had:
4354          *  smp_mb__after_netif_stop_queue();
4355          * but since that doesn't exist yet, just open code it. */
4356         smp_mb();
4357
4358         /* We need to check again in case another CPU has just
4359          * made room available. */
4360         if (igb_desc_unused(tx_ring) < size)
4361                 return -EBUSY;
4362
4363         /* A reprieve! */
4364         netif_wake_subqueue(netdev, tx_ring->queue_index);
4365
4366         u64_stats_update_begin(&tx_ring->tx_syncp2);
4367         tx_ring->tx_stats.restart_queue2++;
4368         u64_stats_update_end(&tx_ring->tx_syncp2);
4369
4370         return 0;
4371 }
4372
4373 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4374 {
4375         if (igb_desc_unused(tx_ring) >= size)
4376                 return 0;
4377         return __igb_maybe_stop_tx(tx_ring, size);
4378 }
4379
4380 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4381                                 struct igb_ring *tx_ring)
4382 {
4383         struct igb_tx_buffer *first;
4384         int tso;
4385         u32 tx_flags = 0;
4386         __be16 protocol = vlan_get_protocol(skb);
4387         u8 hdr_len = 0;
4388
4389         /* need: 1 descriptor per page,
4390          *       + 2 desc gap to keep tail from touching head,
4391          *       + 1 desc for skb->data,
4392          *       + 1 desc for context descriptor,
4393          * otherwise try next time */
4394         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4395                 /* this is a hard error */
4396                 return NETDEV_TX_BUSY;
4397         }
4398
4399         /* record the location of the first descriptor for this packet */
4400         first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4401         first->skb = skb;
4402         first->bytecount = skb->len;
4403         first->gso_segs = 1;
4404
4405         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4406                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4407                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4408         }
4409
4410         if (vlan_tx_tag_present(skb)) {
4411                 tx_flags |= IGB_TX_FLAGS_VLAN;
4412                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4413         }
4414
4415         /* record initial flags and protocol */
4416         first->tx_flags = tx_flags;
4417         first->protocol = protocol;
4418
4419         tso = igb_tso(tx_ring, first, &hdr_len);
4420         if (tso < 0)
4421                 goto out_drop;
4422         else if (!tso)
4423                 igb_tx_csum(tx_ring, first);
4424
4425         igb_tx_map(tx_ring, first, hdr_len);
4426
4427         /* Make sure there is space in the ring for the next send. */
4428         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4429
4430         return NETDEV_TX_OK;
4431
4432 out_drop:
4433         igb_unmap_and_free_tx_resource(tx_ring, first);
4434
4435         return NETDEV_TX_OK;
4436 }
4437
4438 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4439                                                     struct sk_buff *skb)
4440 {
4441         unsigned int r_idx = skb->queue_mapping;
4442
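             /* the stack may hand us a queue index beyond the number of Tx
              * queues we allocated, so fold it back into the valid range
              */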
4443         if (r_idx >= adapter->num_tx_queues)
4444                 r_idx = r_idx % adapter->num_tx_queues;
4445
4446         return adapter->tx_ring[r_idx];
4447 }
4448
4449 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4450                                   struct net_device *netdev)
4451 {
4452         struct igb_adapter *adapter = netdev_priv(netdev);
4453
4454         if (test_bit(__IGB_DOWN, &adapter->state)) {
4455                 dev_kfree_skb_any(skb);
4456                 return NETDEV_TX_OK;
4457         }
4458
4459         if (skb->len <= 0) {
4460                 dev_kfree_skb_any(skb);
4461                 return NETDEV_TX_OK;
4462         }
4463
4464         /*
4465          * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4466          * in order to meet this minimum size requirement.
4467          */
4468         if (skb->len < 17) {
4469                 if (skb_padto(skb, 17))
4470                         return NETDEV_TX_OK;
4471                 skb->len = 17;
4472         }
4473
4474         return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4475 }
4476
4477 /**
4478  * igb_tx_timeout - Respond to a Tx Hang
4479  * @netdev: network interface device structure
4480  **/
4481 static void igb_tx_timeout(struct net_device *netdev)
4482 {
4483         struct igb_adapter *adapter = netdev_priv(netdev);
4484         struct e1000_hw *hw = &adapter->hw;
4485
4486         /* Do the reset outside of interrupt context */
4487         adapter->tx_timeout_count++;
4488
4489         if (hw->mac.type == e1000_82580)
4490                 hw->dev_spec._82575.global_device_reset = true;
4491
4492         schedule_work(&adapter->reset_task);
4493         wr32(E1000_EICS,
4494              (adapter->eims_enable_mask & ~adapter->eims_other));
4495 }
4496
4497 static void igb_reset_task(struct work_struct *work)
4498 {
4499         struct igb_adapter *adapter;
4500         adapter = container_of(work, struct igb_adapter, reset_task);
4501
4502         igb_dump(adapter);
4503         netdev_err(adapter->netdev, "Reset adapter\n");
4504         igb_reinit_locked(adapter);
4505 }
4506
4507 /**
4508  * igb_get_stats64 - Get System Network Statistics
4509  * @netdev: network interface device structure
4510  * @stats: rtnl_link_stats64 pointer
4511  *
4512  **/
4513 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4514                                                  struct rtnl_link_stats64 *stats)
4515 {
4516         struct igb_adapter *adapter = netdev_priv(netdev);
4517
4518         spin_lock(&adapter->stats64_lock);
4519         igb_update_stats(adapter, &adapter->stats64);
4520         memcpy(stats, &adapter->stats64, sizeof(*stats));
4521         spin_unlock(&adapter->stats64_lock);
4522
4523         return stats;
4524 }
4525
4526 /**
4527  * igb_change_mtu - Change the Maximum Transfer Unit
4528  * @netdev: network interface device structure
4529  * @new_mtu: new value for maximum frame size
4530  *
4531  * Returns 0 on success, negative on failure
4532  **/
4533 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4534 {
4535         struct igb_adapter *adapter = netdev_priv(netdev);
4536         struct pci_dev *pdev = adapter->pdev;
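             /* max_frame is the on-wire frame size: MTU plus Ethernet header,
              * FCS and room for a single VLAN tag
              */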
4537         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4538
4539         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4540                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4541                 return -EINVAL;
4542         }
4543
4544 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4545         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4546                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4547                 return -EINVAL;
4548         }
4549
4550         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4551                 msleep(1);
4552
4553         /* igb_down has a dependency on max_frame_size */
4554         adapter->max_frame_size = max_frame;
4555
4556         if (netif_running(netdev))
4557                 igb_down(adapter);
4558
4559         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4560                  netdev->mtu, new_mtu);
4561         netdev->mtu = new_mtu;
4562
4563         if (netif_running(netdev))
4564                 igb_up(adapter);
4565         else
4566                 igb_reset(adapter);
4567
4568         clear_bit(__IGB_RESETTING, &adapter->state);
4569
4570         return 0;
4571 }
4572
4573 /**
4574  * igb_update_stats - Update the board statistics counters
4575  * @adapter: board private structure
4576  **/
4577
4578 void igb_update_stats(struct igb_adapter *adapter,
4579                       struct rtnl_link_stats64 *net_stats)
4580 {
4581         struct e1000_hw *hw = &adapter->hw;
4582         struct pci_dev *pdev = adapter->pdev;
4583         u32 reg, mpc;
4584         u16 phy_tmp;
4585         int i;
4586         u64 bytes, packets;
4587         unsigned int start;
4588         u64 _bytes, _packets;
4589
4590 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4591
4592         /*
4593          * Prevent stats update while adapter is being reset, or if the pci
4594          * connection is down.
4595          */
4596         if (adapter->link_speed == 0)
4597                 return;
4598         if (pci_channel_offline(pdev))
4599                 return;
4600
4601         bytes = 0;
4602         packets = 0;
4603         for (i = 0; i < adapter->num_rx_queues; i++) {
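                     /* RQDPC is the per-queue receive drop count, masked to
                      * its 12-bit counter field; accumulating it here relies
                      * on the register clearing when it is read
                      */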
4604                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4605                 struct igb_ring *ring = adapter->rx_ring[i];
4606
4607                 ring->rx_stats.drops += rqdpc_tmp;
4608                 net_stats->rx_fifo_errors += rqdpc_tmp;
4609
4610                 do {
4611                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4612                         _bytes = ring->rx_stats.bytes;
4613                         _packets = ring->rx_stats.packets;
4614                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4615                 bytes += _bytes;
4616                 packets += _packets;
4617         }
4618
4619         net_stats->rx_bytes = bytes;
4620         net_stats->rx_packets = packets;
4621
4622         bytes = 0;
4623         packets = 0;
4624         for (i = 0; i < adapter->num_tx_queues; i++) {
4625                 struct igb_ring *ring = adapter->tx_ring[i];
4626                 do {
4627                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4628                         _bytes = ring->tx_stats.bytes;
4629                         _packets = ring->tx_stats.packets;
4630                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4631                 bytes += _bytes;
4632                 packets += _packets;
4633         }
4634         net_stats->tx_bytes = bytes;
4635         net_stats->tx_packets = packets;
4636
4637         /* read stats registers */
4638         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4639         adapter->stats.gprc += rd32(E1000_GPRC);
4640         adapter->stats.gorc += rd32(E1000_GORCL);
4641         rd32(E1000_GORCH); /* clear GORCL */
4642         adapter->stats.bprc += rd32(E1000_BPRC);
4643         adapter->stats.mprc += rd32(E1000_MPRC);
4644         adapter->stats.roc += rd32(E1000_ROC);
4645
4646         adapter->stats.prc64 += rd32(E1000_PRC64);
4647         adapter->stats.prc127 += rd32(E1000_PRC127);
4648         adapter->stats.prc255 += rd32(E1000_PRC255);
4649         adapter->stats.prc511 += rd32(E1000_PRC511);
4650         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4651         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4652         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4653         adapter->stats.sec += rd32(E1000_SEC);
4654
4655         mpc = rd32(E1000_MPC);
4656         adapter->stats.mpc += mpc;
4657         net_stats->rx_fifo_errors += mpc;
4658         adapter->stats.scc += rd32(E1000_SCC);
4659         adapter->stats.ecol += rd32(E1000_ECOL);
4660         adapter->stats.mcc += rd32(E1000_MCC);
4661         adapter->stats.latecol += rd32(E1000_LATECOL);
4662         adapter->stats.dc += rd32(E1000_DC);
4663         adapter->stats.rlec += rd32(E1000_RLEC);
4664         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4665         adapter->stats.xontxc += rd32(E1000_XONTXC);
4666         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4667         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4668         adapter->stats.fcruc += rd32(E1000_FCRUC);
4669         adapter->stats.gptc += rd32(E1000_GPTC);
4670         adapter->stats.gotc += rd32(E1000_GOTCL);
4671         rd32(E1000_GOTCH); /* clear GOTCL */
4672         adapter->stats.rnbc += rd32(E1000_RNBC);
4673         adapter->stats.ruc += rd32(E1000_RUC);
4674         adapter->stats.rfc += rd32(E1000_RFC);
4675         adapter->stats.rjc += rd32(E1000_RJC);
4676         adapter->stats.tor += rd32(E1000_TORH);
4677         adapter->stats.tot += rd32(E1000_TOTH);
4678         adapter->stats.tpr += rd32(E1000_TPR);
4679
4680         adapter->stats.ptc64 += rd32(E1000_PTC64);
4681         adapter->stats.ptc127 += rd32(E1000_PTC127);
4682         adapter->stats.ptc255 += rd32(E1000_PTC255);
4683         adapter->stats.ptc511 += rd32(E1000_PTC511);
4684         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4685         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4686
4687         adapter->stats.mptc += rd32(E1000_MPTC);
4688         adapter->stats.bptc += rd32(E1000_BPTC);
4689
4690         adapter->stats.tpt += rd32(E1000_TPT);
4691         adapter->stats.colc += rd32(E1000_COLC);
4692
4693         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4694         /* read internal phy specific stats */
4695         reg = rd32(E1000_CTRL_EXT);
4696         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4697                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4698                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4699         }
4700
4701         adapter->stats.tsctc += rd32(E1000_TSCTC);
4702         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4703
4704         adapter->stats.iac += rd32(E1000_IAC);
4705         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4706         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4707         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4708         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4709         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4710         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4711         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4712         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4713
4714         /* Fill out the OS statistics structure */
4715         net_stats->multicast = adapter->stats.mprc;
4716         net_stats->collisions = adapter->stats.colc;
4717
4718         /* Rx Errors */
4719
4720         /* RLEC on some newer hardware can be incorrect so build
4721          * our own version based on RUC and ROC */
4722         net_stats->rx_errors = adapter->stats.rxerrc +
4723                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4724                 adapter->stats.ruc + adapter->stats.roc +
4725                 adapter->stats.cexterr;
4726         net_stats->rx_length_errors = adapter->stats.ruc +
4727                                       adapter->stats.roc;
4728         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4729         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4730         net_stats->rx_missed_errors = adapter->stats.mpc;
4731
4732         /* Tx Errors */
4733         net_stats->tx_errors = adapter->stats.ecol +
4734                                adapter->stats.latecol;
4735         net_stats->tx_aborted_errors = adapter->stats.ecol;
4736         net_stats->tx_window_errors = adapter->stats.latecol;
4737         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4738
4739         /* Tx Dropped needs to be maintained elsewhere */
4740
4741         /* Phy Stats */
4742         if (hw->phy.media_type == e1000_media_type_copper) {
4743                 if ((adapter->link_speed == SPEED_1000) &&
4744                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4745                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4746                         adapter->phy_stats.idle_errors += phy_tmp;
4747                 }
4748         }
4749
4750         /* Management Stats */
4751         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4752         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4753         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4754
4755         /* OS2BMC Stats */
4756         reg = rd32(E1000_MANC);
4757         if (reg & E1000_MANC_EN_BMC2OS) {
4758                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4759                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4760                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4761                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4762         }
4763 }
4764
4765 static irqreturn_t igb_msix_other(int irq, void *data)
4766 {
4767         struct igb_adapter *adapter = data;
4768         struct e1000_hw *hw = &adapter->hw;
4769         u32 icr = rd32(E1000_ICR);
4770         /* reading ICR causes bit 31 of EICR to be cleared */
4771
4772         if (icr & E1000_ICR_DRSTA)
4773                 schedule_work(&adapter->reset_task);
4774
4775         if (icr & E1000_ICR_DOUTSYNC) {
4776                 /* HW is reporting DMA is out of sync */
4777                 adapter->stats.doosync++;
4778                 /* The DMA Out of Sync is also an indication of a spoof event
4779                  * in IOV mode. Check the Wrong VM Behavior register to
4780                  * see if it is really a spoof event. */
4781                 igb_check_wvbr(adapter);
4782         }
4783
4784         /* Check for a mailbox event */
4785         if (icr & E1000_ICR_VMMB)
4786                 igb_msg_task(adapter);
4787
4788         if (icr & E1000_ICR_LSC) {
4789                 hw->mac.get_link_status = 1;
4790                 /* guard against interrupt when we're going down */
4791                 if (!test_bit(__IGB_DOWN, &adapter->state))
4792                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4793         }
4794
4795         if (adapter->vfs_allocated_count)
4796                 wr32(E1000_IMS, E1000_IMS_LSC |
4797                                 E1000_IMS_VMMB |
4798                                 E1000_IMS_DOUTSYNC);
4799         else
4800                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4801         wr32(E1000_EIMS, adapter->eims_other);
4802
4803         return IRQ_HANDLED;
4804 }
4805
4806 static void igb_write_itr(struct igb_q_vector *q_vector)
4807 {
4808         struct igb_adapter *adapter = q_vector->adapter;
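             /* set_itr acts as a write-pending flag: the register is only
              * touched when a new moderation value has been computed, and the
              * flag is cleared once the value has been written below
              */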
4809         u32 itr_val = q_vector->itr_val & 0x7FFC;
4810
4811         if (!q_vector->set_itr)
4812                 return;
4813
4814         if (!itr_val)
4815                 itr_val = 0x4;
4816
4817         if (adapter->hw.mac.type == e1000_82575)
4818                 itr_val |= itr_val << 16;
4819         else
4820                 itr_val |= E1000_EITR_CNT_IGNR;
4821
4822         writel(itr_val, q_vector->itr_register);
4823         q_vector->set_itr = 0;
4824 }
4825
4826 static irqreturn_t igb_msix_ring(int irq, void *data)
4827 {
4828         struct igb_q_vector *q_vector = data;
4829
4830         /* Write the ITR value calculated from the previous interrupt. */
4831         igb_write_itr(q_vector);
4832
4833         napi_schedule(&q_vector->napi);
4834
4835         return IRQ_HANDLED;
4836 }
4837
4838 #ifdef CONFIG_IGB_DCA
4839 static void igb_update_dca(struct igb_q_vector *q_vector)
4840 {
4841         struct igb_adapter *adapter = q_vector->adapter;
4842         struct e1000_hw *hw = &adapter->hw;
4843         int cpu = get_cpu();
4844
4845         if (q_vector->cpu == cpu)
4846                 goto out_no_update;
4847
4848         if (q_vector->tx.ring) {
4849                 int q = q_vector->tx.ring->reg_idx;
4850                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4851                 if (hw->mac.type == e1000_82575) {
4852                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4853                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4854                 } else {
4855                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4856                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4857                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4858                 }
4859                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4860                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4861         }
4862         if (q_vector->rx.ring) {
4863                 int q = q_vector->rx.ring->reg_idx;
4864                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4865                 if (hw->mac.type == e1000_82575) {
4866                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4867                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4868                 } else {
4869                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4870                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4871                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4872                 }
4873                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4874                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4875                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4876                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4877         }
4878         q_vector->cpu = cpu;
4879 out_no_update:
4880         put_cpu();
4881 }
4882
4883 static void igb_setup_dca(struct igb_adapter *adapter)
4884 {
4885         struct e1000_hw *hw = &adapter->hw;
4886         int i;
4887
4888         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4889                 return;
4890
4891         /* Always use CB2 mode, difference is masked in the CB driver. */
4892         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4893
4894         for (i = 0; i < adapter->num_q_vectors; i++) {
4895                 adapter->q_vector[i]->cpu = -1;
4896                 igb_update_dca(adapter->q_vector[i]);
4897         }
4898 }
4899
4900 static int __igb_notify_dca(struct device *dev, void *data)
4901 {
4902         struct net_device *netdev = dev_get_drvdata(dev);
4903         struct igb_adapter *adapter = netdev_priv(netdev);
4904         struct pci_dev *pdev = adapter->pdev;
4905         struct e1000_hw *hw = &adapter->hw;
4906         unsigned long event = *(unsigned long *)data;
4907
4908         switch (event) {
4909         case DCA_PROVIDER_ADD:
4910                 /* if already enabled, don't do it again */
4911                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4912                         break;
4913                 if (dca_add_requester(dev) == 0) {
4914                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4915                         dev_info(&pdev->dev, "DCA enabled\n");
4916                         igb_setup_dca(adapter);
4917                         break;
4918                 }
4919                 /* Fall Through since DCA is disabled. */
4920         case DCA_PROVIDER_REMOVE:
4921                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4922                         /* without this a class_device is left
4923                          * hanging around in the sysfs model */
4924                         dca_remove_requester(dev);
4925                         dev_info(&pdev->dev, "DCA disabled\n");
4926                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4927                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4928                 }
4929                 break;
4930         }
4931
4932         return 0;
4933 }
4934
4935 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4936                           void *p)
4937 {
4938         int ret_val;
4939
4940         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4941                                          __igb_notify_dca);
4942
4943         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4944 }
4945 #endif /* CONFIG_IGB_DCA */
4946
4947 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4948 {
4949         struct e1000_hw *hw = &adapter->hw;
4950         u32 ping;
4951         int i;
4952
4953         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4954                 ping = E1000_PF_CONTROL_MSG;
4955                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4956                         ping |= E1000_VT_MSGTYPE_CTS;
4957                 igb_write_mbx(hw, &ping, 1, i);
4958         }
4959 }
4960
4961 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4962 {
4963         struct e1000_hw *hw = &adapter->hw;
4964         u32 vmolr = rd32(E1000_VMOLR(vf));
4965         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4966
4967         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4968                             IGB_VF_FLAG_MULTI_PROMISC);
4969         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4970
4971         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4972                 vmolr |= E1000_VMOLR_MPME;
4973                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4974                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4975         } else {
4976                 /*
4977                  * if we have hashes and we are clearing a multicast promisc
4978                  * flag we need to write the hashes to the MTA as this step
4979                  * was previously skipped
4980                  */
4981                 if (vf_data->num_vf_mc_hashes > 30) {
4982                         vmolr |= E1000_VMOLR_MPME;
4983                 } else if (vf_data->num_vf_mc_hashes) {
4984                         int j;
4985                         vmolr |= E1000_VMOLR_ROMPE;
4986                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4987                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4988                 }
4989         }
4990
4991         wr32(E1000_VMOLR(vf), vmolr);
4992
4993         /* there are flags left unprocessed, likely not supported */
4994         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4995                 return -EINVAL;
4996
4997         return 0;
4998
4999 }
5000
5001 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5002                                   u32 *msgbuf, u32 vf)
5003 {
5004         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5005         u16 *hash_list = (u16 *)&msgbuf[1];
5006         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5007         int i;
5008
5009         /* salt away the number of multicast addresses assigned
5010          * to this VF; it is used later to restore the filters when the
5011          * PF multicast list changes
5012          */
5013         vf_data->num_vf_mc_hashes = n;
5014
5015         /* only up to 30 hash values supported */
5016         if (n > 30)
5017                 n = 30;
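             /* note: num_vf_mc_hashes above keeps the uncapped count, so a VF
              * with more than 30 addresses is placed in multicast promiscuous
              * mode when its filters are restored
              */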
5018
5019         /* store the hashes for later use */
5020         for (i = 0; i < n; i++)
5021                 vf_data->vf_mc_hashes[i] = hash_list[i];
5022
5023         /* Flush and reset the mta with the new values */
5024         igb_set_rx_mode(adapter->netdev);
5025
5026         return 0;
5027 }
5028
5029 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5030 {
5031         struct e1000_hw *hw = &adapter->hw;
5032         struct vf_data_storage *vf_data;
5033         int i, j;
5034
5035         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5036                 u32 vmolr = rd32(E1000_VMOLR(i));
5037                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5038
5039                 vf_data = &adapter->vf_data[i];
5040
5041                 if ((vf_data->num_vf_mc_hashes > 30) ||
5042                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5043                         vmolr |= E1000_VMOLR_MPME;
5044                 } else if (vf_data->num_vf_mc_hashes) {
5045                         vmolr |= E1000_VMOLR_ROMPE;
5046                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5047                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5048                 }
5049                 wr32(E1000_VMOLR(i), vmolr);
5050         }
5051 }
5052
5053 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5054 {
5055         struct e1000_hw *hw = &adapter->hw;
5056         u32 pool_mask, reg, vid;
5057         int i;
5058
5059         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5060
5061         /* Find the vlan filter for this id */
5062         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5063                 reg = rd32(E1000_VLVF(i));
5064
5065                 /* remove the vf from the pool */
5066                 reg &= ~pool_mask;
5067
5068                 /* if pool is empty then remove entry from vfta */
5069                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5070                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5071                         vid = reg & E1000_VLVF_VLANID_MASK;
5072                         igb_vfta_set(hw, vid, false);
5073                         reg = 0;
5074                 }
5075
5076                 wr32(E1000_VLVF(i), reg);
5077         }
5078
5079         adapter->vf_data[vf].vlans_enabled = 0;
5080 }
5081
5082 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5083 {
5084         struct e1000_hw *hw = &adapter->hw;
5085         u32 reg, i;
5086
5087         /* The vlvf table only exists on 82576 hardware and newer */
5088         if (hw->mac.type < e1000_82576)
5089                 return -1;
5090
5091         /* we only need to do this if VMDq is enabled */
5092         if (!adapter->vfs_allocated_count)
5093                 return -1;
5094
5095         /* Find the vlan filter for this id */
5096         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5097                 reg = rd32(E1000_VLVF(i));
5098                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5099                     vid == (reg & E1000_VLVF_VLANID_MASK))
5100                         break;
5101         }
5102
5103         if (add) {
5104                 if (i == E1000_VLVF_ARRAY_SIZE) {
5105                         /* Did not find a matching VLAN ID entry that was
5106                          * enabled.  Search for a free filter entry, i.e.
5107                          * one without the enable bit set
5108                          */
5109                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5110                                 reg = rd32(E1000_VLVF(i));
5111                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5112                                         break;
5113                         }
5114                 }
5115                 if (i < E1000_VLVF_ARRAY_SIZE) {
5116                         /* Found an enabled/available entry */
5117                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5118
5119                         /* if !enabled we need to set this up in vfta */
5120                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5121                                 /* add VID to filter table */
5122                                 igb_vfta_set(hw, vid, true);
5123                                 reg |= E1000_VLVF_VLANID_ENABLE;
5124                         }
5125                         reg &= ~E1000_VLVF_VLANID_MASK;
5126                         reg |= vid;
5127                         wr32(E1000_VLVF(i), reg);
5128
5129                         /* do not modify RLPML for PF devices */
5130                         if (vf >= adapter->vfs_allocated_count)
5131                                 return 0;
5132
5133                         if (!adapter->vf_data[vf].vlans_enabled) {
5134                                 u32 size;
5135                                 reg = rd32(E1000_VMOLR(vf));
5136                                 size = reg & E1000_VMOLR_RLPML_MASK;
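                                     /* grow the VF's max receive packet length
                                      * (RLPML) by 4 bytes to make room for the
                                      * VLAN tag
                                      */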
5137                                 size += 4;
5138                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5139                                 reg |= size;
5140                                 wr32(E1000_VMOLR(vf), reg);
5141                         }
5142
5143                         adapter->vf_data[vf].vlans_enabled++;
5144                         return 0;
5145                 }
5146         } else {
5147                 if (i < E1000_VLVF_ARRAY_SIZE) {
5148                         /* remove vf from the pool */
5149                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5150                         /* if pool is empty then remove entry from vfta */
5151                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5152                                 reg = 0;
5153                                 igb_vfta_set(hw, vid, false);
5154                         }
5155                         wr32(E1000_VLVF(i), reg);
5156
5157                         /* do not modify RLPML for PF devices */
5158                         if (vf >= adapter->vfs_allocated_count)
5159                                 return 0;
5160
5161                         adapter->vf_data[vf].vlans_enabled--;
5162                         if (!adapter->vf_data[vf].vlans_enabled) {
5163                                 u32 size;
5164                                 reg = rd32(E1000_VMOLR(vf));
5165                                 size = reg & E1000_VMOLR_RLPML_MASK;
5166                                 size -= 4;
5167                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5168                                 reg |= size;
5169                                 wr32(E1000_VMOLR(vf), reg);
5170                         }
5171                 }
5172         }
5173         return 0;
5174 }
5175
5176 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5177 {
5178         struct e1000_hw *hw = &adapter->hw;
5179
5180         if (vid)
5181                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5182         else
5183                 wr32(E1000_VMVIR(vf), 0);
5184 }
5185
5186 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5187                                int vf, u16 vlan, u8 qos)
5188 {
5189         int err = 0;
5190         struct igb_adapter *adapter = netdev_priv(netdev);
5191
5192         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5193                 return -EINVAL;
5194         if (vlan || qos) {
5195                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5196                 if (err)
5197                         goto out;
5198                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5199                 igb_set_vmolr(adapter, vf, !vlan);
5200                 adapter->vf_data[vf].pf_vlan = vlan;
5201                 adapter->vf_data[vf].pf_qos = qos;
5202                 dev_info(&adapter->pdev->dev,
5203                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5204                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5205                         dev_warn(&adapter->pdev->dev,
5206                                  "The VF VLAN has been set,"
5207                                  " but the PF device is not up.\n");
5208                         dev_warn(&adapter->pdev->dev,
5209                                  "Bring the PF device up before"
5210                                  " attempting to use the VF device.\n");
5211                 }
5212         } else {
5213                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5214                                    false, vf);
5215                 igb_set_vmvir(adapter, vlan, vf);
5216                 igb_set_vmolr(adapter, vf, true);
5217                 adapter->vf_data[vf].pf_vlan = 0;
5218                 adapter->vf_data[vf].pf_qos = 0;
5219         }
5220 out:
5221         return err;
5222 }
5223
5224 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5225 {
5226         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5227         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5228
5229         return igb_vlvf_set(adapter, vid, add, vf);
5230 }
5231
5232 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5233 {
5234         /* clear flags - except flag that indicates PF has set the MAC */
5235         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5236         adapter->vf_data[vf].last_nack = jiffies;
5237
5238         /* reset offloads to defaults */
5239         igb_set_vmolr(adapter, vf, true);
5240
5241         /* reset vlans for device */
5242         igb_clear_vf_vfta(adapter, vf);
5243         if (adapter->vf_data[vf].pf_vlan)
5244                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5245                                     adapter->vf_data[vf].pf_vlan,
5246                                     adapter->vf_data[vf].pf_qos);
5247         else
5248                 igb_clear_vf_vfta(adapter, vf);
5249
5250         /* reset multicast table array for vf */
5251         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5252
5253         /* Flush and reset the mta with the new values */
5254         igb_set_rx_mode(adapter->netdev);
5255 }
5256
5257 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5258 {
5259         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5260
5261         /* generate a new mac address as we were hotplug removed/added */
5262         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5263                 random_ether_addr(vf_mac);
5264
5265         /* process remaining reset events */
5266         igb_vf_reset(adapter, vf);
5267 }
5268
5269 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5270 {
5271         struct e1000_hw *hw = &adapter->hw;
5272         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
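             /* VF MAC filters occupy RAR entries counting down from the top
              * of the table, one entry per VF
              */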
5273         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5274         u32 reg, msgbuf[3];
5275         u8 *addr = (u8 *)(&msgbuf[1]);
5276
5277         /* process all the same items cleared in a function level reset */
5278         igb_vf_reset(adapter, vf);
5279
5280         /* set vf mac address */
5281         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5282
5283         /* enable transmit and receive for vf */
5284         reg = rd32(E1000_VFTE);
5285         wr32(E1000_VFTE, reg | (1 << vf));
5286         reg = rd32(E1000_VFRE);
5287         wr32(E1000_VFRE, reg | (1 << vf));
5288
5289         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5290
5291         /* reply to reset with ack and vf mac address */
5292         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5293         memcpy(addr, vf_mac, 6);
5294         igb_write_mbx(hw, msgbuf, 3, vf);
5295 }
5296
5297 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5298 {
5299         /*
5300          * The VF MAC Address is stored in a packed array of bytes
5301          * starting at the second 32 bit word of the msg array
5302          */
5303         unsigned char *addr = (unsigned char *)&msg[1];
5304         int err = -1;
5305
5306         if (is_valid_ether_addr(addr))
5307                 err = igb_set_vf_mac(adapter, vf, addr);
5308
5309         return err;
5310 }
5311
5312 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5313 {
5314         struct e1000_hw *hw = &adapter->hw;
5315         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5316         u32 msg = E1000_VT_MSGTYPE_NACK;
5317
5318         /* if device isn't clear to send it shouldn't be reading either */
5319         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5320             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5321                 igb_write_mbx(hw, &msg, 1, vf);
5322                 vf_data->last_nack = jiffies;
5323         }
5324 }
5325
5326 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5327 {
5328         struct pci_dev *pdev = adapter->pdev;
5329         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5330         struct e1000_hw *hw = &adapter->hw;
5331         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5332         s32 retval;
5333
5334         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5335
5336         if (retval) {
5337                 /* if receive failed revoke VF CTS stats and restart init */
5338                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5339                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5340                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5341                         return;
5342                 goto out;
5343         }
5344
5345         /* this is a message we already processed, do nothing */
5346         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5347                 return;
5348
5349         /*
5350          * until the vf completes a reset it should not be
5351          * allowed to start any configuration.
5352          */
5353
5354         if (msgbuf[0] == E1000_VF_RESET) {
5355                 igb_vf_reset_msg(adapter, vf);
5356                 return;
5357         }
5358
5359         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5360                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5361                         return;
5362                 retval = -1;
5363                 goto out;
5364         }
5365
5366         switch ((msgbuf[0] & 0xFFFF)) {
5367         case E1000_VF_SET_MAC_ADDR:
5368                 retval = -EINVAL;
5369                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5370                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5371                 else
5372                         dev_warn(&pdev->dev,
5373                                  "VF %d attempted to override administratively "
5374                                  "set MAC address\nReload the VF driver to "
5375                                  "resume operations\n", vf);
5376                 break;
5377         case E1000_VF_SET_PROMISC:
5378                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5379                 break;
5380         case E1000_VF_SET_MULTICAST:
5381                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5382                 break;
5383         case E1000_VF_SET_LPE:
5384                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5385                 break;
5386         case E1000_VF_SET_VLAN:
5387                 retval = -1;
5388                 if (vf_data->pf_vlan)
5389                         dev_warn(&pdev->dev,
5390                                  "VF %d attempted to override administratively "
5391                                  "set VLAN tag\nReload the VF driver to "
5392                                  "resume operations\n", vf);
5393                 else
5394                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5395                 break;
5396         default:
5397                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5398                 retval = -1;
5399                 break;
5400         }
5401
5402         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5403 out:
5404         /* notify the VF of the results of what it sent us */
5405         if (retval)
5406                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5407         else
5408                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5409
5410         igb_write_mbx(hw, msgbuf, 1, vf);
5411 }
5412
5413 static void igb_msg_task(struct igb_adapter *adapter)
5414 {
5415         struct e1000_hw *hw = &adapter->hw;
5416         u32 vf;
5417
5418         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5419                 /* process any reset requests */
5420                 if (!igb_check_for_rst(hw, vf))
5421                         igb_vf_reset_event(adapter, vf);
5422
5423                 /* process any messages pending */
5424                 if (!igb_check_for_msg(hw, vf))
5425                         igb_rcv_msg_from_vf(adapter, vf);
5426
5427                 /* process any acks */
5428                 if (!igb_check_for_ack(hw, vf))
5429                         igb_rcv_ack_from_vf(adapter, vf);
5430         }
5431 }
5432
5433 /**
5434  *  igb_set_uta - Set unicast filter table address
5435  *  @adapter: board private structure
5436  *
5437  *  The unicast table address is a register array of 32-bit registers.
5438  *  The table is meant to be used in a way similar to how the MTA is used
5439  *  however due to certain limitations in the hardware it is necessary to
5440  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5441  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5442  **/
5443 static void igb_set_uta(struct igb_adapter *adapter)
5444 {
5445         struct e1000_hw *hw = &adapter->hw;
5446         int i;
5447
5448         /* The UTA table only exists on 82576 hardware and newer */
5449         if (hw->mac.type < e1000_82576)
5450                 return;
5451
5452         /* we only need to do this if VMDq is enabled */
5453         if (!adapter->vfs_allocated_count)
5454                 return;
5455
5456         for (i = 0; i < hw->mac.uta_reg_count; i++)
5457                 array_wr32(E1000_UTA, i, ~0);
5458 }
5459
5460 /**
5461  * igb_intr_msi - Interrupt Handler
5462  * @irq: interrupt number
5463  * @data: pointer to a network interface device structure
5464  **/
5465 static irqreturn_t igb_intr_msi(int irq, void *data)
5466 {
5467         struct igb_adapter *adapter = data;
5468         struct igb_q_vector *q_vector = adapter->q_vector[0];
5469         struct e1000_hw *hw = &adapter->hw;
5470         /* read ICR disables interrupts using IAM */
5471         u32 icr = rd32(E1000_ICR);
5472
5473         igb_write_itr(q_vector);
5474
5475         if (icr & E1000_ICR_DRSTA)
5476                 schedule_work(&adapter->reset_task);
5477
5478         if (icr & E1000_ICR_DOUTSYNC) {
5479                 /* HW is reporting DMA is out of sync */
5480                 adapter->stats.doosync++;
5481         }
5482
5483         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5484                 hw->mac.get_link_status = 1;
5485                 if (!test_bit(__IGB_DOWN, &adapter->state))
5486                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5487         }
5488
5489         napi_schedule(&q_vector->napi);
5490
5491         return IRQ_HANDLED;
5492 }
5493
5494 /**
5495  * igb_intr - Legacy Interrupt Handler
5496  * @irq: interrupt number
5497  * @data: pointer to a network interface device structure
5498  **/
5499 static irqreturn_t igb_intr(int irq, void *data)
5500 {
5501         struct igb_adapter *adapter = data;
5502         struct igb_q_vector *q_vector = adapter->q_vector[0];
5503         struct e1000_hw *hw = &adapter->hw;
5504         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5505          * need for the IMC write */
5506         u32 icr = rd32(E1000_ICR);
5507
5508         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5509          * not set, then the adapter didn't send an interrupt */
5510         if (!(icr & E1000_ICR_INT_ASSERTED))
5511                 return IRQ_NONE;
5512
5513         igb_write_itr(q_vector);
5514
5515         if (icr & E1000_ICR_DRSTA)
5516                 schedule_work(&adapter->reset_task);
5517
5518         if (icr & E1000_ICR_DOUTSYNC) {
5519                 /* HW is reporting DMA is out of sync */
5520                 adapter->stats.doosync++;
5521         }
5522
5523         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5524                 hw->mac.get_link_status = 1;
5525                 /* guard against interrupt when we're going down */
5526                 if (!test_bit(__IGB_DOWN, &adapter->state))
5527                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5528         }
5529
5530         napi_schedule(&q_vector->napi);
5531
5532         return IRQ_HANDLED;
5533 }
5534
5535 void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5536 {
5537         struct igb_adapter *adapter = q_vector->adapter;
5538         struct e1000_hw *hw = &adapter->hw;
5539
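        /* update ITR only when one of the dynamic (adaptive) moderation modes is in use */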
5540         if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5541             (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5542                 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5543                         igb_set_itr(q_vector);
5544                 else
5545                         igb_update_ring_itr(q_vector);
5546         }
5547
5548         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5549                 if (adapter->msix_entries)
5550                         wr32(E1000_EIMS, q_vector->eims_value);
5551                 else
5552                         igb_irq_enable(adapter);
5553         }
5554 }
5555
5556 /**
5557  * igb_poll - NAPI Rx polling callback
5558  * @napi: napi polling structure
5559  * @budget: count of how many packets we should handle
5560  **/
5561 static int igb_poll(struct napi_struct *napi, int budget)
5562 {
5563         struct igb_q_vector *q_vector = container_of(napi,
5564                                                      struct igb_q_vector,
5565                                                      napi);
5566         bool clean_complete = true;
5567
5568 #ifdef CONFIG_IGB_DCA
5569         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5570                 igb_update_dca(q_vector);
5571 #endif
5572         if (q_vector->tx.ring)
5573                 clean_complete = igb_clean_tx_irq(q_vector);
5574
5575         if (q_vector->rx.ring)
5576                 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5577
5578         /* If all work not completed, return budget and keep polling */
5579         if (!clean_complete)
5580                 return budget;
5581
5582         /* If not enough Rx work done, exit the polling mode */
5583         napi_complete(napi);
5584         igb_ring_irq_enable(q_vector);
5585
5586         return 0;
5587 }
5588
5589 /**
5590  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5591  * @adapter: board private structure
5592  * @shhwtstamps: timestamp structure to update
5593  * @regval: unsigned 64bit system time value.
5594  *
5595  * We need to convert the system time value stored in the RX/TXSTMP registers
5596  * into a hwtstamp which can be used by the upper level timestamping functions
5597  */
5598 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5599                                    struct skb_shared_hwtstamps *shhwtstamps,
5600                                    u64 regval)
5601 {
5602         u64 ns;
5603
5604         /*
5605          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5606          * 24 to match the clock shift we set up earlier.
5607          */
5608         if (adapter->hw.mac.type == e1000_82580)
5609                 regval <<= IGB_82580_TSYNC_SHIFT;
5610
5611         ns = timecounter_cyc2time(&adapter->clock, regval);
5612         timecompare_update(&adapter->compare, ns);
5613         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5614         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5615         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5616 }
5617
5618 /**
5619  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5620  * @q_vector: pointer to q_vector containing needed info
5621  * @buffer_info: pointer to igb_tx_buffer structure
5622  *
5623  * If we were asked to do hardware stamping and such a time stamp is
5624  * available, then it must have been for this skb here because we only
5625  * allow one such packet into the queue.
5626  */
5627 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5628                             struct igb_tx_buffer *buffer_info)
5629 {
5630         struct igb_adapter *adapter = q_vector->adapter;
5631         struct e1000_hw *hw = &adapter->hw;
5632         struct skb_shared_hwtstamps shhwtstamps;
5633         u64 regval;
5634
5635         /* exit if the skb was not marked for hw timestamping or the TX stamp is not valid */
5636         if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5637             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5638                 return;
5639
5640         regval = rd32(E1000_TXSTMPL);
5641         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5642
5643         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5644         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5645 }
5646
5647 /**
5648  * igb_clean_tx_irq - Reclaim resources after transmit completes
5649  * @q_vector: pointer to q_vector containing needed info
5650  * returns true if ring is completely cleaned
5651  **/
5652 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5653 {
5654         struct igb_adapter *adapter = q_vector->adapter;
5655         struct igb_ring *tx_ring = q_vector->tx.ring;
5656         struct igb_tx_buffer *tx_buffer;
5657         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5658         unsigned int total_bytes = 0, total_packets = 0;
5659         unsigned int budget = q_vector->tx.work_limit;
5660         unsigned int i = tx_ring->next_to_clean;
5661
5662         if (test_bit(__IGB_DOWN, &adapter->state))
5663                 return true;
5664
5665         tx_buffer = &tx_ring->tx_buffer_info[i];
5666         tx_desc = IGB_TX_DESC(tx_ring, i);
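        /* track i as a negative offset from the end of the ring so the
         * wrap check below is a simple test against zero */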
5667         i -= tx_ring->count;
5668
5669         for (; budget; budget--) {
5670                 eop_desc = tx_buffer->next_to_watch;
5671
5672                 /* prevent any other reads prior to eop_desc */
5673                 rmb();
5674
5675                 /* if next_to_watch is not set then there is no work pending */
5676                 if (!eop_desc)
5677                         break;
5678
5679                 /* if DD is not set pending work has not been completed */
5680                 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5681                         break;
5682
5683                 /* clear next_to_watch to prevent false hangs */
5684                 tx_buffer->next_to_watch = NULL;
5685
5686                 /* update the statistics for this packet */
5687                 total_bytes += tx_buffer->bytecount;
5688                 total_packets += tx_buffer->gso_segs;
5689
5690                 /* retrieve hardware timestamp */
5691                 igb_tx_hwtstamp(q_vector, tx_buffer);
5692
5693                 /* free the skb */
5694                 dev_kfree_skb_any(tx_buffer->skb);
5695                 tx_buffer->skb = NULL;
5696
5697                 /* unmap skb header data */
5698                 dma_unmap_single(tx_ring->dev,
5699                                  tx_buffer->dma,
5700                                  tx_buffer->length,
5701                                  DMA_TO_DEVICE);
5702
5703                 /* clear last DMA location and unmap remaining buffers */
5704                 while (tx_desc != eop_desc) {
5705                         tx_buffer->dma = 0;
5706
5707                         tx_buffer++;
5708                         tx_desc++;
5709                         i++;
5710                         if (unlikely(!i)) {
5711                                 i -= tx_ring->count;
5712                                 tx_buffer = tx_ring->tx_buffer_info;
5713                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
5714                         }
5715
5716                         /* unmap any remaining paged data */
5717                         if (tx_buffer->dma) {
5718                                 dma_unmap_page(tx_ring->dev,
5719                                                tx_buffer->dma,
5720                                                tx_buffer->length,
5721                                                DMA_TO_DEVICE);
5722                         }
5723                 }
5724
5725                 /* clear last DMA location */
5726                 tx_buffer->dma = 0;
5727
5728                 /* move us one more past the eop_desc for start of next pkt */
5729                 tx_buffer++;
5730                 tx_desc++;
5731                 i++;
5732                 if (unlikely(!i)) {
5733                         i -= tx_ring->count;
5734                         tx_buffer = tx_ring->tx_buffer_info;
5735                         tx_desc = IGB_TX_DESC(tx_ring, 0);
5736                 }
5737         }
5738
5739         i += tx_ring->count;
5740         tx_ring->next_to_clean = i;
5741         u64_stats_update_begin(&tx_ring->tx_syncp);
5742         tx_ring->tx_stats.bytes += total_bytes;
5743         tx_ring->tx_stats.packets += total_packets;
5744         u64_stats_update_end(&tx_ring->tx_syncp);
5745         q_vector->tx.total_bytes += total_bytes;
5746         q_vector->tx.total_packets += total_packets;
5747
5748         if (tx_ring->detect_tx_hung) {
5749                 struct e1000_hw *hw = &adapter->hw;
5750
5751                 eop_desc = tx_buffer->next_to_watch;
5752
5753                 /* Detect a transmit hang in hardware; this serializes the
5754                  * check with the clearing of time_stamp and movement of i */
5755                 tx_ring->detect_tx_hung = false;
5756                 if (eop_desc &&
5757                     time_after(jiffies, tx_buffer->time_stamp +
5758                                (adapter->tx_timeout_factor * HZ)) &&
5759                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5760
5761                         /* detected Tx unit hang */
5762                         dev_err(tx_ring->dev,
5763                                 "Detected Tx Unit Hang\n"
5764                                 "  Tx Queue             <%d>\n"
5765                                 "  TDH                  <%x>\n"
5766                                 "  TDT                  <%x>\n"
5767                                 "  next_to_use          <%x>\n"
5768                                 "  next_to_clean        <%x>\n"
5769                                 "buffer_info[next_to_clean]\n"
5770                                 "  time_stamp           <%lx>\n"
5771                                 "  next_to_watch        <%p>\n"
5772                                 "  jiffies              <%lx>\n"
5773                                 "  desc.status          <%x>\n",
5774                                 tx_ring->queue_index,
5775                                 rd32(E1000_TDH(tx_ring->reg_idx)),
5776                                 readl(tx_ring->tail),
5777                                 tx_ring->next_to_use,
5778                                 tx_ring->next_to_clean,
5779                                 tx_buffer->time_stamp,
5780                                 eop_desc,
5781                                 jiffies,
5782                                 eop_desc->wb.status);
5783                         netif_stop_subqueue(tx_ring->netdev,
5784                                             tx_ring->queue_index);
5785
5786                         /* we are about to reset, no point in enabling stuff */
5787                         return true;
5788                 }
5789         }
5790
5791         if (unlikely(total_packets &&
5792                      netif_carrier_ok(tx_ring->netdev) &&
5793                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5794                 /* Make sure that anybody stopping the queue after this
5795                  * sees the new next_to_clean.
5796                  */
5797                 smp_mb();
5798                 if (__netif_subqueue_stopped(tx_ring->netdev,
5799                                              tx_ring->queue_index) &&
5800                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5801                         netif_wake_subqueue(tx_ring->netdev,
5802                                             tx_ring->queue_index);
5803
5804                         u64_stats_update_begin(&tx_ring->tx_syncp);
5805                         tx_ring->tx_stats.restart_queue++;
5806                         u64_stats_update_end(&tx_ring->tx_syncp);
5807                 }
5808         }
5809
5810         return !!budget;
5811 }
5812
5813 static inline void igb_rx_checksum(struct igb_ring *ring,
5814                                    u32 status_err, struct sk_buff *skb)
5815 {
5816         skb_checksum_none_assert(skb);
5817
5818         /* skip if the Ignore Checksum bit is set or Rx checksum is disabled through ethtool */
5819         if (!test_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags) ||
5820              (status_err & E1000_RXD_STAT_IXSM))
5821                 return;
5822
5823         /* TCP/UDP checksum error bit is set */
5824         if (status_err &
5825             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5826                 /*
5827                  * work around errata with SCTP packets where the TCPE (aka
5828                  * L4E) bit is set incorrectly on 64 byte (60 byte w/o CRC)
5829                  * packets; let the stack verify the crc32c instead
5830                  */
5831                 if (!((skb->len == 60) &&
5832                       test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5833                         u64_stats_update_begin(&ring->rx_syncp);
5834                         ring->rx_stats.csum_err++;
5835                         u64_stats_update_end(&ring->rx_syncp);
5836                 }
5837                 /* let the stack verify checksum errors */
5838                 return;
5839         }
5840         /* It must be a TCP or UDP packet with a valid checksum */
5841         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5842                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5843
5844         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5845 }
5846
5847 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5848                                    struct sk_buff *skb)
5849 {
5850         struct igb_adapter *adapter = q_vector->adapter;
5851         struct e1000_hw *hw = &adapter->hw;
5852         u64 regval;
5853
5854         /*
5855          * If this bit is set, then the RX registers contain the time stamp. No
5856          * other packet will be time stamped until we read these registers, so
5857          * read the registers to make them available again. Because only one
5858          * packet can be time stamped at a time, we know that the register
5859          * values must belong to this one here and therefore we don't need to
5860          * compare any of the additional attributes stored for it.
5861          *
5862          * If nothing went wrong, then it should have a shared tx_flags that we
5863          * can turn into a skb_shared_hwtstamps.
5864          */
5865         if (staterr & E1000_RXDADV_STAT_TSIP) {
5866                 u32 *stamp = (u32 *)skb->data;
5867                 regval = le32_to_cpu(*(stamp + 2));
5868                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5869                 skb_pull(skb, IGB_TS_HDR_LEN);
5870         } else {
5871                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5872                         return;
5873
5874                 regval = rd32(E1000_RXSTMPL);
5875                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5876         }
5877
5878         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5879 }
5880 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5881 {
5882         /* HW will not DMA in data larger than the given buffer, even if it
5883          * parses the (NFS, of course) header to be larger.  In that case, it
5884          * fills the header buffer and spills the rest into the page.
5885          */
5886         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5887                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5888         if (hlen > IGB_RX_HDR_LEN)
5889                 hlen = IGB_RX_HDR_LEN;
5890         return hlen;
5891 }
5892
5893 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5894 {
5895         struct igb_ring *rx_ring = q_vector->rx.ring;
5896         union e1000_adv_rx_desc *rx_desc;
5897         const int current_node = numa_node_id();
5898         unsigned int total_bytes = 0, total_packets = 0;
5899         u32 staterr;
5900         u16 cleaned_count = igb_desc_unused(rx_ring);
5901         u16 i = rx_ring->next_to_clean;
5902
5903         rx_desc = IGB_RX_DESC(rx_ring, i);
5904         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5905
5906         while (staterr & E1000_RXD_STAT_DD) {
5907                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5908                 struct sk_buff *skb = buffer_info->skb;
5909                 union e1000_adv_rx_desc *next_rxd;
5910
5911                 buffer_info->skb = NULL;
5912                 prefetch(skb->data);
5913
5914                 i++;
5915                 if (i == rx_ring->count)
5916                         i = 0;
5917
5918                 next_rxd = IGB_RX_DESC(rx_ring, i);
5919                 prefetch(next_rxd);
5920
5921                 /*
5922                  * This memory barrier is needed to keep us from reading
5923                  * any other fields out of the rx_desc until we know the
5924                  * RXD_STAT_DD bit is set
5925                  */
5926                 rmb();
5927
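                /* first pass for this packet: claim the bytes that were DMAed
                 * into the header buffer and unmap it (packet split) */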
5928                 if (!skb_is_nonlinear(skb)) {
5929                         __skb_put(skb, igb_get_hlen(rx_desc));
5930                         dma_unmap_single(rx_ring->dev, buffer_info->dma,
5931                                          IGB_RX_HDR_LEN,
5932                                          DMA_FROM_DEVICE);
5933                         buffer_info->dma = 0;
5934                 }
5935
5936                 if (rx_desc->wb.upper.length) {
5937                         u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5938
5939                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5940                                                 buffer_info->page,
5941                                                 buffer_info->page_offset,
5942                                                 length);
5943
5944                         skb->len += length;
5945                         skb->data_len += length;
5946                         skb->truesize += length;
5947
5948                         if ((page_count(buffer_info->page) != 1) ||
5949                             (page_to_nid(buffer_info->page) != current_node))
5950                                 buffer_info->page = NULL;
5951                         else
5952                                 get_page(buffer_info->page);
5953
5954                         dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5955                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5956                         buffer_info->page_dma = 0;
5957                 }
5958
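                /* packet spans multiple descriptors; hand the skb to the next
                 * buffer so the remaining fragments are chained onto it */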
5959                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5960                         struct igb_rx_buffer *next_buffer;
5961                         next_buffer = &rx_ring->rx_buffer_info[i];
5962                         buffer_info->skb = next_buffer->skb;
5963                         buffer_info->dma = next_buffer->dma;
5964                         next_buffer->skb = skb;
5965                         next_buffer->dma = 0;
5966                         goto next_desc;
5967                 }
5968
5969                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5970                         dev_kfree_skb_any(skb);
5971                         goto next_desc;
5972                 }
5973
5974                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5975                         igb_rx_hwtstamp(q_vector, staterr, skb);
5976                 total_bytes += skb->len;
5977                 total_packets++;
5978
5979                 igb_rx_checksum(rx_ring, staterr, skb);
5980
5981                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
5982
5983                 if (staterr & E1000_RXD_STAT_VP) {
5984                         u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5985
5986                         __vlan_hwaccel_put_tag(skb, vid);
5987                 }
5988                 napi_gro_receive(&q_vector->napi, skb);
5989
5990                 budget--;
5991 next_desc:
5992                 if (!budget)
5993                         break;
5994
5995                 cleaned_count++;
5996                 /* return some buffers to hardware, one at a time is too slow */
5997                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5998                         igb_alloc_rx_buffers(rx_ring, cleaned_count);
5999                         cleaned_count = 0;
6000                 }
6001
6002                 /* use prefetched values */
6003                 rx_desc = next_rxd;
6004                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
6005         }
6006
6007         rx_ring->next_to_clean = i;
6008         u64_stats_update_begin(&rx_ring->rx_syncp);
6009         rx_ring->rx_stats.packets += total_packets;
6010         rx_ring->rx_stats.bytes += total_bytes;
6011         u64_stats_update_end(&rx_ring->rx_syncp);
6012         q_vector->rx.total_packets += total_packets;
6013         q_vector->rx.total_bytes += total_bytes;
6014
6015         if (cleaned_count)
6016                 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6017
6018         return !!budget;
6019 }
6020
6021 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6022                                  struct igb_rx_buffer *bi)
6023 {
6024         struct sk_buff *skb = bi->skb;
6025         dma_addr_t dma = bi->dma;
6026
6027         if (dma)
6028                 return true;
6029
6030         if (likely(!skb)) {
6031                 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6032                                                 IGB_RX_HDR_LEN);
6033                 bi->skb = skb;
6034                 if (!skb) {
6035                         rx_ring->rx_stats.alloc_failed++;
6036                         return false;
6037                 }
6038
6039                 /* initialize skb for ring */
6040                 skb_record_rx_queue(skb, rx_ring->queue_index);
6041         }
6042
6043         dma = dma_map_single(rx_ring->dev, skb->data,
6044                              IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6045
6046         if (dma_mapping_error(rx_ring->dev, dma)) {
6047                 rx_ring->rx_stats.alloc_failed++;
6048                 return false;
6049         }
6050
6051         bi->dma = dma;
6052         return true;
6053 }
6054
6055 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6056                                   struct igb_rx_buffer *bi)
6057 {
6058         struct page *page = bi->page;
6059         dma_addr_t page_dma = bi->page_dma;
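        /* alternate between the two halves of the page on each refill */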
6060         unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6061
6062         if (page_dma)
6063                 return true;
6064
6065         if (!page) {
6066                 page = netdev_alloc_page(rx_ring->netdev);
6067                 bi->page = page;
6068                 if (unlikely(!page)) {
6069                         rx_ring->rx_stats.alloc_failed++;
6070                         return false;
6071                 }
6072         }
6073
6074         page_dma = dma_map_page(rx_ring->dev, page,
6075                                 page_offset, PAGE_SIZE / 2,
6076                                 DMA_FROM_DEVICE);
6077
6078         if (dma_mapping_error(rx_ring->dev, page_dma)) {
6079                 rx_ring->rx_stats.alloc_failed++;
6080                 return false;
6081         }
6082
6083         bi->page_dma = page_dma;
6084         bi->page_offset = page_offset;
6085         return true;
6086 }
6087
6088 /**
6089  * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6090  * @rx_ring: address of the Rx descriptor ring to refill
6091  **/
6092 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6093 {
6094         union e1000_adv_rx_desc *rx_desc;
6095         struct igb_rx_buffer *bi;
6096         u16 i = rx_ring->next_to_use;
6097
6098         rx_desc = IGB_RX_DESC(rx_ring, i);
6099         bi = &rx_ring->rx_buffer_info[i];
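        /* as in the Tx clean path, i is tracked as a negative offset from the
         * end of the ring to simplify wrap handling */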
6100         i -= rx_ring->count;
6101
6102         while (cleaned_count--) {
6103                 if (!igb_alloc_mapped_skb(rx_ring, bi))
6104                         break;
6105
6106                 /* Refresh the desc even if buffer_addrs didn't change
6107                  * because each write-back erases this info. */
6108                 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6109
6110                 if (!igb_alloc_mapped_page(rx_ring, bi))
6111                         break;
6112
6113                 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6114
6115                 rx_desc++;
6116                 bi++;
6117                 i++;
6118                 if (unlikely(!i)) {
6119                         rx_desc = IGB_RX_DESC(rx_ring, 0);
6120                         bi = rx_ring->rx_buffer_info;
6121                         i -= rx_ring->count;
6122                 }
6123
6124                 /* clear the hdr_addr for the next_to_use descriptor */
6125                 rx_desc->read.hdr_addr = 0;
6126         }
6127
6128         i += rx_ring->count;
6129
6130         if (rx_ring->next_to_use != i) {
6131                 rx_ring->next_to_use = i;
6132
6133                 /* Force memory writes to complete before letting h/w
6134                  * know there are new descriptors to fetch.  (Only
6135                  * applicable for weak-ordered memory model archs,
6136                  * such as IA-64). */
6137                 wmb();
6138                 writel(i, rx_ring->tail);
6139         }
6140 }
6141
6142 /**
6143  * igb_mii_ioctl - handle MII register ioctl requests
6144  * @netdev: network interface device structure
6145  * @ifr: interface request structure containing the MII data
6146  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6147  **/
6148 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6149 {
6150         struct igb_adapter *adapter = netdev_priv(netdev);
6151         struct mii_ioctl_data *data = if_mii(ifr);
6152
6153         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6154                 return -EOPNOTSUPP;
6155
6156         switch (cmd) {
6157         case SIOCGMIIPHY:
6158                 data->phy_id = adapter->hw.phy.addr;
6159                 break;
6160         case SIOCGMIIREG:
6161                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6162                                      &data->val_out))
6163                         return -EIO;
6164                 break;
6165         case SIOCSMIIREG:
6166         default:
6167                 return -EOPNOTSUPP;
6168         }
6169         return 0;
6170 }
6171
6172 /**
6173  * igb_hwtstamp_ioctl - control hardware time stamping
6174  * @netdev: network interface device structure
6175  * @ifr: interface request structure containing the hwtstamp_config
6176  * @cmd: ioctl command (SIOCSHWTSTAMP)
6177  *
6178  * Outgoing time stamping can be enabled and disabled. Play nice and
6179  * disable it when requested, although it shouldn't cause any overhead
6180  * when no packet needs it. At most one packet in the queue may be
6181  * marked for time stamping, otherwise it would be impossible to tell
6182  * for sure to which packet the hardware time stamp belongs.
6183  *
6184  * Incoming time stamping has to be configured via the hardware
6185  * filters. Not all combinations are supported, in particular event
6186  * type has to be specified. Matching the kind of event packet is
6187  * not supported, with the exception of "all V2 events regardless of
6188  * layer 2 or 4".
6189  *
6190  **/
6191 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6192                               struct ifreq *ifr, int cmd)
6193 {
6194         struct igb_adapter *adapter = netdev_priv(netdev);
6195         struct e1000_hw *hw = &adapter->hw;
6196         struct hwtstamp_config config;
6197         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6198         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6199         u32 tsync_rx_cfg = 0;
6200         bool is_l4 = false;
6201         bool is_l2 = false;
6202         u32 regval;
6203
6204         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6205                 return -EFAULT;
6206
6207         /* reserved for future extensions */
6208         if (config.flags)
6209                 return -EINVAL;
6210
6211         switch (config.tx_type) {
6212         case HWTSTAMP_TX_OFF:
6213                 tsync_tx_ctl = 0;
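                /* fall through */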
6214         case HWTSTAMP_TX_ON:
6215                 break;
6216         default:
6217                 return -ERANGE;
6218         }
6219
6220         switch (config.rx_filter) {
6221         case HWTSTAMP_FILTER_NONE:
6222                 tsync_rx_ctl = 0;
6223                 break;
6224         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6225         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6226         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6227         case HWTSTAMP_FILTER_ALL:
6228                 /*
6229                  * register TSYNCRXCFG must be set, therefore it is not
6230                  * possible to time stamp both Sync and Delay_Req messages
6231                  * => fall back to time stamping all packets
6232                  */
6233                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6234                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6235                 break;
6236         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6237                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6238                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6239                 is_l4 = true;
6240                 break;
6241         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6242                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6243                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6244                 is_l4 = true;
6245                 break;
6246         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6247         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6248                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6249                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6250                 is_l2 = true;
6251                 is_l4 = true;
6252                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6253                 break;
6254         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6255         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6256                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6257                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6258                 is_l2 = true;
6259                 is_l4 = true;
6260                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6261                 break;
6262         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6263         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6264         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6265                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6266                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6267                 is_l2 = true;
6268                 break;
6269         default:
6270                 return -ERANGE;
6271         }
6272
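        /* 82575 has no hardware timestamping; reject any request to enable it */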
6273         if (hw->mac.type == e1000_82575) {
6274                 if (tsync_rx_ctl || tsync_tx_ctl)
6275                         return -EINVAL;
6276                 return 0;
6277         }
6278
6279         /*
6280          * Per-packet timestamping only works if all packets are
6281          * timestamped, so enable timestamping in all packets as
6282          * long as one rx filter was configured.
6283          */
6284         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6285                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6286                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6287         }
6288
6289         /* enable/disable TX */
6290         regval = rd32(E1000_TSYNCTXCTL);
6291         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6292         regval |= tsync_tx_ctl;
6293         wr32(E1000_TSYNCTXCTL, regval);
6294
6295         /* enable/disable RX */
6296         regval = rd32(E1000_TSYNCRXCTL);
6297         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6298         regval |= tsync_rx_ctl;
6299         wr32(E1000_TSYNCRXCTL, regval);
6300
6301         /* define which PTP packets are time stamped */
6302         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6303
6304         /* define ethertype filter for timestamped packets */
6305         if (is_l2)
6306                 wr32(E1000_ETQF(3),
6307                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6308                                  E1000_ETQF_1588 | /* enable timestamping */
6309                                  ETH_P_1588));     /* 1588 eth protocol type */
6310         else
6311                 wr32(E1000_ETQF(3), 0);
6312
6313 #define PTP_PORT 319
6314         /* L4 Queue Filter[3]: filter by destination port and protocol */
6315         if (is_l4) {
6316                 u32 ftqf = (IPPROTO_UDP /* UDP */
6317                         | E1000_FTQF_VF_BP /* VF not compared */
6318                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6319                         | E1000_FTQF_MASK); /* mask all inputs */
6320                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6321
6322                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6323                 wr32(E1000_IMIREXT(3),
6324                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6325                 if (hw->mac.type == e1000_82576) {
6326                         /* enable source port check */
6327                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6328                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6329                 }
6330                 wr32(E1000_FTQF(3), ftqf);
6331         } else {
6332                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6333         }
6334         wrfl();
6335
6336         adapter->hwtstamp_config = config;
6337
6338         /* clear TX/RX time stamp registers, just to be sure */
6339         regval = rd32(E1000_TXSTMPH);
6340         regval = rd32(E1000_RXSTMPH);
6341
6342         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6343                 -EFAULT : 0;
6344 }
6345
6346 /**
6347  * igb_ioctl - dispatch device-specific ioctl requests
6348  * @netdev: network interface device structure
6349  * @ifr: interface request structure
6350  * @cmd: ioctl command
6351  **/
6352 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6353 {
6354         switch (cmd) {
6355         case SIOCGMIIPHY:
6356         case SIOCGMIIREG:
6357         case SIOCSMIIREG:
6358                 return igb_mii_ioctl(netdev, ifr, cmd);
6359         case SIOCSHWTSTAMP:
6360                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6361         default:
6362                 return -EOPNOTSUPP;
6363         }
6364 }
6365
6366 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6367 {
6368         struct igb_adapter *adapter = hw->back;
6369         u16 cap_offset;
6370
6371         cap_offset = adapter->pdev->pcie_cap;
6372         if (!cap_offset)
6373                 return -E1000_ERR_CONFIG;
6374
6375         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6376
6377         return 0;
6378 }
6379
6380 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6381 {
6382         struct igb_adapter *adapter = hw->back;
6383         u16 cap_offset;
6384
6385         cap_offset = adapter->pdev->pcie_cap;
6386         if (!cap_offset)
6387                 return -E1000_ERR_CONFIG;
6388
6389         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6390
6391         return 0;
6392 }
6393
6394 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6395 {
6396         struct igb_adapter *adapter = netdev_priv(netdev);
6397         struct e1000_hw *hw = &adapter->hw;
6398         u32 ctrl, rctl;
6399
6400         igb_irq_disable(adapter);
6401
6402         if (features & NETIF_F_HW_VLAN_RX) {
6403                 /* enable VLAN tag insert/strip */
6404                 ctrl = rd32(E1000_CTRL);
6405                 ctrl |= E1000_CTRL_VME;
6406                 wr32(E1000_CTRL, ctrl);
6407
6408                 /* Disable CFI check */
6409                 rctl = rd32(E1000_RCTL);
6410                 rctl &= ~E1000_RCTL_CFIEN;
6411                 wr32(E1000_RCTL, rctl);
6412         } else {
6413                 /* disable VLAN tag insert/strip */
6414                 ctrl = rd32(E1000_CTRL);
6415                 ctrl &= ~E1000_CTRL_VME;
6416                 wr32(E1000_CTRL, ctrl);
6417         }
6418
6419         igb_rlpml_set(adapter);
6420
6421         if (!test_bit(__IGB_DOWN, &adapter->state))
6422                 igb_irq_enable(adapter);
6423 }
6424
6425 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6426 {
6427         struct igb_adapter *adapter = netdev_priv(netdev);
6428         struct e1000_hw *hw = &adapter->hw;
6429         int pf_id = adapter->vfs_allocated_count;
6430
6431         /* attempt to add filter to vlvf array */
6432         igb_vlvf_set(adapter, vid, true, pf_id);
6433
6434         /* add the filter since PF can receive vlans w/o entry in vlvf */
6435         igb_vfta_set(hw, vid, true);
6436
6437         set_bit(vid, adapter->active_vlans);
6438 }
6439
6440 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6441 {
6442         struct igb_adapter *adapter = netdev_priv(netdev);
6443         struct e1000_hw *hw = &adapter->hw;
6444         int pf_id = adapter->vfs_allocated_count;
6445         s32 err;
6446
6447         igb_irq_disable(adapter);
6448
6449         if (!test_bit(__IGB_DOWN, &adapter->state))
6450                 igb_irq_enable(adapter);
6451
6452         /* remove vlan from VLVF table array */
6453         err = igb_vlvf_set(adapter, vid, false, pf_id);
6454
6455         /* if vid was not present in VLVF just remove it from table */
6456         if (err)
6457                 igb_vfta_set(hw, vid, false);
6458
6459         clear_bit(vid, adapter->active_vlans);
6460 }
6461
6462 static void igb_restore_vlan(struct igb_adapter *adapter)
6463 {
6464         u16 vid;
6465
6466         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6467                 igb_vlan_rx_add_vid(adapter->netdev, vid);
6468 }
6469
6470 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6471 {
6472         struct pci_dev *pdev = adapter->pdev;
6473         struct e1000_mac_info *mac = &adapter->hw.mac;
6474
6475         mac->autoneg = 0;
6476
6477         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6478          * for the switch() below to work */
6479         if ((spd & 1) || (dplx & ~1))
6480                 goto err_inval;
6481
6482         /* Fiber NICs only allow 1000 Mbps full duplex */
6483         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6484             (spd != SPEED_1000 ||
6485              dplx != DUPLEX_FULL))
6486                 goto err_inval;
6487
6488         switch (spd + dplx) {
6489         case SPEED_10 + DUPLEX_HALF:
6490                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6491                 break;
6492         case SPEED_10 + DUPLEX_FULL:
6493                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6494                 break;
6495         case SPEED_100 + DUPLEX_HALF:
6496                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6497                 break;
6498         case SPEED_100 + DUPLEX_FULL:
6499                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6500                 break;
6501         case SPEED_1000 + DUPLEX_FULL:
6502                 mac->autoneg = 1;
6503                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6504                 break;
6505         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6506         default:
6507                 goto err_inval;
6508         }
6509         return 0;
6510
6511 err_inval:
6512         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6513         return -EINVAL;
6514 }
6515
6516 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6517 {
6518         struct net_device *netdev = pci_get_drvdata(pdev);
6519         struct igb_adapter *adapter = netdev_priv(netdev);
6520         struct e1000_hw *hw = &adapter->hw;
6521         u32 ctrl, rctl, status;
6522         u32 wufc = adapter->wol;
6523 #ifdef CONFIG_PM
6524         int retval = 0;
6525 #endif
6526
6527         netif_device_detach(netdev);
6528
6529         if (netif_running(netdev))
6530                 igb_close(netdev);
6531
6532         igb_clear_interrupt_scheme(adapter);
6533
6534 #ifdef CONFIG_PM
6535         retval = pci_save_state(pdev);
6536         if (retval)
6537                 return retval;
6538 #endif
6539
6540         status = rd32(E1000_STATUS);
6541         if (status & E1000_STATUS_LU)
6542                 wufc &= ~E1000_WUFC_LNKC;
6543
6544         if (wufc) {
6545                 igb_setup_rctl(adapter);
6546                 igb_set_rx_mode(netdev);
6547
6548                 /* turn on all-multi mode if wake on multicast is enabled */
6549                 if (wufc & E1000_WUFC_MC) {
6550                         rctl = rd32(E1000_RCTL);
6551                         rctl |= E1000_RCTL_MPE;
6552                         wr32(E1000_RCTL, rctl);
6553                 }
6554
6555                 ctrl = rd32(E1000_CTRL);
6556                 /* advertise wake from D3Cold */
6557                 #define E1000_CTRL_ADVD3WUC 0x00100000
6558                 /* phy power management enable */
6559                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6560                 ctrl |= E1000_CTRL_ADVD3WUC;
6561                 wr32(E1000_CTRL, ctrl);
6562
6563                 /* Allow time for pending master requests to run */
6564                 igb_disable_pcie_master(hw);
6565
6566                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6567                 wr32(E1000_WUFC, wufc);
6568         } else {
6569                 wr32(E1000_WUC, 0);
6570                 wr32(E1000_WUFC, 0);
6571         }
6572
6573         *enable_wake = wufc || adapter->en_mng_pt;
6574         if (!*enable_wake)
6575                 igb_power_down_link(adapter);
6576         else
6577                 igb_power_up_link(adapter);
6578
6579         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6580          * would have already happened in close and is redundant. */
6581         igb_release_hw_control(adapter);
6582
6583         pci_disable_device(pdev);
6584
6585         return 0;
6586 }
6587
6588 #ifdef CONFIG_PM
6589 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6590 {
6591         int retval;
6592         bool wake;
6593
6594         retval = __igb_shutdown(pdev, &wake);
6595         if (retval)
6596                 return retval;
6597
6598         if (wake) {
6599                 pci_prepare_to_sleep(pdev);
6600         } else {
6601                 pci_wake_from_d3(pdev, false);
6602                 pci_set_power_state(pdev, PCI_D3hot);
6603         }
6604
6605         return 0;
6606 }
6607
6608 static int igb_resume(struct pci_dev *pdev)
6609 {
6610         struct net_device *netdev = pci_get_drvdata(pdev);
6611         struct igb_adapter *adapter = netdev_priv(netdev);
6612         struct e1000_hw *hw = &adapter->hw;
6613         u32 err;
6614
6615         pci_set_power_state(pdev, PCI_D0);
6616         pci_restore_state(pdev);
6617         pci_save_state(pdev);
6618
6619         err = pci_enable_device_mem(pdev);
6620         if (err) {
6621                 dev_err(&pdev->dev,
6622                         "igb: Cannot enable PCI device from suspend\n");
6623                 return err;
6624         }
6625         pci_set_master(pdev);
6626
6627         pci_enable_wake(pdev, PCI_D3hot, 0);
6628         pci_enable_wake(pdev, PCI_D3cold, 0);
6629
6630         if (igb_init_interrupt_scheme(adapter)) {
6631                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6632                 return -ENOMEM;
6633         }
6634
6635         igb_reset(adapter);
6636
6637         /* let the f/w know that the h/w is now under the control of the
6638          * driver. */
6639         igb_get_hw_control(adapter);
6640
6641         wr32(E1000_WUS, ~0);
6642
6643         if (netif_running(netdev)) {
6644                 err = igb_open(netdev);
6645                 if (err)
6646                         return err;
6647         }
6648
6649         netif_device_attach(netdev);
6650
6651         return 0;
6652 }
6653 #endif
6654
6655 static void igb_shutdown(struct pci_dev *pdev)
6656 {
6657         bool wake;
6658
6659         __igb_shutdown(pdev, &wake);
6660
6661         if (system_state == SYSTEM_POWER_OFF) {
6662                 pci_wake_from_d3(pdev, wake);
6663                 pci_set_power_state(pdev, PCI_D3hot);
6664         }
6665 }
6666
6667 #ifdef CONFIG_NET_POLL_CONTROLLER
6668 /*
6669  * Polling 'interrupt' - used by things like netconsole to send skbs
6670  * without having to re-enable interrupts. It's not called while
6671  * the interrupt routine is executing.
6672  */
6673 static void igb_netpoll(struct net_device *netdev)
6674 {
6675         struct igb_adapter *adapter = netdev_priv(netdev);
6676         struct e1000_hw *hw = &adapter->hw;
6677         int i;
6678
6679         if (!adapter->msix_entries) {
6680                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6681                 igb_irq_disable(adapter);
6682                 napi_schedule(&q_vector->napi);
6683                 return;
6684         }
6685
6686         for (i = 0; i < adapter->num_q_vectors; i++) {
6687                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6688                 wr32(E1000_EIMC, q_vector->eims_value);
6689                 napi_schedule(&q_vector->napi);
6690         }
6691 }
6692 #endif /* CONFIG_NET_POLL_CONTROLLER */
6693
6694 /**
6695  * igb_io_error_detected - called when PCI error is detected
6696  * @pdev: Pointer to PCI device
6697  * @state: The current pci connection state
6698  *
6699  * This function is called after a PCI bus error affecting
6700  * this device has been detected.
6701  */
6702 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6703                                               pci_channel_state_t state)
6704 {
6705         struct net_device *netdev = pci_get_drvdata(pdev);
6706         struct igb_adapter *adapter = netdev_priv(netdev);
6707
6708         netif_device_detach(netdev);
6709
6710         if (state == pci_channel_io_perm_failure)
6711                 return PCI_ERS_RESULT_DISCONNECT;
6712
6713         if (netif_running(netdev))
6714                 igb_down(adapter);
6715         pci_disable_device(pdev);
6716
6717         /* Request a slot reset. */
6718         return PCI_ERS_RESULT_NEED_RESET;
6719 }
6720
6721 /**
6722  * igb_io_slot_reset - called after the pci bus has been reset.
6723  * @pdev: Pointer to PCI device
6724  *
6725  * Restart the card from scratch, as if from a cold-boot. Implementation
6726  * resembles the first-half of the igb_resume routine.
6727  */
6728 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6729 {
6730         struct net_device *netdev = pci_get_drvdata(pdev);
6731         struct igb_adapter *adapter = netdev_priv(netdev);
6732         struct e1000_hw *hw = &adapter->hw;
6733         pci_ers_result_t result;
6734         int err;
6735
6736         if (pci_enable_device_mem(pdev)) {
6737                 dev_err(&pdev->dev,
6738                         "Cannot re-enable PCI device after reset.\n");
6739                 result = PCI_ERS_RESULT_DISCONNECT;
6740         } else {
6741                 pci_set_master(pdev);
6742                 pci_restore_state(pdev);
6743                 pci_save_state(pdev);
6744
6745                 pci_enable_wake(pdev, PCI_D3hot, 0);
6746                 pci_enable_wake(pdev, PCI_D3cold, 0);
6747
6748                 igb_reset(adapter);
6749                 wr32(E1000_WUS, ~0);
6750                 result = PCI_ERS_RESULT_RECOVERED;
6751         }
6752
6753         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6754         if (err) {
6755                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6756                         "failed 0x%0x\n", err);
6757                 /* non-fatal, continue */
6758         }
6759
6760         return result;
6761 }
6762
6763 /**
6764  * igb_io_resume - called when traffic can start flowing again.
6765  * @pdev: Pointer to PCI device
6766  *
6767  * This callback is called when the error recovery driver tells us that
6768  * its OK to resume normal operation. Implementation resembles the
6769  * second-half of the igb_resume routine.
6770  */
6771 static void igb_io_resume(struct pci_dev *pdev)
6772 {
6773         struct net_device *netdev = pci_get_drvdata(pdev);
6774         struct igb_adapter *adapter = netdev_priv(netdev);
6775
6776         if (netif_running(netdev)) {
6777                 if (igb_up(adapter)) {
6778                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6779                         return;
6780                 }
6781         }
6782
6783         netif_device_attach(netdev);
6784
6785         /* let the f/w know that the h/w is now under the control of the
6786          * driver. */
6787         igb_get_hw_control(adapter);
6788 }
6789
6790 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6791                              u8 qsel)
6792 {
6793         u32 rar_low, rar_high;
6794         struct e1000_hw *hw = &adapter->hw;
6795
6796         /* HW expects these in little endian so we reverse the byte order
6797          * from network order (big endian) to little endian
6798          */
6799         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6800                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6801         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6802
6803         /* Indicate to hardware the Address is Valid. */
6804         rar_high |= E1000_RAH_AV;
6805
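        /* 82575 encodes the pool select as a value in the RAH pool field,
         * later parts set a one-hot bit per pool */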
6806         if (hw->mac.type == e1000_82575)
6807                 rar_high |= E1000_RAH_POOL_1 * qsel;
6808         else
6809                 rar_high |= E1000_RAH_POOL_1 << qsel;
6810
6811         wr32(E1000_RAL(index), rar_low);
6812         wrfl();
6813         wr32(E1000_RAH(index), rar_high);
6814         wrfl();
6815 }
6816
6817 static int igb_set_vf_mac(struct igb_adapter *adapter,
6818                           int vf, unsigned char *mac_addr)
6819 {
6820         struct e1000_hw *hw = &adapter->hw;
6821         /* VF MAC addresses start at the end of the receive addresses and move
6822          * towards the first, so a collision should not be possible */
6823         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6824
6825         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6826
6827         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6828
6829         return 0;
6830 }
6831
6832 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6833 {
6834         struct igb_adapter *adapter = netdev_priv(netdev);
6835         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6836                 return -EINVAL;
6837         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6838         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6839         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6840                                       " change effective.\n");
6841         if (test_bit(__IGB_DOWN, &adapter->state)) {
6842                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6843                          " but the PF device is not up.\n");
6844                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6845                          " attempting to use the VF device.\n");
6846         }
6847         return igb_set_vf_mac(adapter, vf, mac);
6848 }
6849
6850 static int igb_link_mbps(int internal_link_speed)
6851 {
6852         switch (internal_link_speed) {
6853         case SPEED_100:
6854                 return 100;
6855         case SPEED_1000:
6856                 return 1000;
6857         default:
6858                 return 0;
6859         }
6860 }
6861
6862 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6863                                   int link_speed)
6864 {
6865         int rf_dec, rf_int;
6866         u32 bcnrc_val;
6867
6868         if (tx_rate != 0) {
6869                 /* Calculate the rate factor values to set */
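                /* the rate factor is link_speed/tx_rate in fixed point: the
                 * integer part goes in rf_int, the fraction (scaled by
                 * 2^RF_INT_SHIFT) in rf_dec */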
6870                 rf_int = link_speed / tx_rate;
6871                 rf_dec = (link_speed - (rf_int * tx_rate));
6872                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6873
6874                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6875                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6876                                E1000_RTTBCNRC_RF_INT_MASK);
6877                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6878         } else {
6879                 bcnrc_val = 0;
6880         }
6881
6882         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6883         wr32(E1000_RTTBCNRC, bcnrc_val);
6884 }
6885
6886 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6887 {
6888         int actual_link_speed, i;
6889         bool reset_rate = false;
6890
6891         /* VF TX rate limit was not set or not supported */
6892         if ((adapter->vf_rate_link_speed == 0) ||
6893             (adapter->hw.mac.type != e1000_82576))
6894                 return;
6895
6896         actual_link_speed = igb_link_mbps(adapter->link_speed);
6897         if (actual_link_speed != adapter->vf_rate_link_speed) {
6898                 reset_rate = true;
6899                 adapter->vf_rate_link_speed = 0;
6900                 dev_info(&adapter->pdev->dev,
6901                          "Link speed has been changed. VF Transmit "
6902                          "rate is disabled\n");
6903         }
6904
6905         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6906                 if (reset_rate)
6907                         adapter->vf_data[i].tx_rate = 0;
6908
6909                 igb_set_vf_rate_limit(&adapter->hw, i,
6910                                       adapter->vf_data[i].tx_rate,
6911                                       actual_link_speed);
6912         }
6913 }
6914
6915 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6916 {
6917         struct igb_adapter *adapter = netdev_priv(netdev);
6918         struct e1000_hw *hw = &adapter->hw;
6919         int actual_link_speed;
6920
6921         if (hw->mac.type != e1000_82576)
6922                 return -EOPNOTSUPP;
6923
6924         actual_link_speed = igb_link_mbps(adapter->link_speed);
6925         if ((vf >= adapter->vfs_allocated_count) ||
6926             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6927             (tx_rate < 0) || (tx_rate > actual_link_speed))
6928                 return -EINVAL;
6929
6930         adapter->vf_rate_link_speed = actual_link_speed;
6931         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6932         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6933
6934         return 0;
6935 }
6936
6937 static int igb_ndo_get_vf_config(struct net_device *netdev,
6938                                  int vf, struct ifla_vf_info *ivi)
6939 {
6940         struct igb_adapter *adapter = netdev_priv(netdev);
6941         if (vf >= adapter->vfs_allocated_count)
6942                 return -EINVAL;
6943         ivi->vf = vf;
6944         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6945         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6946         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6947         ivi->qos = adapter->vf_data[vf].pf_qos;
6948         return 0;
6949 }
6950
6951 static void igb_vmm_control(struct igb_adapter *adapter)
6952 {
6953         struct e1000_hw *hw = &adapter->hw;
6954         u32 reg;
6955
6956         switch (hw->mac.type) {
6957         case e1000_82575:
6958         default:
6959                 /* replication is not supported for 82575 */
6960                 return;
6961         case e1000_82576:
6962                 /* notify HW that the MAC is adding vlan tags */
6963                 reg = rd32(E1000_DTXCTL);
6964                 reg |= E1000_DTXCTL_VLAN_ADDED;
6965                 wr32(E1000_DTXCTL, reg);
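                /* fall through */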
6966         case e1000_82580:
6967                 /* enable replication vlan tag stripping */
6968                 reg = rd32(E1000_RPLOLR);
6969                 reg |= E1000_RPLOLR_STRVLAN;
6970                 wr32(E1000_RPLOLR, reg);
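                /* fall through */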
6971         case e1000_i350:
6972                 /* none of the above registers are supported by i350 */
6973                 break;
6974         }
6975
6976         if (adapter->vfs_allocated_count) {
6977                 igb_vmdq_set_loopback_pf(hw, true);
6978                 igb_vmdq_set_replication_pf(hw, true);
6979                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6980                                                 adapter->vfs_allocated_count);
6981         } else {
6982                 igb_vmdq_set_loopback_pf(hw, false);
6983                 igb_vmdq_set_replication_pf(hw, false);
6984         }
6985 }
6986
6987 /* igb_main.c */