1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "2.1.0-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
84         /* required last entry */
85         {0, }
86 };
87
88 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
89
90 void igb_reset(struct igb_adapter *);
91 static int igb_setup_all_tx_resources(struct igb_adapter *);
92 static int igb_setup_all_rx_resources(struct igb_adapter *);
93 static void igb_free_all_tx_resources(struct igb_adapter *);
94 static void igb_free_all_rx_resources(struct igb_adapter *);
95 static void igb_setup_mrqc(struct igb_adapter *);
96 void igb_update_stats(struct igb_adapter *);
97 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
98 static void __devexit igb_remove(struct pci_dev *pdev);
99 static int igb_sw_init(struct igb_adapter *);
100 static int igb_open(struct net_device *);
101 static int igb_close(struct net_device *);
102 static void igb_configure_tx(struct igb_adapter *);
103 static void igb_configure_rx(struct igb_adapter *);
104 static void igb_clean_all_tx_rings(struct igb_adapter *);
105 static void igb_clean_all_rx_rings(struct igb_adapter *);
106 static void igb_clean_tx_ring(struct igb_ring *);
107 static void igb_clean_rx_ring(struct igb_ring *);
108 static void igb_set_rx_mode(struct net_device *);
109 static void igb_update_phy_info(unsigned long);
110 static void igb_watchdog(unsigned long);
111 static void igb_watchdog_task(struct work_struct *);
112 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
113 static struct net_device_stats *igb_get_stats(struct net_device *);
114 static int igb_change_mtu(struct net_device *, int);
115 static int igb_set_mac(struct net_device *, void *);
116 static void igb_set_uta(struct igb_adapter *adapter);
117 static irqreturn_t igb_intr(int irq, void *);
118 static irqreturn_t igb_intr_msi(int irq, void *);
119 static irqreturn_t igb_msix_other(int irq, void *);
120 static irqreturn_t igb_msix_ring(int irq, void *);
121 #ifdef CONFIG_IGB_DCA
122 static void igb_update_dca(struct igb_q_vector *);
123 static void igb_setup_dca(struct igb_adapter *);
124 #endif /* CONFIG_IGB_DCA */
125 static bool igb_clean_tx_irq(struct igb_q_vector *);
126 static int igb_poll(struct napi_struct *, int);
127 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
128 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
129 static void igb_tx_timeout(struct net_device *);
130 static void igb_reset_task(struct work_struct *);
131 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
132 static void igb_vlan_rx_add_vid(struct net_device *, u16);
133 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
134 static void igb_restore_vlan(struct igb_adapter *);
135 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
136 static void igb_ping_all_vfs(struct igb_adapter *);
137 static void igb_msg_task(struct igb_adapter *);
138 static void igb_vmm_control(struct igb_adapter *);
139 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
140 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
141 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
142 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
143                                int vf, u16 vlan, u8 qos);
144 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
145 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
146                                  struct ifla_vf_info *ivi);
147
148 #ifdef CONFIG_PM
149 static int igb_suspend(struct pci_dev *, pm_message_t);
150 static int igb_resume(struct pci_dev *);
151 #endif
152 static void igb_shutdown(struct pci_dev *);
153 #ifdef CONFIG_IGB_DCA
154 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
155 static struct notifier_block dca_notifier = {
156         .notifier_call  = igb_notify_dca,
157         .next           = NULL,
158         .priority       = 0
159 };
160 #endif
161 #ifdef CONFIG_NET_POLL_CONTROLLER
162 /* for netdump / net console */
163 static void igb_netpoll(struct net_device *);
164 #endif
165 #ifdef CONFIG_PCI_IOV
166 static unsigned int max_vfs = 0;
167 module_param(max_vfs, uint, 0);
168 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
169                  "per physical function");
170 #endif /* CONFIG_PCI_IOV */
171
172 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
173                      pci_channel_state_t);
174 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
175 static void igb_io_resume(struct pci_dev *);
176
177 static struct pci_error_handlers igb_err_handler = {
178         .error_detected = igb_io_error_detected,
179         .slot_reset = igb_io_slot_reset,
180         .resume = igb_io_resume,
181 };
182
183
184 static struct pci_driver igb_driver = {
185         .name     = igb_driver_name,
186         .id_table = igb_pci_tbl,
187         .probe    = igb_probe,
188         .remove   = __devexit_p(igb_remove),
189 #ifdef CONFIG_PM
190         /* Power Management Hooks */
191         .suspend  = igb_suspend,
192         .resume   = igb_resume,
193 #endif
194         .shutdown = igb_shutdown,
195         .err_handler = &igb_err_handler
196 };
197
198 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
199 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
200 MODULE_LICENSE("GPL");
201 MODULE_VERSION(DRV_VERSION);
202
203 /**
204  * igb_read_clock - read raw cycle counter (to be used by time counter)
205  */
206 static cycle_t igb_read_clock(const struct cyclecounter *tc)
207 {
208         struct igb_adapter *adapter =
209                 container_of(tc, struct igb_adapter, cycles);
210         struct e1000_hw *hw = &adapter->hw;
211         u64 stamp = 0;
212         int shift = 0;
213
214         /*
215          * The timestamp latches on lowest register read. For the 82580
216          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
217          * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
218          */
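	/*
	 * For illustration: with shift = 0 on pre-82580 parts the 64-bit
	 * stamp assembled below is simply SYSTIMH:SYSTIML.  On the 82580,
	 * shift = IGB_82580_TSYNC_SHIFT, so the latched SYSTIMR fraction
	 * (>> 8) fills the low bits and SYSTIML/SYSTIMH are shifted up:
	 *
	 *   stamp = (SYSTIMH << (shift + 32)) | (SYSTIML << shift) | (SYSTIMR >> 8)
	 */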
219         if (hw->mac.type == e1000_82580) {
220                 stamp = rd32(E1000_SYSTIMR) >> 8;
221                 shift = IGB_82580_TSYNC_SHIFT;
222         }
223
224         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
225         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
226         return stamp;
227 }
228
229 /**
230  * igb_get_hw_dev - return device
231  * used by hardware layer to print debugging information
232  **/
233 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
234 {
235         struct igb_adapter *adapter = hw->back;
236         return adapter->netdev;
237 }
238
239 /**
240  * igb_init_module - Driver Registration Routine
241  *
242  * igb_init_module is the first routine called when the driver is
243  * loaded. All it does is register with the PCI subsystem.
244  **/
245 static int __init igb_init_module(void)
246 {
247         int ret;
248         printk(KERN_INFO "%s - version %s\n",
249                igb_driver_string, igb_driver_version);
250
251         printk(KERN_INFO "%s\n", igb_copyright);
252
253 #ifdef CONFIG_IGB_DCA
254         dca_register_notify(&dca_notifier);
255 #endif
256         ret = pci_register_driver(&igb_driver);
257         return ret;
258 }
259
260 module_init(igb_init_module);
261
262 /**
263  * igb_exit_module - Driver Exit Cleanup Routine
264  *
265  * igb_exit_module is called just before the driver is removed
266  * from memory.
267  **/
268 static void __exit igb_exit_module(void)
269 {
270 #ifdef CONFIG_IGB_DCA
271         dca_unregister_notify(&dca_notifier);
272 #endif
273         pci_unregister_driver(&igb_driver);
274 }
275
276 module_exit(igb_exit_module);
277
278 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
279 /**
280  * igb_cache_ring_register - Descriptor ring to register mapping
281  * @adapter: board private structure to initialize
282  *
283  * Once we know the feature-set enabled for the device, we'll cache
284  * the register offset the descriptor ring is assigned to.
285  **/
286 static void igb_cache_ring_register(struct igb_adapter *adapter)
287 {
288         int i = 0, j = 0;
289         u32 rbase_offset = adapter->vfs_allocated_count;
290
291         switch (adapter->hw.mac.type) {
292         case e1000_82576:
293                 /* The queues are allocated for virtualization such that VF 0
294                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
295                  * In order to avoid collision we start at the first free queue
296                  * and continue consuming queues in the same sequence
297                  */
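		/*
		 * For illustration: Q_IDX_82576(i) moves the low bit of i into
		 * bit 3, so i = 0, 1, 2, 3, ... maps to register indices
		 * 0, 8, 1, 9, ...  With 7 VFs (rbase_offset = 7) the PF's (at
		 * most two) RSS queues land on registers 7 and 15 while VF n
		 * owns queues n and n + 8, so nothing collides.
		 */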
298                 if (adapter->vfs_allocated_count) {
299                         for (; i < adapter->rss_queues; i++)
300                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
301                                                                Q_IDX_82576(i);
302                         for (; j < adapter->rss_queues; j++)
303                                 adapter->tx_ring[j]->reg_idx = rbase_offset +
304                                                                Q_IDX_82576(j);
305                 }
306         case e1000_82575:
307         case e1000_82580:
308         case e1000_i350:
309         default:
310                 for (; i < adapter->num_rx_queues; i++)
311                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
312                 for (; j < adapter->num_tx_queues; j++)
313                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
314                 break;
315         }
316 }
317
318 static void igb_free_queues(struct igb_adapter *adapter)
319 {
320         int i;
321
322         for (i = 0; i < adapter->num_tx_queues; i++) {
323                 kfree(adapter->tx_ring[i]);
324                 adapter->tx_ring[i] = NULL;
325         }
326         for (i = 0; i < adapter->num_rx_queues; i++) {
327                 kfree(adapter->rx_ring[i]);
328                 adapter->rx_ring[i] = NULL;
329         }
330         adapter->num_rx_queues = 0;
331         adapter->num_tx_queues = 0;
332 }
333
334 /**
335  * igb_alloc_queues - Allocate memory for all rings
336  * @adapter: board private structure to initialize
337  *
338  * We allocate one ring per queue at run-time since we don't know the
339  * number of queues at compile-time.
340  **/
341 static int igb_alloc_queues(struct igb_adapter *adapter)
342 {
343         struct igb_ring *ring;
344         int i;
345
346         for (i = 0; i < adapter->num_tx_queues; i++) {
347                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
348                 if (!ring)
349                         goto err;
350                 ring->count = adapter->tx_ring_count;
351                 ring->queue_index = i;
352                 ring->pdev = adapter->pdev;
353                 ring->netdev = adapter->netdev;
354                 /* For 82575, context index must be unique per ring. */
355                 if (adapter->hw.mac.type == e1000_82575)
356                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
357                 adapter->tx_ring[i] = ring;
358         }
359
360         for (i = 0; i < adapter->num_rx_queues; i++) {
361                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
362                 if (!ring)
363                         goto err;
364                 ring->count = adapter->rx_ring_count;
365                 ring->queue_index = i;
366                 ring->pdev = adapter->pdev;
367                 ring->netdev = adapter->netdev;
368                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
369                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
370                 /* set flag indicating ring supports SCTP checksum offload */
371                 if (adapter->hw.mac.type >= e1000_82576)
372                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
373                 adapter->rx_ring[i] = ring;
374         }
375
376         igb_cache_ring_register(adapter);
377
378         return 0;
379
380 err:
381         igb_free_queues(adapter);
382
383         return -ENOMEM;
384 }
385
386 #define IGB_N0_QUEUE -1
387 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
388 {
389         u32 msixbm = 0;
390         struct igb_adapter *adapter = q_vector->adapter;
391         struct e1000_hw *hw = &adapter->hw;
392         u32 ivar, index;
393         int rx_queue = IGB_N0_QUEUE;
394         int tx_queue = IGB_N0_QUEUE;
395
396         if (q_vector->rx_ring)
397                 rx_queue = q_vector->rx_ring->reg_idx;
398         if (q_vector->tx_ring)
399                 tx_queue = q_vector->tx_ring->reg_idx;
400
401         switch (hw->mac.type) {
402         case e1000_82575:
403                 /* The 82575 assigns vectors using a bitmask, which matches the
404                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
405                    or more queues to a vector, we write the appropriate bits
406                    into the MSIXBM register for that vector. */
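		/*
		 * For illustration: with rx_queue = 1 and tx_queue = 1 the
		 * bitmask becomes (E1000_EICR_RX_QUEUE0 << 1) |
		 * (E1000_EICR_TX_QUEUE0 << 1), i.e. the RX1 and TX1 cause bits,
		 * and that value is written to MSIXBM(msix_vector) and kept in
		 * eims_value for later masking.
		 */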
407                 if (rx_queue > IGB_N0_QUEUE)
408                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
409                 if (tx_queue > IGB_N0_QUEUE)
410                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
411                 if (!adapter->msix_entries && msix_vector == 0)
412                         msixbm |= E1000_EIMS_OTHER;
413                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
414                 q_vector->eims_value = msixbm;
415                 break;
416         case e1000_82576:
417                 /* 82576 uses a table-based method for assigning vectors.
418                    Each queue has a single entry in the table to which we write
419                    a vector number along with a "valid" bit.  Sadly, the layout
420                    of the table is somewhat counterintuitive. */
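		/*
		 * For illustration: mapping rx_queue = 9 to msix_vector = 3
		 * uses IVAR0 entry 9 & 0x7 = 1, and because the queue number is
		 * >= 8 the value (3 | E1000_IVAR_VALID) lands in bits 23:16
		 * (the "third byte") of that entry.
		 */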
421                 if (rx_queue > IGB_N0_QUEUE) {
422                         index = (rx_queue & 0x7);
423                         ivar = array_rd32(E1000_IVAR0, index);
424                         if (rx_queue < 8) {
425                                 /* vector goes into low byte of register */
426                                 ivar = ivar & 0xFFFFFF00;
427                                 ivar |= msix_vector | E1000_IVAR_VALID;
428                         } else {
429                                 /* vector goes into third byte of register */
430                                 ivar = ivar & 0xFF00FFFF;
431                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
432                         }
433                         array_wr32(E1000_IVAR0, index, ivar);
434                 }
435                 if (tx_queue > IGB_N0_QUEUE) {
436                         index = (tx_queue & 0x7);
437                         ivar = array_rd32(E1000_IVAR0, index);
438                         if (tx_queue < 8) {
439                                 /* vector goes into second byte of register */
440                                 ivar = ivar & 0xFFFF00FF;
441                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
442                         } else {
443                                 /* vector goes into high byte of register */
444                                 ivar = ivar & 0x00FFFFFF;
445                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
446                         }
447                         array_wr32(E1000_IVAR0, index, ivar);
448                 }
449                 q_vector->eims_value = 1 << msix_vector;
450                 break;
451         case e1000_82580:
452         case e1000_i350:
453                 /* 82580 uses the same table-based approach as 82576 but has fewer
454                    entries; as a result we carry over for queues greater than 4. */
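		/*
		 * For illustration: on 82580/i350, rx_queue = 5 uses IVAR0
		 * entry 5 >> 1 = 2 and, being odd, takes bits 23:16 of that
		 * entry; rx_queue = 4 shares the same entry but uses the low
		 * byte instead.
		 */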
455                 if (rx_queue > IGB_N0_QUEUE) {
456                         index = (rx_queue >> 1);
457                         ivar = array_rd32(E1000_IVAR0, index);
458                         if (rx_queue & 0x1) {
459                                 /* vector goes into third byte of register */
460                                 ivar = ivar & 0xFF00FFFF;
461                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
462                         } else {
463                                 /* vector goes into low byte of register */
464                                 ivar = ivar & 0xFFFFFF00;
465                                 ivar |= msix_vector | E1000_IVAR_VALID;
466                         }
467                         array_wr32(E1000_IVAR0, index, ivar);
468                 }
469                 if (tx_queue > IGB_N0_QUEUE) {
470                         index = (tx_queue >> 1);
471                         ivar = array_rd32(E1000_IVAR0, index);
472                         if (tx_queue & 0x1) {
473                                 /* vector goes into high byte of register */
474                                 ivar = ivar & 0x00FFFFFF;
475                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
476                         } else {
477                                 /* vector goes into second byte of register */
478                                 ivar = ivar & 0xFFFF00FF;
479                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
480                         }
481                         array_wr32(E1000_IVAR0, index, ivar);
482                 }
483                 q_vector->eims_value = 1 << msix_vector;
484                 break;
485         default:
486                 BUG();
487                 break;
488         }
489
490         /* add q_vector eims value to global eims_enable_mask */
491         adapter->eims_enable_mask |= q_vector->eims_value;
492
493         /* configure q_vector to set itr on first interrupt */
494         q_vector->set_itr = 1;
495 }
496
497 /**
498  * igb_configure_msix - Configure MSI-X hardware
499  *
500  * igb_configure_msix sets up the hardware to properly
501  * generate MSI-X interrupts.
502  **/
503 static void igb_configure_msix(struct igb_adapter *adapter)
504 {
505         u32 tmp;
506         int i, vector = 0;
507         struct e1000_hw *hw = &adapter->hw;
508
509         adapter->eims_enable_mask = 0;
510
511         /* set vector for other causes, i.e. link changes */
512         switch (hw->mac.type) {
513         case e1000_82575:
514                 tmp = rd32(E1000_CTRL_EXT);
515                 /* enable MSI-X PBA support */
516                 tmp |= E1000_CTRL_EXT_PBA_CLR;
517
518                 /* Auto-Mask interrupts upon ICR read. */
519                 tmp |= E1000_CTRL_EXT_EIAME;
520                 tmp |= E1000_CTRL_EXT_IRCA;
521
522                 wr32(E1000_CTRL_EXT, tmp);
523
524                 /* enable msix_other interrupt */
525                 array_wr32(E1000_MSIXBM(0), vector++,
526                                       E1000_EIMS_OTHER);
527                 adapter->eims_other = E1000_EIMS_OTHER;
528
529                 break;
530
531         case e1000_82576:
532         case e1000_82580:
533         case e1000_i350:
534                 /* Turn on MSI-X capability first, or our settings
535                  * won't stick.  And it will take days to debug. */
536                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
537                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
538                                 E1000_GPIE_NSICR);
539
540                 /* enable msix_other interrupt */
541                 adapter->eims_other = 1 << vector;
542                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
543
544                 wr32(E1000_IVAR_MISC, tmp);
545                 break;
546         default:
547                 /* do nothing, since nothing else supports MSI-X */
548                 break;
549         } /* switch (hw->mac.type) */
550
551         adapter->eims_enable_mask |= adapter->eims_other;
552
553         for (i = 0; i < adapter->num_q_vectors; i++)
554                 igb_assign_vector(adapter->q_vector[i], vector++);
555
556         wrfl();
557 }
558
559 /**
560  * igb_request_msix - Initialize MSI-X interrupts
561  *
562  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
563  * kernel.
564  **/
565 static int igb_request_msix(struct igb_adapter *adapter)
566 {
567         struct net_device *netdev = adapter->netdev;
568         struct e1000_hw *hw = &adapter->hw;
569         int i, err = 0, vector = 0;
570
571         err = request_irq(adapter->msix_entries[vector].vector,
572                           igb_msix_other, 0, netdev->name, adapter);
573         if (err)
574                 goto out;
575         vector++;
576
577         for (i = 0; i < adapter->num_q_vectors; i++) {
578                 struct igb_q_vector *q_vector = adapter->q_vector[i];
579
580                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
581
582                 if (q_vector->rx_ring && q_vector->tx_ring)
583                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
584                                 q_vector->rx_ring->queue_index);
585                 else if (q_vector->tx_ring)
586                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
587                                 q_vector->tx_ring->queue_index);
588                 else if (q_vector->rx_ring)
589                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
590                                 q_vector->rx_ring->queue_index);
591                 else
592                         sprintf(q_vector->name, "%s-unused", netdev->name);
593
594                 err = request_irq(adapter->msix_entries[vector].vector,
595                                   igb_msix_ring, 0, q_vector->name,
596                                   q_vector);
597                 if (err)
598                         goto out;
599                 vector++;
600         }
601
602         igb_configure_msix(adapter);
603         return 0;
604 out:
605         return err;
606 }
607
608 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
609 {
610         if (adapter->msix_entries) {
611                 pci_disable_msix(adapter->pdev);
612                 kfree(adapter->msix_entries);
613                 adapter->msix_entries = NULL;
614         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
615                 pci_disable_msi(adapter->pdev);
616         }
617 }
618
619 /**
620  * igb_free_q_vectors - Free memory allocated for interrupt vectors
621  * @adapter: board private structure to initialize
622  *
623  * This function frees the memory allocated to the q_vectors.  In addition if
624  * NAPI is enabled it will delete any references to the NAPI struct prior
625  * to freeing the q_vector.
626  **/
627 static void igb_free_q_vectors(struct igb_adapter *adapter)
628 {
629         int v_idx;
630
631         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
632                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
633                 adapter->q_vector[v_idx] = NULL;
634                 if (!q_vector)
635                         continue;
636                 netif_napi_del(&q_vector->napi);
637                 kfree(q_vector);
638         }
639         adapter->num_q_vectors = 0;
640 }
641
642 /**
643  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
644  *
645  * This function resets the device so that it has 0 rx queues, tx queues, and
646  * MSI-X interrupts allocated.
647  */
648 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
649 {
650         igb_free_queues(adapter);
651         igb_free_q_vectors(adapter);
652         igb_reset_interrupt_capability(adapter);
653 }
654
655 /**
656  * igb_set_interrupt_capability - set MSI or MSI-X if supported
657  *
658  * Attempt to configure interrupts using the best available
659  * capabilities of the hardware and kernel.
660  **/
661 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
662 {
663         int err;
664         int numvecs, i;
665
666         /* Number of supported queues. */
667         adapter->num_rx_queues = adapter->rss_queues;
668         adapter->num_tx_queues = adapter->rss_queues;
669
670         /* start with one vector for every rx queue */
671         numvecs = adapter->num_rx_queues;
672
673         /* if tx handler is separate add 1 for every tx queue */
674         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
675                 numvecs += adapter->num_tx_queues;
676
677         /* store the number of vectors reserved for queues */
678         adapter->num_q_vectors = numvecs;
679
680         /* add 1 vector for link status interrupts */
681         numvecs++;
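	/*
	 * For illustration: with rss_queues = 4 and queue pairing disabled
	 * this requests 4 Rx + 4 Tx + 1 link vector = 9 MSI-X vectors; with
	 * IGB_FLAG_QUEUE_PAIRS set it would be 4 + 1 = 5.
	 */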
682         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
683                                         GFP_KERNEL);
684         if (!adapter->msix_entries)
685                 goto msi_only;
686
687         for (i = 0; i < numvecs; i++)
688                 adapter->msix_entries[i].entry = i;
689
690         err = pci_enable_msix(adapter->pdev,
691                               adapter->msix_entries,
692                               numvecs);
693         if (err == 0)
694                 goto out;
695
696         igb_reset_interrupt_capability(adapter);
697
698         /* If we can't do MSI-X, try MSI */
699 msi_only:
700 #ifdef CONFIG_PCI_IOV
701         /* disable SR-IOV for non MSI-X configurations */
702         if (adapter->vf_data) {
703                 struct e1000_hw *hw = &adapter->hw;
704                 /* disable iov and allow time for transactions to clear */
705                 pci_disable_sriov(adapter->pdev);
706                 msleep(500);
707
708                 kfree(adapter->vf_data);
709                 adapter->vf_data = NULL;
710                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
711                 msleep(100);
712                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
713         }
714 #endif
715         adapter->vfs_allocated_count = 0;
716         adapter->rss_queues = 1;
717         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
718         adapter->num_rx_queues = 1;
719         adapter->num_tx_queues = 1;
720         adapter->num_q_vectors = 1;
721         if (!pci_enable_msi(adapter->pdev))
722                 adapter->flags |= IGB_FLAG_HAS_MSI;
723 out:
724         /* Notify the stack of the (possibly) reduced Tx Queue count. */
725         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
726         return;
727 }
728
729 /**
730  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
731  * @adapter: board private structure to initialize
732  *
733  * We allocate one q_vector per queue interrupt.  If allocation fails we
734  * return -ENOMEM.
735  **/
736 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
737 {
738         struct igb_q_vector *q_vector;
739         struct e1000_hw *hw = &adapter->hw;
740         int v_idx;
741
742         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
743                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
744                 if (!q_vector)
745                         goto err_out;
746                 q_vector->adapter = adapter;
747                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
748                 q_vector->itr_val = IGB_START_ITR;
749                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
750                 adapter->q_vector[v_idx] = q_vector;
751         }
752         return 0;
753
754 err_out:
755         igb_free_q_vectors(adapter);
756         return -ENOMEM;
757 }
758
759 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
760                                       int ring_idx, int v_idx)
761 {
762         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
763
764         q_vector->rx_ring = adapter->rx_ring[ring_idx];
765         q_vector->rx_ring->q_vector = q_vector;
766         q_vector->itr_val = adapter->rx_itr_setting;
767         if (q_vector->itr_val && q_vector->itr_val <= 3)
768                 q_vector->itr_val = IGB_START_ITR;
769 }
770
771 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
772                                       int ring_idx, int v_idx)
773 {
774         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
775
776         q_vector->tx_ring = adapter->tx_ring[ring_idx];
777         q_vector->tx_ring->q_vector = q_vector;
778         q_vector->itr_val = adapter->tx_itr_setting;
779         if (q_vector->itr_val && q_vector->itr_val <= 3)
780                 q_vector->itr_val = IGB_START_ITR;
781 }
782
783 /**
784  * igb_map_ring_to_vector - maps allocated queues to vectors
785  *
786  * This function maps the recently allocated queues to vectors.
787  **/
788 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
789 {
790         int i;
791         int v_idx = 0;
792
793         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
794             (adapter->num_q_vectors < adapter->num_tx_queues))
795                 return -ENOMEM;
796
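	/*
	 * For illustration: with 2 Rx and 2 Tx queues and 4 q_vectors each
	 * ring gets its own vector below; with only 2 q_vectors, rx_ring[i]
	 * and tx_ring[i] are paired onto vector i instead.
	 */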
797         if (adapter->num_q_vectors >=
798             (adapter->num_rx_queues + adapter->num_tx_queues)) {
799                 for (i = 0; i < adapter->num_rx_queues; i++)
800                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
801                 for (i = 0; i < adapter->num_tx_queues; i++)
802                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
803         } else {
804                 for (i = 0; i < adapter->num_rx_queues; i++) {
805                         if (i < adapter->num_tx_queues)
806                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
807                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
808                 }
809                 for (; i < adapter->num_tx_queues; i++)
810                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
811         }
812         return 0;
813 }
814
815 /**
816  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
817  *
818  * This function initializes the interrupts and allocates all of the queues.
819  **/
820 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
821 {
822         struct pci_dev *pdev = adapter->pdev;
823         int err;
824
825         igb_set_interrupt_capability(adapter);
826
827         err = igb_alloc_q_vectors(adapter);
828         if (err) {
829                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
830                 goto err_alloc_q_vectors;
831         }
832
833         err = igb_alloc_queues(adapter);
834         if (err) {
835                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
836                 goto err_alloc_queues;
837         }
838
839         err = igb_map_ring_to_vector(adapter);
840         if (err) {
841                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
842                 goto err_map_queues;
843         }
844
845
846         return 0;
847 err_map_queues:
848         igb_free_queues(adapter);
849 err_alloc_queues:
850         igb_free_q_vectors(adapter);
851 err_alloc_q_vectors:
852         igb_reset_interrupt_capability(adapter);
853         return err;
854 }
855
856 /**
857  * igb_request_irq - initialize interrupts
858  *
859  * Attempts to configure interrupts using the best available
860  * capabilities of the hardware and kernel.
861  **/
862 static int igb_request_irq(struct igb_adapter *adapter)
863 {
864         struct net_device *netdev = adapter->netdev;
865         struct pci_dev *pdev = adapter->pdev;
866         int err = 0;
867
868         if (adapter->msix_entries) {
869                 err = igb_request_msix(adapter);
870                 if (!err)
871                         goto request_done;
872                 /* fall back to MSI */
873                 igb_clear_interrupt_scheme(adapter);
874                 if (!pci_enable_msi(adapter->pdev))
875                         adapter->flags |= IGB_FLAG_HAS_MSI;
876                 igb_free_all_tx_resources(adapter);
877                 igb_free_all_rx_resources(adapter);
878                 adapter->num_tx_queues = 1;
879                 adapter->num_rx_queues = 1;
880                 adapter->num_q_vectors = 1;
881                 err = igb_alloc_q_vectors(adapter);
882                 if (err) {
883                         dev_err(&pdev->dev,
884                                 "Unable to allocate memory for vectors\n");
885                         goto request_done;
886                 }
887                 err = igb_alloc_queues(adapter);
888                 if (err) {
889                         dev_err(&pdev->dev,
890                                 "Unable to allocate memory for queues\n");
891                         igb_free_q_vectors(adapter);
892                         goto request_done;
893                 }
894                 igb_setup_all_tx_resources(adapter);
895                 igb_setup_all_rx_resources(adapter);
896         } else {
897                 igb_assign_vector(adapter->q_vector[0], 0);
898         }
899
900         if (adapter->flags & IGB_FLAG_HAS_MSI) {
901                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
902                                   netdev->name, adapter);
903                 if (!err)
904                         goto request_done;
905
906                 /* fall back to legacy interrupts */
907                 igb_reset_interrupt_capability(adapter);
908                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
909         }
910
911         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
912                           netdev->name, adapter);
913
914         if (err)
915                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
916                         err);
917
918 request_done:
919         return err;
920 }
921
922 static void igb_free_irq(struct igb_adapter *adapter)
923 {
924         if (adapter->msix_entries) {
925                 int vector = 0, i;
926
927                 free_irq(adapter->msix_entries[vector++].vector, adapter);
928
929                 for (i = 0; i < adapter->num_q_vectors; i++) {
930                         struct igb_q_vector *q_vector = adapter->q_vector[i];
931                         free_irq(adapter->msix_entries[vector++].vector,
932                                  q_vector);
933                 }
934         } else {
935                 free_irq(adapter->pdev->irq, adapter);
936         }
937 }
938
939 /**
940  * igb_irq_disable - Mask off interrupt generation on the NIC
941  * @adapter: board private structure
942  **/
943 static void igb_irq_disable(struct igb_adapter *adapter)
944 {
945         struct e1000_hw *hw = &adapter->hw;
946
947         /*
948          * we need to be careful when disabling interrupts.  The VFs are also
949          * mapped into these registers and so clearing the bits can cause
950          * issues on the VF drivers so we only need to clear what we set
951          */
952         if (adapter->msix_entries) {
953                 u32 regval = rd32(E1000_EIAM);
954                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
955                 wr32(E1000_EIMC, adapter->eims_enable_mask);
956                 regval = rd32(E1000_EIAC);
957                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
958         }
959
960         wr32(E1000_IAM, 0);
961         wr32(E1000_IMC, ~0);
962         wrfl();
963         synchronize_irq(adapter->pdev->irq);
964 }
965
966 /**
967  * igb_irq_enable - Enable default interrupt generation settings
968  * @adapter: board private structure
969  **/
970 static void igb_irq_enable(struct igb_adapter *adapter)
971 {
972         struct e1000_hw *hw = &adapter->hw;
973
974         if (adapter->msix_entries) {
975                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
976                 u32 regval = rd32(E1000_EIAC);
977                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
978                 regval = rd32(E1000_EIAM);
979                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
980                 wr32(E1000_EIMS, adapter->eims_enable_mask);
981                 if (adapter->vfs_allocated_count) {
982                         wr32(E1000_MBVFIMR, 0xFF);
983                         ims |= E1000_IMS_VMMB;
984                 }
985                 if (adapter->hw.mac.type == e1000_82580)
986                         ims |= E1000_IMS_DRSTA;
987
988                 wr32(E1000_IMS, ims);
989         } else {
990                 wr32(E1000_IMS, IMS_ENABLE_MASK |
991                                 E1000_IMS_DRSTA);
992                 wr32(E1000_IAM, IMS_ENABLE_MASK |
993                                 E1000_IMS_DRSTA);
994         }
995 }
996
997 static void igb_update_mng_vlan(struct igb_adapter *adapter)
998 {
999         struct e1000_hw *hw = &adapter->hw;
1000         u16 vid = adapter->hw.mng_cookie.vlan_id;
1001         u16 old_vid = adapter->mng_vlan_id;
1002
1003         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1004                 /* add VID to filter table */
1005                 igb_vfta_set(hw, vid, true);
1006                 adapter->mng_vlan_id = vid;
1007         } else {
1008                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1009         }
1010
1011         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1012             (vid != old_vid) &&
1013             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1014                 /* remove VID from filter table */
1015                 igb_vfta_set(hw, old_vid, false);
1016         }
1017 }
1018
1019 /**
1020  * igb_release_hw_control - release control of the h/w to f/w
1021  * @adapter: address of board private structure
1022  *
1023  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1024  * For ASF and Pass Through versions of f/w this means that the
1025  * driver is no longer loaded.
1026  *
1027  **/
1028 static void igb_release_hw_control(struct igb_adapter *adapter)
1029 {
1030         struct e1000_hw *hw = &adapter->hw;
1031         u32 ctrl_ext;
1032
1033         /* Let firmware take over control of h/w */
1034         ctrl_ext = rd32(E1000_CTRL_EXT);
1035         wr32(E1000_CTRL_EXT,
1036                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1037 }
1038
1039 /**
1040  * igb_get_hw_control - get control of the h/w from f/w
1041  * @adapter: address of board private structure
1042  *
1043  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1044  * For ASF and Pass Through versions of f/w this means that
1045  * the driver is loaded.
1046  *
1047  **/
1048 static void igb_get_hw_control(struct igb_adapter *adapter)
1049 {
1050         struct e1000_hw *hw = &adapter->hw;
1051         u32 ctrl_ext;
1052
1053         /* Let firmware know the driver has taken over */
1054         ctrl_ext = rd32(E1000_CTRL_EXT);
1055         wr32(E1000_CTRL_EXT,
1056                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1057 }
1058
1059 /**
1060  * igb_configure - configure the hardware for RX and TX
1061  * @adapter: private board structure
1062  **/
1063 static void igb_configure(struct igb_adapter *adapter)
1064 {
1065         struct net_device *netdev = adapter->netdev;
1066         int i;
1067
1068         igb_get_hw_control(adapter);
1069         igb_set_rx_mode(netdev);
1070
1071         igb_restore_vlan(adapter);
1072
1073         igb_setup_tctl(adapter);
1074         igb_setup_mrqc(adapter);
1075         igb_setup_rctl(adapter);
1076
1077         igb_configure_tx(adapter);
1078         igb_configure_rx(adapter);
1079
1080         igb_rx_fifo_flush_82575(&adapter->hw);
1081
1082         /* call igb_desc_unused which always leaves
1083          * at least 1 descriptor unused to make sure
1084          * next_to_use != next_to_clean */
1085         for (i = 0; i < adapter->num_rx_queues; i++) {
1086                 struct igb_ring *ring = adapter->rx_ring[i];
1087                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1088         }
1089 }
1090
1091 /**
1092  * igb_power_up_link - Power up the phy/serdes link
1093  * @adapter: address of board private structure
1094  **/
1095 void igb_power_up_link(struct igb_adapter *adapter)
1096 {
1097         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1098                 igb_power_up_phy_copper(&adapter->hw);
1099         else
1100                 igb_power_up_serdes_link_82575(&adapter->hw);
1101 }
1102
1103 /**
1104  * igb_power_down_link - Power down the phy/serdes link
1105  * @adapter: address of board private structure
1106  */
1107 static void igb_power_down_link(struct igb_adapter *adapter)
1108 {
1109         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1110                 igb_power_down_phy_copper_82575(&adapter->hw);
1111         else
1112                 igb_shutdown_serdes_link_82575(&adapter->hw);
1113 }
1114
1115 /**
1116  * igb_up - Open the interface and prepare it to handle traffic
1117  * @adapter: board private structure
1118  **/
1119 int igb_up(struct igb_adapter *adapter)
1120 {
1121         struct e1000_hw *hw = &adapter->hw;
1122         int i;
1123
1124         /* hardware has been reset, we need to reload some things */
1125         igb_configure(adapter);
1126
1127         clear_bit(__IGB_DOWN, &adapter->state);
1128
1129         for (i = 0; i < adapter->num_q_vectors; i++) {
1130                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1131                 napi_enable(&q_vector->napi);
1132         }
1133         if (adapter->msix_entries)
1134                 igb_configure_msix(adapter);
1135         else
1136                 igb_assign_vector(adapter->q_vector[0], 0);
1137
1138         /* Clear any pending interrupts. */
1139         rd32(E1000_ICR);
1140         igb_irq_enable(adapter);
1141
1142         /* notify VFs that reset has been completed */
1143         if (adapter->vfs_allocated_count) {
1144                 u32 reg_data = rd32(E1000_CTRL_EXT);
1145                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1146                 wr32(E1000_CTRL_EXT, reg_data);
1147         }
1148
1149         netif_tx_start_all_queues(adapter->netdev);
1150
1151         /* start the watchdog. */
1152         hw->mac.get_link_status = 1;
1153         schedule_work(&adapter->watchdog_task);
1154
1155         return 0;
1156 }
1157
1158 void igb_down(struct igb_adapter *adapter)
1159 {
1160         struct net_device *netdev = adapter->netdev;
1161         struct e1000_hw *hw = &adapter->hw;
1162         u32 tctl, rctl;
1163         int i;
1164
1165         /* signal that we're down so the interrupt handler does not
1166          * reschedule our watchdog timer */
1167         set_bit(__IGB_DOWN, &adapter->state);
1168
1169         /* disable receives in the hardware */
1170         rctl = rd32(E1000_RCTL);
1171         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1172         /* flush and sleep below */
1173
1174         netif_tx_stop_all_queues(netdev);
1175
1176         /* disable transmits in the hardware */
1177         tctl = rd32(E1000_TCTL);
1178         tctl &= ~E1000_TCTL_EN;
1179         wr32(E1000_TCTL, tctl);
1180         /* flush both disables and wait for them to finish */
1181         wrfl();
1182         msleep(10);
1183
1184         for (i = 0; i < adapter->num_q_vectors; i++) {
1185                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1186                 napi_disable(&q_vector->napi);
1187         }
1188
1189         igb_irq_disable(adapter);
1190
1191         del_timer_sync(&adapter->watchdog_timer);
1192         del_timer_sync(&adapter->phy_info_timer);
1193
1194         netif_carrier_off(netdev);
1195
1196         /* record the stats before reset */
1197         igb_update_stats(adapter);
1198
1199         adapter->link_speed = 0;
1200         adapter->link_duplex = 0;
1201
1202         if (!pci_channel_offline(adapter->pdev))
1203                 igb_reset(adapter);
1204         igb_clean_all_tx_rings(adapter);
1205         igb_clean_all_rx_rings(adapter);
1206 #ifdef CONFIG_IGB_DCA
1207
1208         /* since we reset the hardware DCA settings were cleared */
1209         igb_setup_dca(adapter);
1210 #endif
1211 }
1212
1213 void igb_reinit_locked(struct igb_adapter *adapter)
1214 {
1215         WARN_ON(in_interrupt());
1216         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1217                 msleep(1);
1218         igb_down(adapter);
1219         igb_up(adapter);
1220         clear_bit(__IGB_RESETTING, &adapter->state);
1221 }
1222
1223 void igb_reset(struct igb_adapter *adapter)
1224 {
1225         struct pci_dev *pdev = adapter->pdev;
1226         struct e1000_hw *hw = &adapter->hw;
1227         struct e1000_mac_info *mac = &hw->mac;
1228         struct e1000_fc_info *fc = &hw->fc;
1229         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1230         u16 hwm;
1231
1232         /* Repartition PBA for greater than 9k MTU
1233          * To take effect CTRL.RST is required.
1234          */
1235         switch (mac->type) {
1236         case e1000_i350:
1237         case e1000_82580:
1238                 pba = rd32(E1000_RXPBS);
1239                 pba = igb_rxpbs_adjust_82580(pba);
1240                 break;
1241         case e1000_82576:
1242                 pba = rd32(E1000_RXPBS);
1243                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1244                 break;
1245         case e1000_82575:
1246         default:
1247                 pba = E1000_PBA_34K;
1248                 break;
1249         }
1250
1251         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1252             (mac->type < e1000_82576)) {
1253                 /* adjust PBA for jumbo frames */
1254                 wr32(E1000_PBA, pba);
1255
1256                 /* To maintain wire speed transmits, the Tx FIFO should be
1257                  * large enough to accommodate two full transmit packets,
1258                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1259                  * the Rx FIFO should be large enough to accommodate at least
1260                  * one full receive packet and is similarly rounded up and
1261                  * expressed in KB. */
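		/*
		 * For illustration (assuming a 9000-byte MTU, i.e. a
		 * 9018-byte max frame): min_tx_space =
		 * ALIGN((9018 + 16 - 4) * 2, 1024) >> 10 = 18 KB and
		 * min_rx_space = ALIGN(9018, 1024) >> 10 = 9 KB.
		 */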
1262                 pba = rd32(E1000_PBA);
1263                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1264                 tx_space = pba >> 16;
1265                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1266                 pba &= 0xffff;
1267                 /* the Tx FIFO also stores 16 bytes of information per packet,
1268                  * but we don't include the Ethernet FCS because hardware appends it */
1269                 min_tx_space = (adapter->max_frame_size +
1270                                 sizeof(union e1000_adv_tx_desc) -
1271                                 ETH_FCS_LEN) * 2;
1272                 min_tx_space = ALIGN(min_tx_space, 1024);
1273                 min_tx_space >>= 10;
1274                 /* software strips receive CRC, so leave room for it */
1275                 min_rx_space = adapter->max_frame_size;
1276                 min_rx_space = ALIGN(min_rx_space, 1024);
1277                 min_rx_space >>= 10;
1278
1279                 /* If current Tx allocation is less than the min Tx FIFO size,
1280                  * and the min Tx FIFO size is less than the current Rx FIFO
1281                  * allocation, take space away from current Rx allocation */
1282                 if (tx_space < min_tx_space &&
1283                     ((min_tx_space - tx_space) < pba)) {
1284                         pba = pba - (min_tx_space - tx_space);
1285
1286                         /* if short on rx space, rx wins and must trump tx
1287                          * adjustment */
1288                         if (pba < min_rx_space)
1289                                 pba = min_rx_space;
1290                 }
1291                 wr32(E1000_PBA, pba);
1292         }
1293
1294         /* flow control settings */
1295         /* The high water mark must be low enough to fit one full frame
1296          * (or the size used for early receive) above it in the Rx FIFO.
1297          * Set it to the lower of:
1298          * - 90% of the Rx FIFO size, or
1299          * - the full Rx FIFO size minus one full frame */
1300         hwm = min(((pba << 10) * 9 / 10),
1301                         ((pba << 10) - 2 * adapter->max_frame_size));
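	/*
	 * For illustration (assuming pba = 34 KB and a 1522-byte max frame):
	 * 90% of the FIFO is 31334 bytes, FIFO minus two frames is
	 * 34816 - 3044 = 31772, so hwm = 31334 and the 16-byte-aligned value
	 * programmed below is 31328.
	 */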
1302
1303         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1304         fc->low_water = fc->high_water - 16;
1305         fc->pause_time = 0xFFFF;
1306         fc->send_xon = 1;
1307         fc->current_mode = fc->requested_mode;
1308
1309         /* disable receive for all VFs and wait one second */
1310         if (adapter->vfs_allocated_count) {
1311                 int i;
1312                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1313                         adapter->vf_data[i].flags = 0;
1314
1315                 /* ping all the active vfs to let them know we are going down */
1316                 igb_ping_all_vfs(adapter);
1317
1318                 /* disable transmits and receives */
1319                 wr32(E1000_VFRE, 0);
1320                 wr32(E1000_VFTE, 0);
1321         }
1322
1323         /* Allow time for pending master requests to run */
1324         hw->mac.ops.reset_hw(hw);
1325         wr32(E1000_WUC, 0);
1326
1327         if (hw->mac.ops.init_hw(hw))
1328                 dev_err(&pdev->dev, "Hardware Error\n");
1329
1330         if (hw->mac.type == e1000_82580) {
1331                 u32 reg = rd32(E1000_PCIEMISC);
1332                 wr32(E1000_PCIEMISC,
1333                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1334         }
1335         if (!netif_running(adapter->netdev))
1336                 igb_power_down_link(adapter);
1337
1338         igb_update_mng_vlan(adapter);
1339
1340         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1341         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1342
1343         igb_get_phy_info(hw);
1344 }
1345
1346 static const struct net_device_ops igb_netdev_ops = {
1347         .ndo_open               = igb_open,
1348         .ndo_stop               = igb_close,
1349         .ndo_start_xmit         = igb_xmit_frame_adv,
1350         .ndo_get_stats          = igb_get_stats,
1351         .ndo_set_rx_mode        = igb_set_rx_mode,
1352         .ndo_set_multicast_list = igb_set_rx_mode,
1353         .ndo_set_mac_address    = igb_set_mac,
1354         .ndo_change_mtu         = igb_change_mtu,
1355         .ndo_do_ioctl           = igb_ioctl,
1356         .ndo_tx_timeout         = igb_tx_timeout,
1357         .ndo_validate_addr      = eth_validate_addr,
1358         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1359         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1360         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1361         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1362         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1363         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1364         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1365 #ifdef CONFIG_NET_POLL_CONTROLLER
1366         .ndo_poll_controller    = igb_netpoll,
1367 #endif
1368 };
1369
1370 /**
1371  * igb_probe - Device Initialization Routine
1372  * @pdev: PCI device information struct
1373  * @ent: entry in igb_pci_tbl
1374  *
1375  * Returns 0 on success, negative on failure
1376  *
1377  * igb_probe initializes an adapter identified by a pci_dev structure.
1378  * The OS initialization, configuring of the adapter private structure,
1379  * and a hardware reset occur.
1380  **/
1381 static int __devinit igb_probe(struct pci_dev *pdev,
1382                                const struct pci_device_id *ent)
1383 {
1384         struct net_device *netdev;
1385         struct igb_adapter *adapter;
1386         struct e1000_hw *hw;
1387         u16 eeprom_data = 0;
1388         static int global_quad_port_a; /* global quad port a indication */
1389         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1390         unsigned long mmio_start, mmio_len;
1391         int err, pci_using_dac;
1392         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1393         u32 part_num;
1394
1395         err = pci_enable_device_mem(pdev);
1396         if (err)
1397                 return err;
1398
1399         pci_using_dac = 0;
1400         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1401         if (!err) {
1402                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1403                 if (!err)
1404                         pci_using_dac = 1;
1405         } else {
1406                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1407                 if (err) {
1408                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1409                         if (err) {
1410                                 dev_err(&pdev->dev, "No usable DMA "
1411                                         "configuration, aborting\n");
1412                                 goto err_dma;
1413                         }
1414                 }
1415         }
1416
1417         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1418                                            IORESOURCE_MEM),
1419                                            igb_driver_name);
1420         if (err)
1421                 goto err_pci_reg;
1422
1423         pci_enable_pcie_error_reporting(pdev);
1424
1425         pci_set_master(pdev);
1426         pci_save_state(pdev);
1427
1428         err = -ENOMEM;
1429         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1430                                    IGB_ABS_MAX_TX_QUEUES);
1431         if (!netdev)
1432                 goto err_alloc_etherdev;
1433
1434         SET_NETDEV_DEV(netdev, &pdev->dev);
1435
1436         pci_set_drvdata(pdev, netdev);
1437         adapter = netdev_priv(netdev);
1438         adapter->netdev = netdev;
1439         adapter->pdev = pdev;
1440         hw = &adapter->hw;
1441         hw->back = adapter;
1442         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1443
1444         mmio_start = pci_resource_start(pdev, 0);
1445         mmio_len = pci_resource_len(pdev, 0);
1446
1447         err = -EIO;
1448         hw->hw_addr = ioremap(mmio_start, mmio_len);
1449         if (!hw->hw_addr)
1450                 goto err_ioremap;
1451
1452         netdev->netdev_ops = &igb_netdev_ops;
1453         igb_set_ethtool_ops(netdev);
1454         netdev->watchdog_timeo = 5 * HZ;
1455
1456         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1457
1458         netdev->mem_start = mmio_start;
1459         netdev->mem_end = mmio_start + mmio_len;
1460
1461         /* PCI config space info */
1462         hw->vendor_id = pdev->vendor;
1463         hw->device_id = pdev->device;
1464         hw->revision_id = pdev->revision;
1465         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1466         hw->subsystem_device_id = pdev->subsystem_device;
1467
1468         /* Copy the default MAC, PHY and NVM function pointers */
1469         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1470         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1471         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1472         /* Initialize skew-specific constants */
1473         err = ei->get_invariants(hw);
1474         if (err)
1475                 goto err_sw_init;
1476
1477         /* setup the private structure */
1478         err = igb_sw_init(adapter);
1479         if (err)
1480                 goto err_sw_init;
1481
1482         igb_get_bus_info_pcie(hw);
1483
1484         hw->phy.autoneg_wait_to_complete = false;
1485
1486         /* Copper options */
1487         if (hw->phy.media_type == e1000_media_type_copper) {
1488                 hw->phy.mdix = AUTO_ALL_MODES;
1489                 hw->phy.disable_polarity_correction = false;
1490                 hw->phy.ms_type = e1000_ms_hw_default;
1491         }
1492
1493         if (igb_check_reset_block(hw))
1494                 dev_info(&pdev->dev,
1495                         "PHY reset is blocked due to SOL/IDER session.\n");
1496
1497         netdev->features = NETIF_F_SG |
1498                            NETIF_F_IP_CSUM |
1499                            NETIF_F_HW_VLAN_TX |
1500                            NETIF_F_HW_VLAN_RX |
1501                            NETIF_F_HW_VLAN_FILTER;
1502
1503         netdev->features |= NETIF_F_IPV6_CSUM;
1504         netdev->features |= NETIF_F_TSO;
1505         netdev->features |= NETIF_F_TSO6;
1506         netdev->features |= NETIF_F_GRO;
1507
1508         netdev->vlan_features |= NETIF_F_TSO;
1509         netdev->vlan_features |= NETIF_F_TSO6;
1510         netdev->vlan_features |= NETIF_F_IP_CSUM;
1511         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1512         netdev->vlan_features |= NETIF_F_SG;
1513
1514         if (pci_using_dac)
1515                 netdev->features |= NETIF_F_HIGHDMA;
1516
1517         if (hw->mac.type >= e1000_82576)
1518                 netdev->features |= NETIF_F_SCTP_CSUM;
1519
1520         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1521
1522         /* before reading the NVM, reset the controller to put the device in a
1523          * known good starting state */
1524         hw->mac.ops.reset_hw(hw);
1525
1526         /* make sure the NVM is good */
1527         if (igb_validate_nvm_checksum(hw) < 0) {
1528                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1529                 err = -EIO;
1530                 goto err_eeprom;
1531         }
1532
1533         /* copy the MAC address out of the NVM */
1534         if (hw->mac.ops.read_mac_addr(hw))
1535                 dev_err(&pdev->dev, "NVM Read Error\n");
1536
1537         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1538         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1539
1540         if (!is_valid_ether_addr(netdev->perm_addr)) {
1541                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1542                 err = -EIO;
1543                 goto err_eeprom;
1544         }
1545
1546         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1547                     (unsigned long) adapter);
1548         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1549                     (unsigned long) adapter);
1550
1551         INIT_WORK(&adapter->reset_task, igb_reset_task);
1552         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1553
1554         /* Initialize link properties that are user-changeable */
1555         adapter->fc_autoneg = true;
1556         hw->mac.autoneg = true;
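             /* 0x2f advertises 10/100 half and full duplex plus 1000 full */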
1557         hw->phy.autoneg_advertised = 0x2f;
1558
1559         hw->fc.requested_mode = e1000_fc_default;
1560         hw->fc.current_mode = e1000_fc_default;
1561
1562         igb_validate_mdi_setting(hw);
1563
1564         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1565          * enable the ACPI Magic Packet filter.
1566          */
1567
1568         if (hw->bus.func == 0)
1569                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1570         else if (hw->mac.type == e1000_82580)
1571                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1572                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1573                                  &eeprom_data);
1574         else if (hw->bus.func == 1)
1575                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1576
1577         if (eeprom_data & eeprom_apme_mask)
1578                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1579
1580         /* now that we have the eeprom settings, apply the special cases where
1581          * the eeprom may be wrong or the board simply won't support wake on
1582          * lan on a particular port */
1583         switch (pdev->device) {
1584         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1585                 adapter->eeprom_wol = 0;
1586                 break;
1587         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1588         case E1000_DEV_ID_82576_FIBER:
1589         case E1000_DEV_ID_82576_SERDES:
1590                 /* Wake events only supported on port A for dual fiber
1591                  * regardless of eeprom setting */
1592                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1593                         adapter->eeprom_wol = 0;
1594                 break;
1595         case E1000_DEV_ID_82576_QUAD_COPPER:
1596                 /* if quad port adapter, disable WoL on all but port A */
1597                 if (global_quad_port_a != 0)
1598                         adapter->eeprom_wol = 0;
1599                 else
1600                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1601                 /* Reset for multiple quad port adapters */
1602                 if (++global_quad_port_a == 4)
1603                         global_quad_port_a = 0;
1604                 break;
1605         }
1606
1607         /* initialize the wol settings based on the eeprom settings */
1608         adapter->wol = adapter->eeprom_wol;
1609         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1610
1611         /* reset the hardware with the new settings */
1612         igb_reset(adapter);
1613
1614         /* let the f/w know that the h/w is now under the control of the
1615          * driver. */
1616         igb_get_hw_control(adapter);
1617
1618         strcpy(netdev->name, "eth%d");
1619         err = register_netdev(netdev);
1620         if (err)
1621                 goto err_register;
1622
1623         /* carrier off reporting is important to ethtool even BEFORE open */
1624         netif_carrier_off(netdev);
1625
1626 #ifdef CONFIG_IGB_DCA
1627         if (dca_add_requester(&pdev->dev) == 0) {
1628                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1629                 dev_info(&pdev->dev, "DCA enabled\n");
1630                 igb_setup_dca(adapter);
1631         }
1632
1633 #endif
1634         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1635         /* print bus type/speed/width info */
1636         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1637                  netdev->name,
1638                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1639                                                             "unknown"),
1640                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1641                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1642                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1643                    "unknown"),
1644                  netdev->dev_addr);
1645
1646         igb_read_part_num(hw, &part_num);
1647         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1648                 (part_num >> 8), (part_num & 0xff));
1649
1650         dev_info(&pdev->dev,
1651                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1652                 adapter->msix_entries ? "MSI-X" :
1653                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1654                 adapter->num_rx_queues, adapter->num_tx_queues);
1655
1656         return 0;
1657
1658 err_register:
1659         igb_release_hw_control(adapter);
1660 err_eeprom:
1661         if (!igb_check_reset_block(hw))
1662                 igb_reset_phy(hw);
1663
1664         if (hw->flash_address)
1665                 iounmap(hw->flash_address);
1666 err_sw_init:
1667         igb_clear_interrupt_scheme(adapter);
1668         iounmap(hw->hw_addr);
1669 err_ioremap:
1670         free_netdev(netdev);
1671 err_alloc_etherdev:
1672         pci_release_selected_regions(pdev,
1673                                      pci_select_bars(pdev, IORESOURCE_MEM));
1674 err_pci_reg:
1675 err_dma:
1676         pci_disable_device(pdev);
1677         return err;
1678 }
1679
1680 /**
1681  * igb_remove - Device Removal Routine
1682  * @pdev: PCI device information struct
1683  *
1684  * igb_remove is called by the PCI subsystem to alert the driver
1685  * that it should release a PCI device.  This could be caused by a
1686  * Hot-Plug event, or because the driver is going to be removed from
1687  * memory.
1688  **/
1689 static void __devexit igb_remove(struct pci_dev *pdev)
1690 {
1691         struct net_device *netdev = pci_get_drvdata(pdev);
1692         struct igb_adapter *adapter = netdev_priv(netdev);
1693         struct e1000_hw *hw = &adapter->hw;
1694
1695         /* flush_scheduled_work() may reschedule our watchdog task, so
1696          * explicitly disable watchdog tasks from being rescheduled */
1697         set_bit(__IGB_DOWN, &adapter->state);
1698         del_timer_sync(&adapter->watchdog_timer);
1699         del_timer_sync(&adapter->phy_info_timer);
1700
1701         flush_scheduled_work();
1702
1703 #ifdef CONFIG_IGB_DCA
1704         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1705                 dev_info(&pdev->dev, "DCA disabled\n");
1706                 dca_remove_requester(&pdev->dev);
1707                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1708                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1709         }
1710 #endif
1711
1712         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1713          * would have already happened in close and is redundant. */
1714         igb_release_hw_control(adapter);
1715
1716         unregister_netdev(netdev);
1717
1718         igb_clear_interrupt_scheme(adapter);
1719
1720 #ifdef CONFIG_PCI_IOV
1721         /* reclaim resources allocated to VFs */
1722         if (adapter->vf_data) {
1723                 /* disable iov and allow time for transactions to clear */
1724                 pci_disable_sriov(pdev);
1725                 msleep(500);
1726
1727                 kfree(adapter->vf_data);
1728                 adapter->vf_data = NULL;
1729                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1730                 msleep(100);
1731                 dev_info(&pdev->dev, "IOV Disabled\n");
1732         }
1733 #endif
1734
1735         iounmap(hw->hw_addr);
1736         if (hw->flash_address)
1737                 iounmap(hw->flash_address);
1738         pci_release_selected_regions(pdev,
1739                                      pci_select_bars(pdev, IORESOURCE_MEM));
1740
1741         free_netdev(netdev);
1742
1743         pci_disable_pcie_error_reporting(pdev);
1744
1745         pci_disable_device(pdev);
1746 }
1747
1748 /**
1749  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1750  * @adapter: board private structure to initialize
1751  *
1752  * This function initializes the vf specific data storage and then attempts to
1753  * allocate the VFs.  The reason for ordering it this way is because it is much
1754  * more expensive time-wise to disable SR-IOV than it is to allocate and free
1755  * the memory for the VFs.
1756  **/
1757 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
1758 {
1759 #ifdef CONFIG_PCI_IOV
1760         struct pci_dev *pdev = adapter->pdev;
1761
1762         if (adapter->vfs_allocated_count > 7)
1763                 adapter->vfs_allocated_count = 7;
1764
1765         if (adapter->vfs_allocated_count) {
1766                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1767                                            sizeof(struct vf_data_storage),
1768                                            GFP_KERNEL);
1769                 /* if allocation failed then we do not support SR-IOV */
1770                 if (!adapter->vf_data) {
1771                         adapter->vfs_allocated_count = 0;
1772                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1773                                 "Data Storage\n");
1774                 }
1775         }
1776
1777         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1778                 kfree(adapter->vf_data);
1779                 adapter->vf_data = NULL;
1780 #endif /* CONFIG_PCI_IOV */
1781                 adapter->vfs_allocated_count = 0;
1782 #ifdef CONFIG_PCI_IOV
1783         } else {
1784                 unsigned char mac_addr[ETH_ALEN];
1785                 int i;
1786                 dev_info(&pdev->dev, "%d vfs allocated\n",
1787                          adapter->vfs_allocated_count);
1788                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1789                         random_ether_addr(mac_addr);
1790                         igb_set_vf_mac(adapter, i, mac_addr);
1791                 }
1792         }
1793 #endif /* CONFIG_PCI_IOV */
1794 }
1795
1796
1797 /**
1798  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1799  * @adapter: board private structure to initialize
1800  *
1801  * igb_init_hw_timer initializes the function pointer and values for the hw
1802  * timer found in hardware.
1803  **/
1804 static void igb_init_hw_timer(struct igb_adapter *adapter)
1805 {
1806         struct e1000_hw *hw = &adapter->hw;
1807
1808         switch (hw->mac.type) {
1809         case e1000_i350:
1810         case e1000_82580:
1811                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1812                 adapter->cycles.read = igb_read_clock;
1813                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1814                 adapter->cycles.mult = 1;
1815                 /*
1816                  * The 82580 timesync updates the system timer in 8 ns increments
1817                  * and the value cannot be shifted.  Instead we need to shift
1818                  * the registers to generate a 64bit timer value.  As a result
1819                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1820                  * 24 in order to generate a larger value for synchronization.
1821                  */
1822                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1823                 /* disable system timer temporarily by setting bit 31 */
1824                 wr32(E1000_TSAUXC, 0x80000000);
1825                 wrfl();
1826
1827                 /* Set registers so that rollover occurs soon to test this. */
1828                 wr32(E1000_SYSTIMR, 0x00000000);
1829                 wr32(E1000_SYSTIML, 0x80000000);
1830                 wr32(E1000_SYSTIMH, 0x000000FF);
1831                 wrfl();
1832
1833                 /* enable system timer by clearing bit 31 */
1834                 wr32(E1000_TSAUXC, 0x0);
1835                 wrfl();
1836
1837                 timecounter_init(&adapter->clock,
1838                                  &adapter->cycles,
1839                                  ktime_to_ns(ktime_get_real()));
1840                 /*
1841                  * Synchronize our NIC clock against system wall clock. NIC
1842                  * time stamp reading requires ~3us per sample, each sample
1843                  * was pretty stable even under load => only require 10
1844                  * samples for each offset comparison.
1845                  */
1846                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1847                 adapter->compare.source = &adapter->clock;
1848                 adapter->compare.target = ktime_get_real;
1849                 adapter->compare.num_samples = 10;
1850                 timecompare_update(&adapter->compare, 0);
1851                 break;
1852         case e1000_82576:
1853                 /*
1854                  * Initialize hardware timer: we keep it running just in case
1855                  * that some program needs it later on.
1856                  */
1857                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1858                 adapter->cycles.read = igb_read_clock;
1859                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1860                 adapter->cycles.mult = 1;
1861                 /*
1862                  * Scale the NIC clock cycle by a large factor so that
1863                  * relatively small clock corrections can be added or
1864                  * subtracted at each clock tick. The drawbacks of a large
1865                  * factor are a) that the clock register overflows more quickly
1866                  * (not such a big deal) and b) that the increment per tick has
1867                  * to fit into 24 bits.  As a result we need to use a shift of
1868                  * 19 so we can fit a value of 16 into the TIMINCA register.
1869                  */
1870                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1871                 wr32(E1000_TIMINCA,
1872                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1873                                 (16 << IGB_82576_TSYNC_SHIFT));
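                     /* With mult = 1 and the shift of 19 above, SYSTIM grows by
                      * 16 << 19 every 16 ns tick, i.e. 2^19 counter units per
                      * nanosecond, which cycles.shift converts back to ns. */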
1874
1875                 /* Set registers so that rollover occurs soon to test this. */
1876                 wr32(E1000_SYSTIML, 0x00000000);
1877                 wr32(E1000_SYSTIMH, 0xFF800000);
1878                 wrfl();
1879
1880                 timecounter_init(&adapter->clock,
1881                                  &adapter->cycles,
1882                                  ktime_to_ns(ktime_get_real()));
1883                 /*
1884                  * Synchronize our NIC clock against system wall clock. NIC
1885                  * time stamp reading requires ~3us per sample, each sample
1886                  * was pretty stable even under load => only require 10
1887                  * samples for each offset comparison.
1888                  */
1889                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1890                 adapter->compare.source = &adapter->clock;
1891                 adapter->compare.target = ktime_get_real;
1892                 adapter->compare.num_samples = 10;
1893                 timecompare_update(&adapter->compare, 0);
1894                 break;
1895         case e1000_82575:
1896                 /* 82575 does not support timesync */
1897         default:
1898                 break;
1899         }
1900
1901 }
1902
1903 /**
1904  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1905  * @adapter: board private structure to initialize
1906  *
1907  * igb_sw_init initializes the Adapter private data structure.
1908  * Fields are initialized based on PCI device information and
1909  * OS network device settings (MTU size).
1910  **/
1911 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1912 {
1913         struct e1000_hw *hw = &adapter->hw;
1914         struct net_device *netdev = adapter->netdev;
1915         struct pci_dev *pdev = adapter->pdev;
1916
1917         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1918
1919         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1920         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1921         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1922         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1923
1924         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1925         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1926
1927 #ifdef CONFIG_PCI_IOV
1928         if (hw->mac.type == e1000_82576)
1929                 adapter->vfs_allocated_count = max_vfs;
1930
1931 #endif /* CONFIG_PCI_IOV */
1932         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1933
1934         /*
1935          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
1936          * then we should combine the queues into a queue pair in order to
1937          * conserve interrupts due to limited supply
1938          */
1939         if ((adapter->rss_queues > 4) ||
1940             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1941                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1942
1943         /* This call may decrease the number of queues */
1944         if (igb_init_interrupt_scheme(adapter)) {
1945                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1946                 return -ENOMEM;
1947         }
1948
1949         igb_init_hw_timer(adapter);
1950         igb_probe_vfs(adapter);
1951
1952         /* Explicitly disable IRQ since the NIC can be in any state. */
1953         igb_irq_disable(adapter);
1954
1955         set_bit(__IGB_DOWN, &adapter->state);
1956         return 0;
1957 }
1958
1959 /**
1960  * igb_open - Called when a network interface is made active
1961  * @netdev: network interface device structure
1962  *
1963  * Returns 0 on success, negative value on failure
1964  *
1965  * The open entry point is called when a network interface is made
1966  * active by the system (IFF_UP).  At this point all resources needed
1967  * for transmit and receive operations are allocated, the interrupt
1968  * handler is registered with the OS, the watchdog timer is started,
1969  * and the stack is notified that the interface is ready.
1970  **/
1971 static int igb_open(struct net_device *netdev)
1972 {
1973         struct igb_adapter *adapter = netdev_priv(netdev);
1974         struct e1000_hw *hw = &adapter->hw;
1975         int err;
1976         int i;
1977
1978         /* disallow open during test */
1979         if (test_bit(__IGB_TESTING, &adapter->state))
1980                 return -EBUSY;
1981
1982         netif_carrier_off(netdev);
1983
1984         /* allocate transmit descriptors */
1985         err = igb_setup_all_tx_resources(adapter);
1986         if (err)
1987                 goto err_setup_tx;
1988
1989         /* allocate receive descriptors */
1990         err = igb_setup_all_rx_resources(adapter);
1991         if (err)
1992                 goto err_setup_rx;
1993
1994         igb_power_up_link(adapter);
1995
1996         /* before we allocate an interrupt, we must be ready to handle it.
1997          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1998          * as soon as we call pci_request_irq, so we have to setup our
1999          * clean_rx handler before we do so.  */
2000         igb_configure(adapter);
2001
2002         err = igb_request_irq(adapter);
2003         if (err)
2004                 goto err_req_irq;
2005
2006         /* From here on the code is the same as igb_up() */
2007         clear_bit(__IGB_DOWN, &adapter->state);
2008
2009         for (i = 0; i < adapter->num_q_vectors; i++) {
2010                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2011                 napi_enable(&q_vector->napi);
2012         }
2013
2014         /* Clear any pending interrupts. */
2015         rd32(E1000_ICR);
2016
2017         igb_irq_enable(adapter);
2018
2019         /* notify VFs that reset has been completed */
2020         if (adapter->vfs_allocated_count) {
2021                 u32 reg_data = rd32(E1000_CTRL_EXT);
2022                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2023                 wr32(E1000_CTRL_EXT, reg_data);
2024         }
2025
2026         netif_tx_start_all_queues(netdev);
2027
2028         /* start the watchdog. */
2029         hw->mac.get_link_status = 1;
2030         schedule_work(&adapter->watchdog_task);
2031
2032         return 0;
2033
2034 err_req_irq:
2035         igb_release_hw_control(adapter);
2036         igb_power_down_link(adapter);
2037         igb_free_all_rx_resources(adapter);
2038 err_setup_rx:
2039         igb_free_all_tx_resources(adapter);
2040 err_setup_tx:
2041         igb_reset(adapter);
2042
2043         return err;
2044 }
2045
2046 /**
2047  * igb_close - Disables a network interface
2048  * @netdev: network interface device structure
2049  *
2050  * Returns 0, this is not allowed to fail
2051  *
2052  * The close entry point is called when an interface is de-activated
2053  * by the OS.  The hardware is still under the driver's control, but
2054  * needs to be disabled.  A global MAC reset is issued to stop the
2055  * hardware, and all transmit and receive resources are freed.
2056  **/
2057 static int igb_close(struct net_device *netdev)
2058 {
2059         struct igb_adapter *adapter = netdev_priv(netdev);
2060
2061         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2062         igb_down(adapter);
2063
2064         igb_free_irq(adapter);
2065
2066         igb_free_all_tx_resources(adapter);
2067         igb_free_all_rx_resources(adapter);
2068
2069         return 0;
2070 }
2071
2072 /**
2073  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2074  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2075  *
2076  * Return 0 on success, negative on failure
2077  **/
2078 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2079 {
2080         struct pci_dev *pdev = tx_ring->pdev;
2081         int size;
2082
2083         size = sizeof(struct igb_buffer) * tx_ring->count;
2084         tx_ring->buffer_info = vmalloc(size);
2085         if (!tx_ring->buffer_info)
2086                 goto err;
2087         memset(tx_ring->buffer_info, 0, size);
2088
2089         /* round up to nearest 4K */
2090         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2091         tx_ring->size = ALIGN(tx_ring->size, 4096);
2092
2093         tx_ring->desc = pci_alloc_consistent(pdev,
2094                                              tx_ring->size,
2095                                              &tx_ring->dma);
2096
2097         if (!tx_ring->desc)
2098                 goto err;
2099
2100         tx_ring->next_to_use = 0;
2101         tx_ring->next_to_clean = 0;
2102         return 0;
2103
2104 err:
2105         vfree(tx_ring->buffer_info);
2106         dev_err(&pdev->dev,
2107                 "Unable to allocate memory for the transmit descriptor ring\n");
2108         return -ENOMEM;
2109 }
2110
2111 /**
2112  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2113  *                                (Descriptors) for all queues
2114  * @adapter: board private structure
2115  *
2116  * Return 0 on success, negative on failure
2117  **/
2118 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2119 {
2120         struct pci_dev *pdev = adapter->pdev;
2121         int i, err = 0;
2122
2123         for (i = 0; i < adapter->num_tx_queues; i++) {
2124                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2125                 if (err) {
2126                         dev_err(&pdev->dev,
2127                                 "Allocation for Tx Queue %u failed\n", i);
2128                         for (i--; i >= 0; i--)
2129                                 igb_free_tx_resources(adapter->tx_ring[i]);
2130                         break;
2131                 }
2132         }
2133
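             /* Map every possible Tx queue index onto an allocated ring,
              * wrapping round-robin over the real rings. */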
2134         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2135                 int r_idx = i % adapter->num_tx_queues;
2136                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2137         }
2138         return err;
2139 }
2140
2141 /**
2142  * igb_setup_tctl - configure the transmit control registers
2143  * @adapter: Board private structure
2144  **/
2145 void igb_setup_tctl(struct igb_adapter *adapter)
2146 {
2147         struct e1000_hw *hw = &adapter->hw;
2148         u32 tctl;
2149
2150         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2151         wr32(E1000_TXDCTL(0), 0);
2152
2153         /* Program the Transmit Control Register */
2154         tctl = rd32(E1000_TCTL);
2155         tctl &= ~E1000_TCTL_CT;
2156         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2157                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2158
2159         igb_config_collision_dist(hw);
2160
2161         /* Enable transmits */
2162         tctl |= E1000_TCTL_EN;
2163
2164         wr32(E1000_TCTL, tctl);
2165 }
2166
2167 /**
2168  * igb_configure_tx_ring - Configure transmit ring after Reset
2169  * @adapter: board private structure
2170  * @ring: tx ring to configure
2171  *
2172  * Configure a transmit ring after a reset.
2173  **/
2174 void igb_configure_tx_ring(struct igb_adapter *adapter,
2175                            struct igb_ring *ring)
2176 {
2177         struct e1000_hw *hw = &adapter->hw;
2178         u32 txdctl;
2179         u64 tdba = ring->dma;
2180         int reg_idx = ring->reg_idx;
2181
2182         /* disable the queue */
2183         txdctl = rd32(E1000_TXDCTL(reg_idx));
2184         wr32(E1000_TXDCTL(reg_idx),
2185                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2186         wrfl();
2187         mdelay(10);
2188
2189         wr32(E1000_TDLEN(reg_idx),
2190                         ring->count * sizeof(union e1000_adv_tx_desc));
2191         wr32(E1000_TDBAL(reg_idx),
2192                         tdba & 0x00000000ffffffffULL);
2193         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2194
2195         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2196         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2197         writel(0, ring->head);
2198         writel(0, ring->tail);
2199
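             /* program prefetch, host, and write-back thresholds */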
2200         txdctl |= IGB_TX_PTHRESH;
2201         txdctl |= IGB_TX_HTHRESH << 8;
2202         txdctl |= IGB_TX_WTHRESH << 16;
2203
2204         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2205         wr32(E1000_TXDCTL(reg_idx), txdctl);
2206 }
2207
2208 /**
2209  * igb_configure_tx - Configure transmit Unit after Reset
2210  * @adapter: board private structure
2211  *
2212  * Configure the Tx unit of the MAC after a reset.
2213  **/
2214 static void igb_configure_tx(struct igb_adapter *adapter)
2215 {
2216         int i;
2217
2218         for (i = 0; i < adapter->num_tx_queues; i++)
2219                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2220 }
2221
2222 /**
2223  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2224  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2225  *
2226  * Returns 0 on success, negative on failure
2227  **/
2228 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2229 {
2230         struct pci_dev *pdev = rx_ring->pdev;
2231         int size, desc_len;
2232
2233         size = sizeof(struct igb_buffer) * rx_ring->count;
2234         rx_ring->buffer_info = vmalloc(size);
2235         if (!rx_ring->buffer_info)
2236                 goto err;
2237         memset(rx_ring->buffer_info, 0, size);
2238
2239         desc_len = sizeof(union e1000_adv_rx_desc);
2240
2241         /* Round up to nearest 4K */
2242         rx_ring->size = rx_ring->count * desc_len;
2243         rx_ring->size = ALIGN(rx_ring->size, 4096);
2244
2245         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2246                                              &rx_ring->dma);
2247
2248         if (!rx_ring->desc)
2249                 goto err;
2250
2251         rx_ring->next_to_clean = 0;
2252         rx_ring->next_to_use = 0;
2253
2254         return 0;
2255
2256 err:
2257         vfree(rx_ring->buffer_info);
2258         rx_ring->buffer_info = NULL;
2259         dev_err(&pdev->dev, "Unable to allocate memory for "
2260                 "the receive descriptor ring\n");
2261         return -ENOMEM;
2262 }
2263
2264 /**
2265  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2266  *                                (Descriptors) for all queues
2267  * @adapter: board private structure
2268  *
2269  * Return 0 on success, negative on failure
2270  **/
2271 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2272 {
2273         struct pci_dev *pdev = adapter->pdev;
2274         int i, err = 0;
2275
2276         for (i = 0; i < adapter->num_rx_queues; i++) {
2277                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2278                 if (err) {
2279                         dev_err(&pdev->dev,
2280                                 "Allocation for Rx Queue %u failed\n", i);
2281                         for (i--; i >= 0; i--)
2282                                 igb_free_rx_resources(adapter->rx_ring[i]);
2283                         break;
2284                 }
2285         }
2286
2287         return err;
2288 }
2289
2290 /**
2291  * igb_setup_mrqc - configure the multiple receive queue control registers
2292  * @adapter: Board private structure
2293  **/
2294 static void igb_setup_mrqc(struct igb_adapter *adapter)
2295 {
2296         struct e1000_hw *hw = &adapter->hw;
2297         u32 mrqc, rxcsum;
2298         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2299         union e1000_reta {
2300                 u32 dword;
2301                 u8  bytes[4];
2302         } reta;
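             /* 40-byte Toeplitz hash key used to seed the RSSRK registers */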
2303         static const u8 rsshash[40] = {
2304                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2305                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2306                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2307                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2308
2309         /* Fill out hash function seeds */
2310         for (j = 0; j < 10; j++) {
2311                 u32 rsskey = rsshash[(j * 4)];
2312                 rsskey |= rsshash[(j * 4) + 1] << 8;
2313                 rsskey |= rsshash[(j * 4) + 2] << 16;
2314                 rsskey |= rsshash[(j * 4) + 3] << 24;
2315                 array_wr32(E1000_RSSRK(0), j, rsskey);
2316         }
2317
2318         num_rx_queues = adapter->rss_queues;
2319
2320         if (adapter->vfs_allocated_count) {
2321                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2322                 switch (hw->mac.type) {
2323                 case e1000_i350:
2324                 case e1000_82580:
2325                         num_rx_queues = 1;
2326                         shift = 0;
2327                         break;
2328                 case e1000_82576:
2329                         shift = 3;
2330                         num_rx_queues = 2;
2331                         break;
2332                 case e1000_82575:
2333                         shift = 2;
2334                         shift2 = 6;
2335                 default:
2336                         break;
2337                 }
2338         } else {
2339                 if (hw->mac.type == e1000_82575)
2340                         shift = 6;
2341         }
2342
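             /* Fill the 128-entry redirection table four bytes at a time;
              * each byte maps a hash bucket to a queue index placed in the
              * field selected by the shift(s) above. */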
2343         for (j = 0; j < (32 * 4); j++) {
2344                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2345                 if (shift2)
2346                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2347                 if ((j & 3) == 3)
2348                         wr32(E1000_RETA(j >> 2), reta.dword);
2349         }
2350
2351         /*
2352          * Disable raw packet checksumming so that RSS hash is placed in
2353          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2354          * offloads as they are enabled by default
2355          */
2356         rxcsum = rd32(E1000_RXCSUM);
2357         rxcsum |= E1000_RXCSUM_PCSD;
2358
2359         if (adapter->hw.mac.type >= e1000_82576)
2360                 /* Enable Receive Checksum Offload for SCTP */
2361                 rxcsum |= E1000_RXCSUM_CRCOFL;
2362
2363         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2364         wr32(E1000_RXCSUM, rxcsum);
2365
2366         /* If VMDq is enabled then we set the appropriate mode for that, else
2367          * we default to RSS so that an RSS hash is calculated per packet even
2368          * if we are only using one queue */
2369         if (adapter->vfs_allocated_count) {
2370                 if (hw->mac.type > e1000_82575) {
2371                         /* Set the default pool for the PF's first queue */
2372                         u32 vtctl = rd32(E1000_VT_CTL);
2373                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2374                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2375                         vtctl |= adapter->vfs_allocated_count <<
2376                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2377                         wr32(E1000_VT_CTL, vtctl);
2378                 }
2379                 if (adapter->rss_queues > 1)
2380                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2381                 else
2382                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2383         } else {
2384                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2385         }
2386         igb_vmm_control(adapter);
2387
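             /* hash on the IPv4/IPv6 header fields and TCP/UDP port numbers */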
2388         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2389                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2390         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2391                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2392         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2393                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2394         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2395                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2396
2397         wr32(E1000_MRQC, mrqc);
2398 }
2399
2400 /**
2401  * igb_setup_rctl - configure the receive control registers
2402  * @adapter: Board private structure
2403  **/
2404 void igb_setup_rctl(struct igb_adapter *adapter)
2405 {
2406         struct e1000_hw *hw = &adapter->hw;
2407         u32 rctl;
2408
2409         rctl = rd32(E1000_RCTL);
2410
2411         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2412         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2413
2414         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2415                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2416
2417         /*
2418          * enable stripping of CRC. It's unlikely this will break BMC
2419          * redirection as it did with e1000. Newer features require
2420          * that the HW strips the CRC.
2421          */
2422         rctl |= E1000_RCTL_SECRC;
2423
2424         /* disable store bad packets and clear size bits. */
2425         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2426
2427         /* enable LPE to prevent packets larger than max_frame_size */
2428         rctl |= E1000_RCTL_LPE;
2429
2430         /* disable queue 0 to prevent tail write w/o re-config */
2431         wr32(E1000_RXDCTL(0), 0);
2432
2433         /* Attention!!!  For SR-IOV PF driver operations you must enable
2434          * queue drop for all VF and PF queues to prevent head of line blocking
2435          * if an un-trusted VF does not provide descriptors to hardware.
2436          */
2437         if (adapter->vfs_allocated_count) {
2438                 /* set all queue drop enable bits */
2439                 wr32(E1000_QDE, ALL_QUEUES);
2440         }
2441
2442         wr32(E1000_RCTL, rctl);
2443 }
2444
2445 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2446                                    int vfn)
2447 {
2448         struct e1000_hw *hw = &adapter->hw;
2449         u32 vmolr;
2450
2451         /* if this is a VF (not the PF) and it has VLANs enabled, increase
2452          * the size to make room for the VLAN tag */
2453         if (vfn < adapter->vfs_allocated_count &&
2454             adapter->vf_data[vfn].vlans_enabled)
2455                 size += VLAN_TAG_SIZE;
2456
2457         vmolr = rd32(E1000_VMOLR(vfn));
2458         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2459         vmolr |= size | E1000_VMOLR_LPE;
2460         wr32(E1000_VMOLR(vfn), vmolr);
2461
2462         return 0;
2463 }
2464
2465 /**
2466  * igb_rlpml_set - set maximum receive packet size
2467  * @adapter: board private structure
2468  *
2469  * Configure maximum receivable packet size.
2470  **/
2471 static void igb_rlpml_set(struct igb_adapter *adapter)
2472 {
2473         u32 max_frame_size = adapter->max_frame_size;
2474         struct e1000_hw *hw = &adapter->hw;
2475         u16 pf_id = adapter->vfs_allocated_count;
2476
2477         if (adapter->vlgrp)
2478                 max_frame_size += VLAN_TAG_SIZE;
2479
2480         /* if vfs are enabled we set RLPML to the largest possible request
2481          * size and set the VMOLR RLPML to the size we need */
2482         if (pf_id) {
2483                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2484                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2485         }
2486
2487         wr32(E1000_RLPML, max_frame_size);
2488 }
2489
2490 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2491                                  int vfn, bool aupe)
2492 {
2493         struct e1000_hw *hw = &adapter->hw;
2494         u32 vmolr;
2495
2496         /*
2497          * This register exists only on 82576 and newer so if we are older then
2498          * we should exit and do nothing
2499          */
2500         if (hw->mac.type < e1000_82576)
2501                 return;
2502
2503         vmolr = rd32(E1000_VMOLR(vfn));
2504         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2505         if (aupe)
2506                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2507         else
2508                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2509
2510         /* clear all bits that might not be set */
2511         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2512
2513         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2514                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2515         /*
2516          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2517          * multicast packets
2518          */
2519         if (vfn <= adapter->vfs_allocated_count)
2520                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2521
2522         wr32(E1000_VMOLR(vfn), vmolr);
2523 }
2524
2525 /**
2526  * igb_configure_rx_ring - Configure a receive ring after Reset
2527  * @adapter: board private structure
2528  * @ring: receive ring to be configured
2529  *
2530  * Configure the Rx unit of the MAC after a reset.
2531  **/
2532 void igb_configure_rx_ring(struct igb_adapter *adapter,
2533                            struct igb_ring *ring)
2534 {
2535         struct e1000_hw *hw = &adapter->hw;
2536         u64 rdba = ring->dma;
2537         int reg_idx = ring->reg_idx;
2538         u32 srrctl, rxdctl;
2539
2540         /* disable the queue */
2541         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2542         wr32(E1000_RXDCTL(reg_idx),
2543                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2544
2545         /* Set DMA base address registers */
2546         wr32(E1000_RDBAL(reg_idx),
2547              rdba & 0x00000000ffffffffULL);
2548         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2549         wr32(E1000_RDLEN(reg_idx),
2550                        ring->count * sizeof(union e1000_adv_rx_desc));
2551
2552         /* initialize head and tail */
2553         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2554         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2555         writel(0, ring->head);
2556         writel(0, ring->tail);
2557
2558         /* set descriptor configuration */
2559         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2560                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2561                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2562 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2563                 srrctl |= IGB_RXBUFFER_16384 >>
2564                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2565 #else
2566                 srrctl |= (PAGE_SIZE / 2) >>
2567                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2568 #endif
2569                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2570         } else {
2571                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2572                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2573                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2574         }
2575         if (hw->mac.type == e1000_82580)
2576                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2577         /* Only set Drop Enable if we are supporting multiple queues */
2578         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2579                 srrctl |= E1000_SRRCTL_DROP_EN;
2580
2581         wr32(E1000_SRRCTL(reg_idx), srrctl);
2582
2583         /* set filtering for VMDQ pools */
2584         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2585
2586         /* enable receive descriptor fetching */
2587         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2588         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2589         rxdctl &= 0xFFF00000;
2590         rxdctl |= IGB_RX_PTHRESH;
2591         rxdctl |= IGB_RX_HTHRESH << 8;
2592         rxdctl |= IGB_RX_WTHRESH << 16;
2593         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2594 }
2595
2596 /**
2597  * igb_configure_rx - Configure receive Unit after Reset
2598  * @adapter: board private structure
2599  *
2600  * Configure the Rx unit of the MAC after a reset.
2601  **/
2602 static void igb_configure_rx(struct igb_adapter *adapter)
2603 {
2604         int i;
2605
2606         /* set UTA to appropriate mode */
2607         igb_set_uta(adapter);
2608
2609         /* set the correct pool for the PF default MAC address in entry 0 */
2610         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2611                          adapter->vfs_allocated_count);
2612
2613         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2614          * the Base and Length of the Rx Descriptor Ring */
2615         for (i = 0; i < adapter->num_rx_queues; i++)
2616                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2617 }
2618
2619 /**
2620  * igb_free_tx_resources - Free Tx Resources per Queue
2621  * @tx_ring: Tx descriptor ring for a specific queue
2622  *
2623  * Free all transmit software resources
2624  **/
2625 void igb_free_tx_resources(struct igb_ring *tx_ring)
2626 {
2627         igb_clean_tx_ring(tx_ring);
2628
2629         vfree(tx_ring->buffer_info);
2630         tx_ring->buffer_info = NULL;
2631
2632         /* if not set, then don't free */
2633         if (!tx_ring->desc)
2634                 return;
2635
2636         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2637                             tx_ring->desc, tx_ring->dma);
2638
2639         tx_ring->desc = NULL;
2640 }
2641
2642 /**
2643  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2644  * @adapter: board private structure
2645  *
2646  * Free all transmit software resources
2647  **/
2648 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2649 {
2650         int i;
2651
2652         for (i = 0; i < adapter->num_tx_queues; i++)
2653                 igb_free_tx_resources(adapter->tx_ring[i]);
2654 }
2655
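     /**
      * igb_unmap_and_free_tx_resource - clean up a single Tx buffer
      * @tx_ring: ring the buffer belongs to
      * @buffer_info: buffer to unmap and free
      *
      * Unmaps the buffer's DMA mapping (page or single mapping), frees any
      * attached skb, and clears the bookkeeping fields so the slot can be
      * reused.
      **/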
2656 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2657                                     struct igb_buffer *buffer_info)
2658 {
2659         if (buffer_info->dma) {
2660                 if (buffer_info->mapped_as_page)
2661                         pci_unmap_page(tx_ring->pdev,
2662                                         buffer_info->dma,
2663                                         buffer_info->length,
2664                                         PCI_DMA_TODEVICE);
2665                 else
2666                         pci_unmap_single(tx_ring->pdev,
2667                                         buffer_info->dma,
2668                                         buffer_info->length,
2669                                         PCI_DMA_TODEVICE);
2670                 buffer_info->dma = 0;
2671         }
2672         if (buffer_info->skb) {
2673                 dev_kfree_skb_any(buffer_info->skb);
2674                 buffer_info->skb = NULL;
2675         }
2676         buffer_info->time_stamp = 0;
2677         buffer_info->length = 0;
2678         buffer_info->next_to_watch = 0;
2679         buffer_info->mapped_as_page = false;
2680 }
2681
2682 /**
2683  * igb_clean_tx_ring - Free Tx Buffers
2684  * @tx_ring: ring to be cleaned
2685  **/
2686 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2687 {
2688         struct igb_buffer *buffer_info;
2689         unsigned long size;
2690         unsigned int i;
2691
2692         if (!tx_ring->buffer_info)
2693                 return;
2694         /* Free all the Tx ring sk_buffs */
2695
2696         for (i = 0; i < tx_ring->count; i++) {
2697                 buffer_info = &tx_ring->buffer_info[i];
2698                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2699         }
2700
2701         size = sizeof(struct igb_buffer) * tx_ring->count;
2702         memset(tx_ring->buffer_info, 0, size);
2703
2704         /* Zero out the descriptor ring */
2705         memset(tx_ring->desc, 0, tx_ring->size);
2706
2707         tx_ring->next_to_use = 0;
2708         tx_ring->next_to_clean = 0;
2709 }
2710
2711 /**
2712  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2713  * @adapter: board private structure
2714  **/
2715 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2716 {
2717         int i;
2718
2719         for (i = 0; i < adapter->num_tx_queues; i++)
2720                 igb_clean_tx_ring(adapter->tx_ring[i]);
2721 }
2722
2723 /**
2724  * igb_free_rx_resources - Free Rx Resources
2725  * @rx_ring: ring to clean the resources from
2726  *
2727  * Free all receive software resources
2728  **/
2729 void igb_free_rx_resources(struct igb_ring *rx_ring)
2730 {
2731         igb_clean_rx_ring(rx_ring);
2732
2733         vfree(rx_ring->buffer_info);
2734         rx_ring->buffer_info = NULL;
2735
2736         /* if not set, then don't free */
2737         if (!rx_ring->desc)
2738                 return;
2739
2740         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2741                             rx_ring->desc, rx_ring->dma);
2742
2743         rx_ring->desc = NULL;
2744 }
2745
2746 /**
2747  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2748  * @adapter: board private structure
2749  *
2750  * Free all receive software resources
2751  **/
2752 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2753 {
2754         int i;
2755
2756         for (i = 0; i < adapter->num_rx_queues; i++)
2757                 igb_free_rx_resources(adapter->rx_ring[i]);
2758 }
2759
2760 /**
2761  * igb_clean_rx_ring - Free Rx Buffers per Queue
2762  * @rx_ring: ring to free buffers from
2763  **/
2764 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2765 {
2766         struct igb_buffer *buffer_info;
2767         unsigned long size;
2768         unsigned int i;
2769
2770         if (!rx_ring->buffer_info)
2771                 return;
2772
2773         /* Free all the Rx ring sk_buffs */
2774         for (i = 0; i < rx_ring->count; i++) {
2775                 buffer_info = &rx_ring->buffer_info[i];
2776                 if (buffer_info->dma) {
2777                         pci_unmap_single(rx_ring->pdev,
2778                                          buffer_info->dma,
2779                                          rx_ring->rx_buffer_len,
2780                                          PCI_DMA_FROMDEVICE);
2781                         buffer_info->dma = 0;
2782                 }
2783
2784                 if (buffer_info->skb) {
2785                         dev_kfree_skb(buffer_info->skb);
2786                         buffer_info->skb = NULL;
2787                 }
2788                 if (buffer_info->page_dma) {
2789                         pci_unmap_page(rx_ring->pdev,
2790                                        buffer_info->page_dma,
2791                                        PAGE_SIZE / 2,
2792                                        PCI_DMA_FROMDEVICE);
2793                         buffer_info->page_dma = 0;
2794                 }
2795                 if (buffer_info->page) {
2796                         put_page(buffer_info->page);
2797                         buffer_info->page = NULL;
2798                         buffer_info->page_offset = 0;
2799                 }
2800         }
2801
2802         size = sizeof(struct igb_buffer) * rx_ring->count;
2803         memset(rx_ring->buffer_info, 0, size);
2804
2805         /* Zero out the descriptor ring */
2806         memset(rx_ring->desc, 0, rx_ring->size);
2807
2808         rx_ring->next_to_clean = 0;
2809         rx_ring->next_to_use = 0;
2810 }
2811
2812 /**
2813  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2814  * @adapter: board private structure
2815  **/
2816 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2817 {
2818         int i;
2819
2820         for (i = 0; i < adapter->num_rx_queues; i++)
2821                 igb_clean_rx_ring(adapter->rx_ring[i]);
2822 }
2823
2824 /**
2825  * igb_set_mac - Change the Ethernet Address of the NIC
2826  * @netdev: network interface device structure
2827  * @p: pointer to an address structure
2828  *
2829  * Returns 0 on success, negative on failure
2830  **/
2831 static int igb_set_mac(struct net_device *netdev, void *p)
2832 {
2833         struct igb_adapter *adapter = netdev_priv(netdev);
2834         struct e1000_hw *hw = &adapter->hw;
2835         struct sockaddr *addr = p;
2836
2837         if (!is_valid_ether_addr(addr->sa_data))
2838                 return -EADDRNOTAVAIL;
2839
2840         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2841         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2842
2843         /* set the correct pool for the new PF MAC address in entry 0 */
2844         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2845                          adapter->vfs_allocated_count);
2846
2847         return 0;
2848 }
2849
2850 /**
2851  * igb_write_mc_addr_list - write multicast addresses to MTA
2852  * @netdev: network interface device structure
2853  *
2854  * Writes multicast address list to the MTA hash table.
2855  * Returns: -ENOMEM on failure
2856  *                0 on no addresses written
2857  *                X on writing X addresses to MTA
2858  **/
2859 static int igb_write_mc_addr_list(struct net_device *netdev)
2860 {
2861         struct igb_adapter *adapter = netdev_priv(netdev);
2862         struct e1000_hw *hw = &adapter->hw;
2863         struct netdev_hw_addr *ha;
2864         u8  *mta_list;
2865         int i;
2866
2867         if (netdev_mc_empty(netdev)) {
2868                 /* nothing to program, so clear mc list */
2869                 igb_update_mc_addr_list(hw, NULL, 0);
2870                 igb_restore_vf_multicasts(adapter);
2871                 return 0;
2872         }
2873
2874         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
2875         if (!mta_list)
2876                 return -ENOMEM;
2877
2878         /* The shared function expects a packed array of only addresses. */
2879         i = 0;
2880         netdev_for_each_mc_addr(ha, netdev)
2881                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
2882
2883         igb_update_mc_addr_list(hw, mta_list, i);
2884         kfree(mta_list);
2885
2886         return netdev_mc_count(netdev);
2887 }
2888
2889 /**
2890  * igb_write_uc_addr_list - write unicast addresses to RAR table
2891  * @netdev: network interface device structure
2892  *
2893  * Writes unicast address list to the RAR table.
2894  * Returns: -ENOMEM on failure/insufficient address space
2895  *                0 on no addresses written
2896  *                X on writing X addresses to the RAR table
2897  **/
2898 static int igb_write_uc_addr_list(struct net_device *netdev)
2899 {
2900         struct igb_adapter *adapter = netdev_priv(netdev);
2901         struct e1000_hw *hw = &adapter->hw;
2902         unsigned int vfn = adapter->vfs_allocated_count;
2903         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2904         int count = 0;
2905
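             /* RAR entry 0 carries the PF MAC (see igb_set_mac() above) and the
              * top vfs_allocated_count entries are used for VF MAC addresses,
              * so only the entries in between are free for extra unicast
              * filters
              */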
2906         /* return ENOMEM indicating insufficient memory for addresses */
2907         if (netdev_uc_count(netdev) > rar_entries)
2908                 return -ENOMEM;
2909
2910         if (!netdev_uc_empty(netdev) && rar_entries) {
2911                 struct netdev_hw_addr *ha;
2912
2913                 netdev_for_each_uc_addr(ha, netdev) {
2914                         if (!rar_entries)
2915                                 break;
2916                         igb_rar_set_qsel(adapter, ha->addr,
2917                                          rar_entries--,
2918                                          vfn);
2919                         count++;
2920                 }
2921         }
2922         /* write the addresses in reverse order to avoid write combining */
2923         for (; rar_entries > 0 ; rar_entries--) {
2924                 wr32(E1000_RAH(rar_entries), 0);
2925                 wr32(E1000_RAL(rar_entries), 0);
2926         }
2927         wrfl();
2928
2929         return count;
2930 }
2931
2932 /**
2933  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2934  * @netdev: network interface device structure
2935  *
2936  * The set_rx_mode entry point is called whenever the unicast or multicast
2937  * address lists or the network interface flags are updated.  This routine is
2938  * responsible for configuring the hardware for proper unicast, multicast,
2939  * promiscuous mode, and all-multi behavior.
2940  **/
2941 static void igb_set_rx_mode(struct net_device *netdev)
2942 {
2943         struct igb_adapter *adapter = netdev_priv(netdev);
2944         struct e1000_hw *hw = &adapter->hw;
2945         unsigned int vfn = adapter->vfs_allocated_count;
2946         u32 rctl, vmolr = 0;
2947         int count;
2948
2949         /* Check for Promiscuous and All Multicast modes */
2950         rctl = rd32(E1000_RCTL);
2951
2952         /* clear the affected bits */
2953         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2954
2955         if (netdev->flags & IFF_PROMISC) {
2956                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2957                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2958         } else {
2959                 if (netdev->flags & IFF_ALLMULTI) {
2960                         rctl |= E1000_RCTL_MPE;
2961                         vmolr |= E1000_VMOLR_MPME;
2962                 } else {
2963                         /*
2964                          * Write addresses to the MTA, if the attempt fails
2965                          * then we should just turn on promiscuous mode so
2966                          * that we can at least receive multicast traffic
2967                          */
2968                         count = igb_write_mc_addr_list(netdev);
2969                         if (count < 0) {
2970                                 rctl |= E1000_RCTL_MPE;
2971                                 vmolr |= E1000_VMOLR_MPME;
2972                         } else if (count) {
2973                                 vmolr |= E1000_VMOLR_ROMPE;
2974                         }
2975                 }
2976                 /*
2977                  * Write addresses to available RAR registers, if there is not
2978                  * sufficient space to store all the addresses then enable
2979                  * unicast promiscuous mode
2980                  */
2981                 count = igb_write_uc_addr_list(netdev);
2982                 if (count < 0) {
2983                         rctl |= E1000_RCTL_UPE;
2984                         vmolr |= E1000_VMOLR_ROPE;
2985                 }
2986                 rctl |= E1000_RCTL_VFE;
2987         }
2988         wr32(E1000_RCTL, rctl);
2989
2990         /*
2991          * In order to support SR-IOV and eventually VMDq it is necessary to set
2992          * the VMOLR to enable the appropriate modes.  Without this workaround
2993          * we will have issues with VLAN tag stripping not being done for frames
2994          * that are only arriving because we are the default pool
2995          */
2996         if (hw->mac.type < e1000_82576)
2997                 return;
2998
2999         vmolr |= rd32(E1000_VMOLR(vfn)) &
3000                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3001         wr32(E1000_VMOLR(vfn), vmolr);
3002         igb_restore_vf_multicasts(adapter);
3003 }
3004
3005 /* Need to wait a few seconds after link up to get diagnostic information from
3006  * the phy */
3007 static void igb_update_phy_info(unsigned long data)
3008 {
3009         struct igb_adapter *adapter = (struct igb_adapter *) data;
3010         igb_get_phy_info(&adapter->hw);
3011 }
3012
3013 /**
3014  * igb_has_link - check shared code for link and determine up/down
3015  * @adapter: pointer to driver private info
3016  **/
3017 bool igb_has_link(struct igb_adapter *adapter)
3018 {
3019         struct e1000_hw *hw = &adapter->hw;
3020         bool link_active = false;
3021         s32 ret_val = 0;
3022
3023         /* get_link_status is set on LSC (link status) interrupt or
3024          * rx sequence error interrupt.  get_link_status will stay
3025          * false until the e1000_check_for_link establishes link
3026          * for copper adapters ONLY
3027          */
3028         switch (hw->phy.media_type) {
3029         case e1000_media_type_copper:
3030                 if (hw->mac.get_link_status) {
3031                         ret_val = hw->mac.ops.check_for_link(hw);
3032                         link_active = !hw->mac.get_link_status;
3033                 } else {
3034                         link_active = true;
3035                 }
3036                 break;
3037         case e1000_media_type_internal_serdes:
3038                 ret_val = hw->mac.ops.check_for_link(hw);
3039                 link_active = hw->mac.serdes_has_link;
3040                 break;
3041         default:
3042         case e1000_media_type_unknown:
3043                 break;
3044         }
3045
3046         return link_active;
3047 }
3048
3049 /**
3050  * igb_watchdog - Timer Call-back
3051  * @data: pointer to adapter cast into an unsigned long
3052  **/
3053 static void igb_watchdog(unsigned long data)
3054 {
3055         struct igb_adapter *adapter = (struct igb_adapter *)data;
3056         /* Do the rest outside of interrupt context */
3057         schedule_work(&adapter->watchdog_task);
3058 }
3059
3060 static void igb_watchdog_task(struct work_struct *work)
3061 {
3062         struct igb_adapter *adapter = container_of(work,
3063                                                    struct igb_adapter,
3064                                                    watchdog_task);
3065         struct e1000_hw *hw = &adapter->hw;
3066         struct net_device *netdev = adapter->netdev;
3067         u32 link;
3068         int i;
3069
3070         link = igb_has_link(adapter);
3071         if (link) {
3072                 if (!netif_carrier_ok(netdev)) {
3073                         u32 ctrl;
3074                         hw->mac.ops.get_speed_and_duplex(hw,
3075                                                          &adapter->link_speed,
3076                                                          &adapter->link_duplex);
3077
3078                         ctrl = rd32(E1000_CTRL);
3079                         /* Link status message must follow this format */
3080                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3081                                  "Flow Control: %s\n",
3082                                netdev->name,
3083                                adapter->link_speed,
3084                                adapter->link_duplex == FULL_DUPLEX ?
3085                                  "Full Duplex" : "Half Duplex",
3086                                ((ctrl & E1000_CTRL_TFCE) &&
3087                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3088                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3089                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3090
3091                         /* adjust timeout factor according to speed/duplex */
3092                         adapter->tx_timeout_factor = 1;
3093                         switch (adapter->link_speed) {
3094                         case SPEED_10:
3095                                 adapter->tx_timeout_factor = 14;
3096                                 break;
3097                         case SPEED_100:
3098                                 /* maybe add some timeout factor ? */
3099                                 break;
3100                         }
3101
3102                         netif_carrier_on(netdev);
3103
3104                         igb_ping_all_vfs(adapter);
3105
3106                         /* link state has changed, schedule phy info update */
3107                         if (!test_bit(__IGB_DOWN, &adapter->state))
3108                                 mod_timer(&adapter->phy_info_timer,
3109                                           round_jiffies(jiffies + 2 * HZ));
3110                 }
3111         } else {
3112                 if (netif_carrier_ok(netdev)) {
3113                         adapter->link_speed = 0;
3114                         adapter->link_duplex = 0;
3115                         /* Link status message must follow this format */
3116                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3117                                netdev->name);
3118                         netif_carrier_off(netdev);
3119
3120                         igb_ping_all_vfs(adapter);
3121
3122                         /* link state has changed, schedule phy info update */
3123                         if (!test_bit(__IGB_DOWN, &adapter->state))
3124                                 mod_timer(&adapter->phy_info_timer,
3125                                           round_jiffies(jiffies + 2 * HZ));
3126                 }
3127         }
3128
3129         igb_update_stats(adapter);
3130
3131         for (i = 0; i < adapter->num_tx_queues; i++) {
3132                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3133                 if (!netif_carrier_ok(netdev)) {
3134                         /* We've lost link, so the controller stops DMA,
3135                          * but we've got queued Tx work that's never going
3136                          * to get done, so reset controller to flush Tx.
3137                          * (Do the reset outside of interrupt context). */
3138                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3139                                 adapter->tx_timeout_count++;
3140                                 schedule_work(&adapter->reset_task);
3141                                 /* return immediately since reset is imminent */
3142                                 return;
3143                         }
3144                 }
3145
3146                 /* Force detection of hung controller every watchdog period */
3147                 tx_ring->detect_tx_hung = true;
3148         }
3149
3150         /* Cause software interrupt to ensure rx ring is cleaned */
3151         if (adapter->msix_entries) {
3152                 u32 eics = 0;
3153                 for (i = 0; i < adapter->num_q_vectors; i++) {
3154                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3155                         eics |= q_vector->eims_value;
3156                 }
3157                 wr32(E1000_EICS, eics);
3158         } else {
3159                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3160         }
3161
3162         /* Reset the timer */
3163         if (!test_bit(__IGB_DOWN, &adapter->state))
3164                 mod_timer(&adapter->watchdog_timer,
3165                           round_jiffies(jiffies + 2 * HZ));
3166 }
3167
3168 enum latency_range {
3169         lowest_latency = 0,
3170         low_latency = 1,
3171         bulk_latency = 2,
3172         latency_invalid = 255
3173 };
3174
3175 /**
3176  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3177  *
3178  *      Stores a new ITR value based strictly on packet size.  This
3179  *      algorithm is less sophisticated than that used in igb_update_itr,
3180  *      due to the difficulty of synchronizing statistics across multiple
3181  *      receive rings.  The divisors and thresholds used by this function
3182  *      were determined based on theoretical maximum wire speed and testing
3183  *      data, in order to minimize response time while increasing bulk
3184  *      throughput.
3185  *      This functionality is controlled by the InterruptThrottleRate module
3186  *      parameter (see igb_param.c)
3187  *      NOTE:  This function is called only when operating in a multiqueue
3188  *             receive environment.
3189  * @q_vector: pointer to q_vector
3190  **/
3191 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3192 {
3193         int new_val = q_vector->itr_val;
3194         int avg_wire_size = 0;
3195         struct igb_adapter *adapter = q_vector->adapter;
3196
3197         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3198          * ints/sec - ITR timer value of 120 ticks.
3199          */
3200         if (adapter->link_speed != SPEED_1000) {
3201                 new_val = 976;
3202                 goto set_itr_val;
3203         }
3204
3205         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3206                 struct igb_ring *ring = q_vector->rx_ring;
3207                 avg_wire_size = ring->total_bytes / ring->total_packets;
3208         }
3209
3210         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3211                 struct igb_ring *ring = q_vector->tx_ring;
3212                 avg_wire_size = max_t(u32, avg_wire_size,
3213                                       (ring->total_bytes /
3214                                        ring->total_packets));
3215         }
3216
3217         /* if avg_wire_size isn't set no work was done */
3218         if (!avg_wire_size)
3219                 goto clear_counts;
3220
3221         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3222         avg_wire_size += 24;
3223
3224         /* Don't starve jumbo frames */
3225         avg_wire_size = min(avg_wire_size, 3000);
3226
3227         /* Give a little boost to mid-size frames */
3228         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3229                 new_val = avg_wire_size / 3;
3230         else
3231                 new_val = avg_wire_size / 2;
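             /* e.g. a steady stream of ~1000 byte frames gives an avg_wire_size
              * of about 1024, so new_val lands near 341; with 196 ticks mapping
              * to 20K ints/sec (see below), that is roughly 11,500 ints/sec
              */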
3232
3233         /* when in itr mode 3 do not exceed 20K ints/sec */
3234         if (adapter->rx_itr_setting == 3 && new_val < 196)
3235                 new_val = 196;
3236
3237 set_itr_val:
3238         if (new_val != q_vector->itr_val) {
3239                 q_vector->itr_val = new_val;
3240                 q_vector->set_itr = 1;
3241         }
3242 clear_counts:
3243         if (q_vector->rx_ring) {
3244                 q_vector->rx_ring->total_bytes = 0;
3245                 q_vector->rx_ring->total_packets = 0;
3246         }
3247         if (q_vector->tx_ring) {
3248                 q_vector->tx_ring->total_bytes = 0;
3249                 q_vector->tx_ring->total_packets = 0;
3250         }
3251 }
3252
3253 /**
3254  * igb_update_itr - update the dynamic ITR value based on statistics
3255  *      Stores a new ITR value based on packets and byte
3256  *      counts during the last interrupt.  The advantage of per interrupt
3257  *      computation is faster updates and more accurate ITR for the current
3258  *      traffic pattern.  Constants in this function were computed
3259  *      based on theoretical maximum wire speed and thresholds were set based
3260  *      on testing data as well as attempting to minimize response time
3261  *      while increasing bulk throughput.
3262  *      This functionality is controlled by the InterruptThrottleRate module
3263  *      parameter (see igb_param.c)
3264  *      NOTE:  These calculations are only valid when operating in a single-
3265  *             queue environment.
3266  * @adapter: pointer to adapter
3267  * @itr_setting: current q_vector->itr_val
3268  * @packets: the number of packets during this measurement interval
3269  * @bytes: the number of bytes during this measurement interval
3270  **/
3271 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3272                                    int packets, int bytes)
3273 {
3274         unsigned int retval = itr_setting;
3275
3276         if (packets == 0)
3277                 goto update_itr_done;
3278
3279         switch (itr_setting) {
3280         case lowest_latency:
3281                 /* handle TSO and jumbo frames */
3282                 if (bytes/packets > 8000)
3283                         retval = bulk_latency;
3284                 else if ((packets < 5) && (bytes > 512))
3285                         retval = low_latency;
3286                 break;
3287         case low_latency:  /* 50 usec aka 20000 ints/s */
3288                 if (bytes > 10000) {
3289                         /* this if handles the TSO accounting */
3290                         if (bytes/packets > 8000) {
3291                                 retval = bulk_latency;
3292                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3293                                 retval = bulk_latency;
3294                         } else if ((packets > 35)) {
3295                                 retval = lowest_latency;
3296                         }
3297                 } else if (bytes/packets > 2000) {
3298                         retval = bulk_latency;
3299                 } else if (packets <= 2 && bytes < 512) {
3300                         retval = lowest_latency;
3301                 }
3302                 break;
3303         case bulk_latency: /* 250 usec aka 4000 ints/s */
3304                 if (bytes > 25000) {
3305                         if (packets > 35)
3306                                 retval = low_latency;
3307                 } else if (bytes < 1500) {
3308                         retval = low_latency;
3309                 }
3310                 break;
3311         }
3312
3313 update_itr_done:
3314         return retval;
3315 }
3316
3317 static void igb_set_itr(struct igb_adapter *adapter)
3318 {
3319         struct igb_q_vector *q_vector = adapter->q_vector[0];
3320         u16 current_itr;
3321         u32 new_itr = q_vector->itr_val;
3322
3323         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3324         if (adapter->link_speed != SPEED_1000) {
3325                 current_itr = 0;
3326                 new_itr = 4000;
3327                 goto set_itr_now;
3328         }
3329
3330         adapter->rx_itr = igb_update_itr(adapter,
3331                                     adapter->rx_itr,
3332                                     q_vector->rx_ring->total_packets,
3333                                     q_vector->rx_ring->total_bytes);
3334
3335         adapter->tx_itr = igb_update_itr(adapter,
3336                                     adapter->tx_itr,
3337                                     q_vector->tx_ring->total_packets,
3338                                     q_vector->tx_ring->total_bytes);
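             /* the larger of the two maps to the bulkier (higher latency) class */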
3339         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3340
3341         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3342         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3343                 current_itr = low_latency;
3344
3345         switch (current_itr) {
3346         /* counts and packets in update_itr are dependent on these numbers */
3347         case lowest_latency:
3348                 new_itr = 56;  /* aka 70,000 ints/sec */
3349                 break;
3350         case low_latency:
3351                 new_itr = 196; /* aka 20,000 ints/sec */
3352                 break;
3353         case bulk_latency:
3354                 new_itr = 980; /* aka 4,000 ints/sec */
3355                 break;
3356         default:
3357                 break;
3358         }
3359
3360 set_itr_now:
3361         q_vector->rx_ring->total_bytes = 0;
3362         q_vector->rx_ring->total_packets = 0;
3363         q_vector->tx_ring->total_bytes = 0;
3364         q_vector->tx_ring->total_packets = 0;
3365
3366         if (new_itr != q_vector->itr_val) {
3367                 /* this attempts to bias the interrupt rate towards Bulk
3368                  * by adding intermediate steps when interrupt rate is
3369                  * increasing */
3370                 new_itr = new_itr > q_vector->itr_val ?
3371                              max((new_itr * q_vector->itr_val) /
3372                                  (new_itr + (q_vector->itr_val >> 2)),
3373                                  new_itr) :
3374                              new_itr;
3375                 /* Don't write the value here; it resets the adapter's
3376                  * internal timer, and causes us to delay far longer than
3377                  * we should between interrupts.  Instead, we write the ITR
3378                  * value at the beginning of the next interrupt so the timing
3379                  * ends up being correct.
3380                  */
3381                 q_vector->itr_val = new_itr;
3382                 q_vector->set_itr = 1;
3383         }
3384
3385         return;
3386 }
3387
3388 #define IGB_TX_FLAGS_CSUM               0x00000001
3389 #define IGB_TX_FLAGS_VLAN               0x00000002
3390 #define IGB_TX_FLAGS_TSO                0x00000004
3391 #define IGB_TX_FLAGS_IPV4               0x00000008
3392 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3393 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3394 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3395
3396 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3397                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3398 {
3399         struct e1000_adv_tx_context_desc *context_desc;
3400         unsigned int i;
3401         int err;
3402         struct igb_buffer *buffer_info;
3403         u32 info = 0, tu_cmd = 0;
3404         u32 mss_l4len_idx;
3405         u8 l4len;
3406
3407         if (skb_header_cloned(skb)) {
3408                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3409                 if (err)
3410                         return err;
3411         }
3412
3413         l4len = tcp_hdrlen(skb);
3414         *hdr_len += l4len;
3415
3416         if (skb->protocol == htons(ETH_P_IP)) {
3417                 struct iphdr *iph = ip_hdr(skb);
3418                 iph->tot_len = 0;
3419                 iph->check = 0;
3420                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3421                                                          iph->daddr, 0,
3422                                                          IPPROTO_TCP,
3423                                                          0);
3424         } else if (skb_is_gso_v6(skb)) {
3425                 ipv6_hdr(skb)->payload_len = 0;
3426                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3427                                                        &ipv6_hdr(skb)->daddr,
3428                                                        0, IPPROTO_TCP, 0);
3429         }
3430
3431         i = tx_ring->next_to_use;
3432
3433         buffer_info = &tx_ring->buffer_info[i];
3434         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3435         /* VLAN MACLEN IPLEN */
3436         if (tx_flags & IGB_TX_FLAGS_VLAN)
3437                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3438         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3439         *hdr_len += skb_network_offset(skb);
3440         info |= skb_network_header_len(skb);
3441         *hdr_len += skb_network_header_len(skb);
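             /* hdr_len now covers the L2, L3 and L4 headers; everything past it
              * in skb->len is reported as payload when the data descriptors are
              * queued up
              */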
3442         context_desc->vlan_macip_lens = cpu_to_le32(info);
3443
3444         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3445         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3446
3447         if (skb->protocol == htons(ETH_P_IP))
3448                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3449         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3450
3451         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3452
3453         /* MSS L4LEN IDX */
3454         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3455         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3456
3457         /* For 82575, context index must be unique per ring. */
3458         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3459                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3460
3461         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3462         context_desc->seqnum_seed = 0;
3463
3464         buffer_info->time_stamp = jiffies;
3465         buffer_info->next_to_watch = i;
3466         buffer_info->dma = 0;
3467         i++;
3468         if (i == tx_ring->count)
3469                 i = 0;
3470
3471         tx_ring->next_to_use = i;
3472
3473         return true;
3474 }
3475
3476 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3477                                    struct sk_buff *skb, u32 tx_flags)
3478 {
3479         struct e1000_adv_tx_context_desc *context_desc;
3480         struct pci_dev *pdev = tx_ring->pdev;
3481         struct igb_buffer *buffer_info;
3482         u32 info = 0, tu_cmd = 0;
3483         unsigned int i;
3484
3485         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3486             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3487                 i = tx_ring->next_to_use;
3488                 buffer_info = &tx_ring->buffer_info[i];
3489                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3490
3491                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3492                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3493
3494                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3495                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3496                         info |= skb_network_header_len(skb);
3497
3498                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3499
3500                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3501
3502                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3503                         __be16 protocol;
3504
3505                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3506                                 const struct vlan_ethhdr *vhdr =
3507                                           (const struct vlan_ethhdr*)skb->data;
3508
3509                                 protocol = vhdr->h_vlan_encapsulated_proto;
3510                         } else {
3511                                 protocol = skb->protocol;
3512                         }
3513
3514                         switch (protocol) {
3515                         case cpu_to_be16(ETH_P_IP):
3516                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3517                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3518                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3519                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3520                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3521                                 break;
3522                         case cpu_to_be16(ETH_P_IPV6):
3523                                 /* XXX what about other V6 headers?? */
3524                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3525                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3526                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3527                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3528                                 break;
3529                         default:
3530                                 if (unlikely(net_ratelimit()))
3531                                         dev_warn(&pdev->dev,
3532                                             "partial checksum but proto=%x!\n",
3533                                             skb->protocol);
3534                                 break;
3535                         }
3536                 }
3537
3538                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3539                 context_desc->seqnum_seed = 0;
3540                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3541                         context_desc->mss_l4len_idx =
3542                                 cpu_to_le32(tx_ring->reg_idx << 4);
3543
3544                 buffer_info->time_stamp = jiffies;
3545                 buffer_info->next_to_watch = i;
3546                 buffer_info->dma = 0;
3547
3548                 i++;
3549                 if (i == tx_ring->count)
3550                         i = 0;
3551                 tx_ring->next_to_use = i;
3552
3553                 return true;
3554         }
3555         return false;
3556 }
3557
3558 #define IGB_MAX_TXD_PWR 16
3559 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3560
3561 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3562                                  unsigned int first)
3563 {
3564         struct igb_buffer *buffer_info;
3565         struct pci_dev *pdev = tx_ring->pdev;
3566         unsigned int len = skb_headlen(skb);
3567         unsigned int count = 0, i;
3568         unsigned int f;
3569
3570         i = tx_ring->next_to_use;
3571
3572         buffer_info = &tx_ring->buffer_info[i];
3573         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3574         buffer_info->length = len;
3575         /* set time_stamp *before* dma to help avoid a possible race */
3576         buffer_info->time_stamp = jiffies;
3577         buffer_info->next_to_watch = i;
3578         buffer_info->dma = pci_map_single(pdev, skb->data, len,
3579                                           PCI_DMA_TODEVICE);
3580         if (pci_dma_mapping_error(pdev, buffer_info->dma))
3581                 goto dma_error;
3582
3583         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3584                 struct skb_frag_struct *frag;
3585
3586                 count++;
3587                 i++;
3588                 if (i == tx_ring->count)
3589                         i = 0;
3590
3591                 frag = &skb_shinfo(skb)->frags[f];
3592                 len = frag->size;
3593
3594                 buffer_info = &tx_ring->buffer_info[i];
3595                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3596                 buffer_info->length = len;
3597                 buffer_info->time_stamp = jiffies;
3598                 buffer_info->next_to_watch = i;
3599                 buffer_info->mapped_as_page = true;
3600                 buffer_info->dma = pci_map_page(pdev,
3601                                                 frag->page,
3602                                                 frag->page_offset,
3603                                                 len,
3604                                                 PCI_DMA_TODEVICE);
3605                 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3606                         goto dma_error;
3607
3608         }
3609
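             /* stash the skb and its segment count on the last buffer so the Tx
              * clean-up path can free the skb and account for all of its
              * segments
              */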
3610         tx_ring->buffer_info[i].skb = skb;
3611         tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3612         tx_ring->buffer_info[first].next_to_watch = i;
3613
3614         return ++count;
3615
3616 dma_error:
3617         dev_err(&pdev->dev, "TX DMA map failed\n");
3618
3619         /* clear timestamp and dma mappings for failed buffer_info mapping */
3620         buffer_info->dma = 0;
3621         buffer_info->time_stamp = 0;
3622         buffer_info->length = 0;
3623         buffer_info->next_to_watch = 0;
3624         buffer_info->mapped_as_page = false;
3625
3626         /* clear timestamp and dma mappings for remaining portion of packet */
3627         while (count--) {
3628                 if (i == 0)
3629                         i = tx_ring->count;
3630                 i--;
3631                 buffer_info = &tx_ring->buffer_info[i];
3632                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3633         }
3634
3635         return 0;
3636 }
3637
3638 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3639                                     u32 tx_flags, int count, u32 paylen,
3640                                     u8 hdr_len)
3641 {
3642         union e1000_adv_tx_desc *tx_desc;
3643         struct igb_buffer *buffer_info;
3644         u32 olinfo_status = 0, cmd_type_len;
3645         unsigned int i = tx_ring->next_to_use;
3646
3647         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3648                         E1000_ADVTXD_DCMD_DEXT);
3649
3650         if (tx_flags & IGB_TX_FLAGS_VLAN)
3651                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3652
3653         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3654                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3655
3656         if (tx_flags & IGB_TX_FLAGS_TSO) {
3657                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3658
3659                 /* insert tcp checksum */
3660                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3661
3662                 /* insert ip checksum */
3663                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3664                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3665
3666         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3667                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3668         }
3669
3670         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3671             (tx_flags & (IGB_TX_FLAGS_CSUM |
3672                          IGB_TX_FLAGS_TSO |
3673                          IGB_TX_FLAGS_VLAN)))
3674                 olinfo_status |= tx_ring->reg_idx << 4;
3675
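             /* PAYLEN reports only the data bytes: paylen is skb->len, so the
              * header bytes counted in hdr_len are subtracted out
              */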
3676         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3677
3678         do {
3679                 buffer_info = &tx_ring->buffer_info[i];
3680                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3681                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3682                 tx_desc->read.cmd_type_len =
3683                         cpu_to_le32(cmd_type_len | buffer_info->length);
3684                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3685                 count--;
3686                 i++;
3687                 if (i == tx_ring->count)
3688                         i = 0;
3689         } while (count > 0);
3690
3691         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3692         /* Force memory writes to complete before letting h/w
3693          * know there are new descriptors to fetch.  (Only
3694          * applicable for weak-ordered memory model archs,
3695          * such as IA-64). */
3696         wmb();
3697
3698         tx_ring->next_to_use = i;
3699         writel(i, tx_ring->tail);
3700         /* we need this if more than one processor can write to our tail
3701          * at a time, it synchronizes IO on IA64/Altix systems */
3702         mmiowb();
3703 }
3704
3705 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3706 {
3707         struct net_device *netdev = tx_ring->netdev;
3708
3709         netif_stop_subqueue(netdev, tx_ring->queue_index);
3710
3711         /* Herbert's original patch had:
3712          *  smp_mb__after_netif_stop_queue();
3713          * but since that doesn't exist yet, just open code it. */
3714         smp_mb();
3715
3716         /* We need to check again in case another CPU has just
3717          * made room available. */
3718         if (igb_desc_unused(tx_ring) < size)
3719                 return -EBUSY;
3720
3721         /* A reprieve! */
3722         netif_wake_subqueue(netdev, tx_ring->queue_index);
3723         tx_ring->tx_stats.restart_queue++;
3724         return 0;
3725 }
3726
3727 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3728 {
3729         if (igb_desc_unused(tx_ring) >= size)
3730                 return 0;
3731         return __igb_maybe_stop_tx(tx_ring, size);
3732 }
3733
3734 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3735                                     struct igb_ring *tx_ring)
3736 {
3737         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3738         int tso = 0, count;
3739         u32 tx_flags = 0;
3740         u16 first;
3741         u8 hdr_len = 0;
3742         union skb_shared_tx *shtx = skb_tx(skb);
3743
3744         /* need: 1 descriptor per page,
3745          *       + 2 desc gap to keep tail from touching head,
3746          *       + 1 desc for skb->data,
3747          *       + 1 desc for context descriptor,
3748          * otherwise try next time */
3749         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3750                 /* this is a hard error */
3751                 return NETDEV_TX_BUSY;
3752         }
3753
3754         if (unlikely(shtx->hardware)) {
3755                 shtx->in_progress = 1;
3756                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3757         }
3758
3759         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3760                 tx_flags |= IGB_TX_FLAGS_VLAN;
3761                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3762         }
3763
3764         if (skb->protocol == htons(ETH_P_IP))
3765                 tx_flags |= IGB_TX_FLAGS_IPV4;
3766
3767         first = tx_ring->next_to_use;
3768         if (skb_is_gso(skb)) {
3769                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3770
3771                 if (tso < 0) {
3772                         dev_kfree_skb_any(skb);
3773                         return NETDEV_TX_OK;
3774                 }
3775         }
3776
3777         if (tso)
3778                 tx_flags |= IGB_TX_FLAGS_TSO;
3779         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3780                  (skb->ip_summed == CHECKSUM_PARTIAL))
3781                 tx_flags |= IGB_TX_FLAGS_CSUM;
3782
3783         /*
3784          * count reflects descriptors mapped; if 0 or less then a mapping
3785          * error has occurred and we need to rewind the descriptor queue
3786          */
3787         count = igb_tx_map_adv(tx_ring, skb, first);
3788         if (!count) {
3789                 dev_kfree_skb_any(skb);
3790                 tx_ring->buffer_info[first].time_stamp = 0;
3791                 tx_ring->next_to_use = first;
3792                 return NETDEV_TX_OK;
3793         }
3794
3795         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3796
3797         /* Make sure there is space in the ring for the next send. */
3798         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3799
3800         return NETDEV_TX_OK;
3801 }
3802
3803 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3804                                       struct net_device *netdev)
3805 {
3806         struct igb_adapter *adapter = netdev_priv(netdev);
3807         struct igb_ring *tx_ring;
3808         int r_idx = 0;
3809
3810         if (test_bit(__IGB_DOWN, &adapter->state)) {
3811                 dev_kfree_skb_any(skb);
3812                 return NETDEV_TX_OK;
3813         }
3814
3815         if (skb->len <= 0) {
3816                 dev_kfree_skb_any(skb);
3817                 return NETDEV_TX_OK;
3818         }
3819
3820         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3821         tx_ring = adapter->multi_tx_table[r_idx];
3822
3823         /* This goes back to the question of how to logically map a tx queue
3824          * to a flow.  Right now, performance is impacted slightly negatively
3825          * if using multiple tx queues.  If the stack breaks away from a
3826          * single qdisc implementation, we can look at this again. */
3827         return igb_xmit_frame_ring_adv(skb, tx_ring);
3828 }
3829
3830 /**
3831  * igb_tx_timeout - Respond to a Tx Hang
3832  * @netdev: network interface device structure
3833  **/
3834 static void igb_tx_timeout(struct net_device *netdev)
3835 {
3836         struct igb_adapter *adapter = netdev_priv(netdev);
3837         struct e1000_hw *hw = &adapter->hw;
3838
3839         /* Do the reset outside of interrupt context */
3840         adapter->tx_timeout_count++;
3841
3842         if (hw->mac.type == e1000_82580)
3843                 hw->dev_spec._82575.global_device_reset = true;
3844
3845         schedule_work(&adapter->reset_task);
3846         wr32(E1000_EICS,
3847              (adapter->eims_enable_mask & ~adapter->eims_other));
3848 }
3849
3850 static void igb_reset_task(struct work_struct *work)
3851 {
3852         struct igb_adapter *adapter;
3853         adapter = container_of(work, struct igb_adapter, reset_task);
3854
3855         igb_reinit_locked(adapter);
3856 }
3857
3858 /**
3859  * igb_get_stats - Get System Network Statistics
3860  * @netdev: network interface device structure
3861  *
3862  * Returns the address of the device statistics structure.
3863  * The statistics are actually updated from the timer callback.
3864  **/
3865 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3866 {
3867         /* only return the current stats */
3868         return &netdev->stats;
3869 }
3870
3871 /**
3872  * igb_change_mtu - Change the Maximum Transfer Unit
3873  * @netdev: network interface device structure
3874  * @new_mtu: new value for maximum frame size
3875  *
3876  * Returns 0 on success, negative on failure
3877  **/
3878 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3879 {
3880         struct igb_adapter *adapter = netdev_priv(netdev);
3881         struct pci_dev *pdev = adapter->pdev;
3882         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3883         u32 rx_buffer_len, i;
3884
3885         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3886                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3887                 return -EINVAL;
3888         }
3889
3890         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3891                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3892                 return -EINVAL;
3893         }
3894
3895         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3896                 msleep(1);
3897
3898         /* igb_down has a dependency on max_frame_size */
3899         adapter->max_frame_size = max_frame;
3900
3901         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3902          * means we reserve 2 more; this pushes us to allocate from the next
3903          * larger slab size.
3904          * i.e. RXBUFFER_2048 --> size-4096 slab
3905          */
3906
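             /* the 82580 can prepend a timestamp header to received packets,
              * so leave room for the extra IGB_TS_HDR_LEN bytes when picking a
              * buffer size
              */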
3907         if (adapter->hw.mac.type == e1000_82580)
3908                 max_frame += IGB_TS_HDR_LEN;
3909
3910         if (max_frame <= IGB_RXBUFFER_1024)
3911                 rx_buffer_len = IGB_RXBUFFER_1024;
3912         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3913                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3914         else
3915                 rx_buffer_len = IGB_RXBUFFER_128;
3916
3917         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
3918              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
3919                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
3920
3921         if ((adapter->hw.mac.type == e1000_82580) &&
3922             (rx_buffer_len == IGB_RXBUFFER_128))
3923                 rx_buffer_len += IGB_RXBUFFER_64;
3924
3925         if (netif_running(netdev))
3926                 igb_down(adapter);
3927
3928         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3929                  netdev->mtu, new_mtu);
3930         netdev->mtu = new_mtu;
3931
3932         for (i = 0; i < adapter->num_rx_queues; i++)
3933                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3934
3935         if (netif_running(netdev))
3936                 igb_up(adapter);
3937         else
3938                 igb_reset(adapter);
3939
3940         clear_bit(__IGB_RESETTING, &adapter->state);
3941
3942         return 0;
3943 }
3944
3945 /**
3946  * igb_update_stats - Update the board statistics counters
3947  * @adapter: board private structure
3948  **/
3949
3950 void igb_update_stats(struct igb_adapter *adapter)
3951 {
3952         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3953         struct e1000_hw *hw = &adapter->hw;
3954         struct pci_dev *pdev = adapter->pdev;
3955         u32 reg, mpc;
3956         u16 phy_tmp;
3957         int i;
3958         u64 bytes, packets;
3959
3960 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3961
3962         /*
3963          * Prevent stats update while adapter is being reset, or if the pci
3964          * connection is down.
3965          */
3966         if (adapter->link_speed == 0)
3967                 return;
3968         if (pci_channel_offline(pdev))
3969                 return;
3970
3971         bytes = 0;
3972         packets = 0;
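             /* RQDPC (Receive Queue Drop Packet Count) reports the number of
              * packets dropped on each queue for lack of receive descriptors
              */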
3973         for (i = 0; i < adapter->num_rx_queues; i++) {
3974                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3975                 struct igb_ring *ring = adapter->rx_ring[i];
3976                 ring->rx_stats.drops += rqdpc_tmp;
3977                 net_stats->rx_fifo_errors += rqdpc_tmp;
3978                 bytes += ring->rx_stats.bytes;
3979                 packets += ring->rx_stats.packets;
3980         }
3981
3982         net_stats->rx_bytes = bytes;
3983         net_stats->rx_packets = packets;
3984
3985         bytes = 0;
3986         packets = 0;
3987         for (i = 0; i < adapter->num_tx_queues; i++) {
3988                 struct igb_ring *ring = adapter->tx_ring[i];
3989                 bytes += ring->tx_stats.bytes;
3990                 packets += ring->tx_stats.packets;
3991         }
3992         net_stats->tx_bytes = bytes;
3993         net_stats->tx_packets = packets;
3994
3995         /* read stats registers */
3996         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3997         adapter->stats.gprc += rd32(E1000_GPRC);
3998         adapter->stats.gorc += rd32(E1000_GORCL);
3999         rd32(E1000_GORCH); /* clear GORCL */
4000         adapter->stats.bprc += rd32(E1000_BPRC);
4001         adapter->stats.mprc += rd32(E1000_MPRC);
4002         adapter->stats.roc += rd32(E1000_ROC);
4003
4004         adapter->stats.prc64 += rd32(E1000_PRC64);
4005         adapter->stats.prc127 += rd32(E1000_PRC127);
4006         adapter->stats.prc255 += rd32(E1000_PRC255);
4007         adapter->stats.prc511 += rd32(E1000_PRC511);
4008         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4009         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4010         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4011         adapter->stats.sec += rd32(E1000_SEC);
4012
4013         mpc = rd32(E1000_MPC);
4014         adapter->stats.mpc += mpc;
4015         net_stats->rx_fifo_errors += mpc;
4016         adapter->stats.scc += rd32(E1000_SCC);
4017         adapter->stats.ecol += rd32(E1000_ECOL);
4018         adapter->stats.mcc += rd32(E1000_MCC);
4019         adapter->stats.latecol += rd32(E1000_LATECOL);
4020         adapter->stats.dc += rd32(E1000_DC);
4021         adapter->stats.rlec += rd32(E1000_RLEC);
4022         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4023         adapter->stats.xontxc += rd32(E1000_XONTXC);
4024         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4025         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4026         adapter->stats.fcruc += rd32(E1000_FCRUC);
4027         adapter->stats.gptc += rd32(E1000_GPTC);
4028         adapter->stats.gotc += rd32(E1000_GOTCL);
4029         rd32(E1000_GOTCH); /* clear GOTCL */
4030         adapter->stats.rnbc += rd32(E1000_RNBC);
4031         adapter->stats.ruc += rd32(E1000_RUC);
4032         adapter->stats.rfc += rd32(E1000_RFC);
4033         adapter->stats.rjc += rd32(E1000_RJC);
4034         adapter->stats.tor += rd32(E1000_TORH);
4035         adapter->stats.tot += rd32(E1000_TOTH);
4036         adapter->stats.tpr += rd32(E1000_TPR);
4037
4038         adapter->stats.ptc64 += rd32(E1000_PTC64);
4039         adapter->stats.ptc127 += rd32(E1000_PTC127);
4040         adapter->stats.ptc255 += rd32(E1000_PTC255);
4041         adapter->stats.ptc511 += rd32(E1000_PTC511);
4042         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4043         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4044
4045         adapter->stats.mptc += rd32(E1000_MPTC);
4046         adapter->stats.bptc += rd32(E1000_BPTC);
4047
4048         adapter->stats.tpt += rd32(E1000_TPT);
4049         adapter->stats.colc += rd32(E1000_COLC);
4050
4051         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4052         /* read internal phy specific stats */
4053         reg = rd32(E1000_CTRL_EXT);
4054         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4055                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4056                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4057         }
4058
4059         adapter->stats.tsctc += rd32(E1000_TSCTC);
4060         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4061
4062         adapter->stats.iac += rd32(E1000_IAC);
4063         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4064         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4065         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4066         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4067         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4068         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4069         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4070         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4071
4072         /* Fill out the OS statistics structure */
4073         net_stats->multicast = adapter->stats.mprc;
4074         net_stats->collisions = adapter->stats.colc;
4075
4076         /* Rx Errors */
4077
4078         /* RLEC on some newer hardware can be incorrect so build
4079          * our own version based on RUC and ROC */
4080         net_stats->rx_errors = adapter->stats.rxerrc +
4081                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4082                 adapter->stats.ruc + adapter->stats.roc +
4083                 adapter->stats.cexterr;
4084         net_stats->rx_length_errors = adapter->stats.ruc +
4085                                       adapter->stats.roc;
4086         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4087         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4088         net_stats->rx_missed_errors = adapter->stats.mpc;
4089
4090         /* Tx Errors */
4091         net_stats->tx_errors = adapter->stats.ecol +
4092                                adapter->stats.latecol;
4093         net_stats->tx_aborted_errors = adapter->stats.ecol;
4094         net_stats->tx_window_errors = adapter->stats.latecol;
4095         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4096
4097         /* Tx Dropped needs to be maintained elsewhere */
4098
4099         /* Phy Stats */
4100         if (hw->phy.media_type == e1000_media_type_copper) {
4101                 if ((adapter->link_speed == SPEED_1000) &&
4102                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4103                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4104                         adapter->phy_stats.idle_errors += phy_tmp;
4105                 }
4106         }
4107
4108         /* Management Stats */
4109         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4110         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4111         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4112 }
4113
4114 static irqreturn_t igb_msix_other(int irq, void *data)
4115 {
4116         struct igb_adapter *adapter = data;
4117         struct e1000_hw *hw = &adapter->hw;
4118         u32 icr = rd32(E1000_ICR);
4119         /* reading ICR causes bit 31 of EICR to be cleared */
4120
4121         if (icr & E1000_ICR_DRSTA)
4122                 schedule_work(&adapter->reset_task);
4123
4124         if (icr & E1000_ICR_DOUTSYNC) {
4125                 /* HW is reporting DMA is out of sync */
4126                 adapter->stats.doosync++;
4127         }
4128
4129         /* Check for a mailbox event */
4130         if (icr & E1000_ICR_VMMB)
4131                 igb_msg_task(adapter);
4132
4133         if (icr & E1000_ICR_LSC) {
4134                 hw->mac.get_link_status = 1;
4135                 /* guard against interrupt when we're going down */
4136                 if (!test_bit(__IGB_DOWN, &adapter->state))
4137                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4138         }
4139
4140         if (adapter->vfs_allocated_count)
4141                 wr32(E1000_IMS, E1000_IMS_LSC |
4142                                 E1000_IMS_VMMB |
4143                                 E1000_IMS_DOUTSYNC);
4144         else
4145                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4146         wr32(E1000_EIMS, adapter->eims_other);
4147
4148         return IRQ_HANDLED;
4149 }
4150
4151 static void igb_write_itr(struct igb_q_vector *q_vector)
4152 {
4153         struct igb_adapter *adapter = q_vector->adapter;
4154         u32 itr_val = q_vector->itr_val & 0x7FFC;
4155
4156         if (!q_vector->set_itr)
4157                 return;
4158
4159         if (!itr_val)
4160                 itr_val = 0x4;
4161
4162         if (adapter->hw.mac.type == e1000_82575)
4163                 itr_val |= itr_val << 16;
4164         else
4165                 itr_val |= 0x8000000;
4166
4167         writel(itr_val, q_vector->itr_register);
4168         q_vector->set_itr = 0;
4169 }
4170
4171 static irqreturn_t igb_msix_ring(int irq, void *data)
4172 {
4173         struct igb_q_vector *q_vector = data;
4174
4175         /* Write the ITR value calculated from the previous interrupt. */
4176         igb_write_itr(q_vector);
4177
4178         napi_schedule(&q_vector->napi);
4179
4180         return IRQ_HANDLED;
4181 }
4182
4183 #ifdef CONFIG_IGB_DCA
4184 static void igb_update_dca(struct igb_q_vector *q_vector)
4185 {
4186         struct igb_adapter *adapter = q_vector->adapter;
4187         struct e1000_hw *hw = &adapter->hw;
4188         int cpu = get_cpu();
4189
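             /* if the vector already targets this CPU there is nothing to do;
              * otherwise steer the device's descriptor and data writes toward
              * its cache
              */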
4190         if (q_vector->cpu == cpu)
4191                 goto out_no_update;
4192
4193         if (q_vector->tx_ring) {
4194                 int q = q_vector->tx_ring->reg_idx;
4195                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4196                 if (hw->mac.type == e1000_82575) {
4197                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4198                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4199                 } else {
4200                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4201                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4202                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4203                 }
4204                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4205                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4206         }
4207         if (q_vector->rx_ring) {
4208                 int q = q_vector->rx_ring->reg_idx;
4209                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4210                 if (hw->mac.type == e1000_82575) {
4211                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4212                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4213                 } else {
4214                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4215                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4216                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4217                 }
4218                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4219                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4220                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4221                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4222         }
4223         q_vector->cpu = cpu;
4224 out_no_update:
4225         put_cpu();
4226 }
4227
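/**
 * igb_setup_dca - enable DCA (CB2 mode) and tag every queue vector with its CPU
 * @adapter: board private structure
 **/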
4228 static void igb_setup_dca(struct igb_adapter *adapter)
4229 {
4230         struct e1000_hw *hw = &adapter->hw;
4231         int i;
4232
4233         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4234                 return;
4235
4236         /* Always use CB2 mode, difference is masked in the CB driver. */
4237         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4238
4239         for (i = 0; i < adapter->num_q_vectors; i++) {
4240                 adapter->q_vector[i]->cpu = -1;
4241                 igb_update_dca(adapter->q_vector[i]);
4242         }
4243 }
4244
4245 static int __igb_notify_dca(struct device *dev, void *data)
4246 {
4247         struct net_device *netdev = dev_get_drvdata(dev);
4248         struct igb_adapter *adapter = netdev_priv(netdev);
4249         struct pci_dev *pdev = adapter->pdev;
4250         struct e1000_hw *hw = &adapter->hw;
4251         unsigned long event = *(unsigned long *)data;
4252
4253         switch (event) {
4254         case DCA_PROVIDER_ADD:
4255                 /* if already enabled, don't do it again */
4256                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4257                         break;
4258                 if (dca_add_requester(dev) == 0) {
4259                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4260                         dev_info(&pdev->dev, "DCA enabled\n");
4261                         igb_setup_dca(adapter);
4262                         break;
4263                 }
4264                 /* Fall Through since DCA is disabled. */
4265         case DCA_PROVIDER_REMOVE:
4266                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4267                         /* without this a class_device is left
4268                          * hanging around in the sysfs model */
4269                         dca_remove_requester(dev);
4270                         dev_info(&pdev->dev, "DCA disabled\n");
4271                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4272                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4273                 }
4274                 break;
4275         }
4276
4277         return 0;
4278 }
4279
4280 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4281                           void *p)
4282 {
4283         int ret_val;
4284
4285         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4286                                          __igb_notify_dca);
4287
4288         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4289 }
4290 #endif /* CONFIG_IGB_DCA */
4291
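/**
 * igb_ping_all_vfs - post a PF control message to every allocated VF mailbox
 * @adapter: board private structure
 **/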
4292 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4293 {
4294         struct e1000_hw *hw = &adapter->hw;
4295         u32 ping;
4296         int i;
4297
4298         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4299                 ping = E1000_PF_CONTROL_MSG;
4300                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4301                         ping |= E1000_VT_MSGTYPE_CTS;
4302                 igb_write_mbx(hw, &ping, 1, i);
4303         }
4304 }
4305
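/**
 * igb_set_vf_promisc - handle a VF request to change its promiscuous settings
 * @adapter: board private structure
 * @msgbuf: mailbox message received from the VF
 * @vf: VF index the message came from
 **/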
4306 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4307 {
4308         struct e1000_hw *hw = &adapter->hw;
4309         u32 vmolr = rd32(E1000_VMOLR(vf));
4310         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4311
4312         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4313                             IGB_VF_FLAG_MULTI_PROMISC);
4314         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4315
4316         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4317                 vmolr |= E1000_VMOLR_MPME;
4318                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4319         } else {
4320                 /*
4321                  * if we have hashes and we are clearing a multicast promisc
4322                  * flag we need to write the hashes to the MTA as this step
4323                  * was previously skipped
4324                  */
4325                 if (vf_data->num_vf_mc_hashes > 30) {
4326                         vmolr |= E1000_VMOLR_MPME;
4327                 } else if (vf_data->num_vf_mc_hashes) {
4328                         int j;
4329                         vmolr |= E1000_VMOLR_ROMPE;
4330                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4331                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4332                 }
4333         }
4334
4335         wr32(E1000_VMOLR(vf), vmolr);
4336
4337         /* there are flags left unprocessed, likely not supported */
4338         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4339                 return -EINVAL;
4340
4341         return 0;
4342
4343 }
4344
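/**
 * igb_set_vf_multicasts - store a VF's multicast hash list and refresh the MTA
 * @adapter: board private structure
 * @msgbuf: mailbox message containing the hash list
 * @vf: VF index the message came from
 **/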
4345 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4346                                   u32 *msgbuf, u32 vf)
4347 {
4348         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4349         u16 *hash_list = (u16 *)&msgbuf[1];
4350         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4351         int i;
4352
4353         /* salt away the number of multicast addresses assigned
4354          * to this VF for later use to restore when the PF multicast
4355          * list changes
4356          */
4357         vf_data->num_vf_mc_hashes = n;
4358
4359         /* only up to 30 hash values supported */
4360         if (n > 30)
4361                 n = 30;
4362
4363         /* store the hashes for later use */
4364         for (i = 0; i < n; i++)
4365                 vf_data->vf_mc_hashes[i] = hash_list[i];
4366
4367         /* Flush and reset the mta with the new values */
4368         igb_set_rx_mode(adapter->netdev);
4369
4370         return 0;
4371 }
4372
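/**
 * igb_restore_vf_multicasts - re-program VF multicast filters after an MTA flush
 * @adapter: board private structure
 **/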
4373 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4374 {
4375         struct e1000_hw *hw = &adapter->hw;
4376         struct vf_data_storage *vf_data;
4377         int i, j;
4378
4379         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4380                 u32 vmolr = rd32(E1000_VMOLR(i));
4381                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4382
4383                 vf_data = &adapter->vf_data[i];
4384
4385                 if ((vf_data->num_vf_mc_hashes > 30) ||
4386                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4387                         vmolr |= E1000_VMOLR_MPME;
4388                 } else if (vf_data->num_vf_mc_hashes) {
4389                         vmolr |= E1000_VMOLR_ROMPE;
4390                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4391                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4392                 }
4393                 wr32(E1000_VMOLR(i), vmolr);
4394         }
4395 }
4396
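/**
 * igb_clear_vf_vfta - remove a VF from every VLAN pool it belongs to
 * @adapter: board private structure
 * @vf: VF index to clear from the VLVF/VFTA tables
 **/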
4397 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4398 {
4399         struct e1000_hw *hw = &adapter->hw;
4400         u32 pool_mask, reg, vid;
4401         int i;
4402
4403         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4404
4405         /* Find the vlan filter for this id */
4406         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4407                 reg = rd32(E1000_VLVF(i));
4408
4409                 /* remove the vf from the pool */
4410                 reg &= ~pool_mask;
4411
4412                 /* if pool is empty then remove entry from vfta */
4413                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4414                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4415                         vid = reg & E1000_VLVF_VLANID_MASK;
4416                         igb_vfta_set(hw, vid, false);
4417                         reg = 0;
4418                 }
4419
4420                 wr32(E1000_VLVF(i), reg);
4421         }
4422
4423         adapter->vf_data[vf].vlans_enabled = 0;
4424 }
4425
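/**
 * igb_vlvf_set - add or remove a VLAN filter for a given pool
 * @adapter: board private structure
 * @vid: VLAN id to add or remove
 * @add: true to add the filter, false to remove it
 * @vf: pool (VF or PF) the filter applies to
 **/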
4426 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4427 {
4428         struct e1000_hw *hw = &adapter->hw;
4429         u32 reg, i;
4430
4431         /* The vlvf table only exists on 82576 hardware and newer */
4432         if (hw->mac.type < e1000_82576)
4433                 return -1;
4434
4435         /* we only need to do this if VMDq is enabled */
4436         if (!adapter->vfs_allocated_count)
4437                 return -1;
4438
4439         /* Find the vlan filter for this id */
4440         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4441                 reg = rd32(E1000_VLVF(i));
4442                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4443                     vid == (reg & E1000_VLVF_VLANID_MASK))
4444                         break;
4445         }
4446
4447         if (add) {
4448                 if (i == E1000_VLVF_ARRAY_SIZE) {
4449                         /* Did not find a matching VLAN ID entry that was
4450                          * enabled.  Search for a free filter entry, i.e.
4451                          * one without the enable bit set
4452                          */
4453                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4454                                 reg = rd32(E1000_VLVF(i));
4455                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4456                                         break;
4457                         }
4458                 }
4459                 if (i < E1000_VLVF_ARRAY_SIZE) {
4460                         /* Found an enabled/available entry */
4461                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4462
4463                         /* if !enabled we need to set this up in vfta */
4464                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4465                                 /* add VID to filter table */
4466                                 igb_vfta_set(hw, vid, true);
4467                                 reg |= E1000_VLVF_VLANID_ENABLE;
4468                         }
4469                         reg &= ~E1000_VLVF_VLANID_MASK;
4470                         reg |= vid;
4471                         wr32(E1000_VLVF(i), reg);
4472
4473                         /* do not modify RLPML for PF devices */
4474                         if (vf >= adapter->vfs_allocated_count)
4475                                 return 0;
4476
4477                         if (!adapter->vf_data[vf].vlans_enabled) {
4478                                 u32 size;
4479                                 reg = rd32(E1000_VMOLR(vf));
4480                                 size = reg & E1000_VMOLR_RLPML_MASK;
4481                                 size += 4;
4482                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4483                                 reg |= size;
4484                                 wr32(E1000_VMOLR(vf), reg);
4485                         }
4486
4487                         adapter->vf_data[vf].vlans_enabled++;
4488                         return 0;
4489                 }
4490         } else {
4491                 if (i < E1000_VLVF_ARRAY_SIZE) {
4492                         /* remove vf from the pool */
4493                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4494                         /* if pool is empty then remove entry from vfta */
4495                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4496                                 reg = 0;
4497                                 igb_vfta_set(hw, vid, false);
4498                         }
4499                         wr32(E1000_VLVF(i), reg);
4500
4501                         /* do not modify RLPML for PF devices */
4502                         if (vf >= adapter->vfs_allocated_count)
4503                                 return 0;
4504
4505                         adapter->vf_data[vf].vlans_enabled--;
4506                         if (!adapter->vf_data[vf].vlans_enabled) {
4507                                 u32 size;
4508                                 reg = rd32(E1000_VMOLR(vf));
4509                                 size = reg & E1000_VMOLR_RLPML_MASK;
4510                                 size -= 4;
4511                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4512                                 reg |= size;
4513                                 wr32(E1000_VMOLR(vf), reg);
4514                         }
4515                 }
4516         }
4517         return 0;
4518 }
4519
4520 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4521 {
4522         struct e1000_hw *hw = &adapter->hw;
4523
4524         if (vid)
4525                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4526         else
4527                 wr32(E1000_VMVIR(vf), 0);
4528 }
4529
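/**
 * igb_ndo_set_vf_vlan - ndo callback to assign a port VLAN and QoS to a VF
 * @netdev: network interface device structure
 * @vf: VF index
 * @vlan: VLAN id to assign (0 clears the port VLAN)
 * @qos: priority to assign
 **/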
4530 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4531                                int vf, u16 vlan, u8 qos)
4532 {
4533         int err = 0;
4534         struct igb_adapter *adapter = netdev_priv(netdev);
4535
4536         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4537                 return -EINVAL;
4538         if (vlan || qos) {
4539                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4540                 if (err)
4541                         goto out;
4542                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4543                 igb_set_vmolr(adapter, vf, !vlan);
4544                 adapter->vf_data[vf].pf_vlan = vlan;
4545                 adapter->vf_data[vf].pf_qos = qos;
4546                 dev_info(&adapter->pdev->dev,
4547                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4548                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4549                         dev_warn(&adapter->pdev->dev,
4550                                  "The VF VLAN has been set,"
4551                                  " but the PF device is not up.\n");
4552                         dev_warn(&adapter->pdev->dev,
4553                                  "Bring the PF device up before"
4554                                  " attempting to use the VF device.\n");
4555                 }
4556         } else {
4557                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4558                                    false, vf);
4559                 igb_set_vmvir(adapter, vlan, vf);
4560                 igb_set_vmolr(adapter, vf, true);
4561                 adapter->vf_data[vf].pf_vlan = 0;
4562                 adapter->vf_data[vf].pf_qos = 0;
4563         }
4564 out:
4565         return err;
4566 }
4567
4568 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4569 {
4570         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4571         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4572
4573         return igb_vlvf_set(adapter, vid, add, vf);
4574 }
4575
4576 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4577 {
4578         /* clear flags */
4579         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4580         adapter->vf_data[vf].last_nack = jiffies;
4581
4582         /* reset offloads to defaults */
4583         igb_set_vmolr(adapter, vf, true);
4584
4585         /* reset vlans for device */
4586         igb_clear_vf_vfta(adapter, vf);
4587         if (adapter->vf_data[vf].pf_vlan)
4588                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4589                                     adapter->vf_data[vf].pf_vlan,
4590                                     adapter->vf_data[vf].pf_qos);
4591         else
4592                 igb_clear_vf_vfta(adapter, vf);
4593
4594         /* reset multicast table array for vf */
4595         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4596
4597         /* Flush and reset the mta with the new values */
4598         igb_set_rx_mode(adapter->netdev);
4599 }
4600
4601 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4602 {
4603         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4604
4605         /* generate a new mac address as we were hotplug removed/added */
4606         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4607                 random_ether_addr(vf_mac);
4608
4609         /* process remaining reset events */
4610         igb_vf_reset(adapter, vf);
4611 }
4612
4613 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4614 {
4615         struct e1000_hw *hw = &adapter->hw;
4616         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4617         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4618         u32 reg, msgbuf[3];
4619         u8 *addr = (u8 *)(&msgbuf[1]);
4620
4621         /* process all the same items cleared in a function level reset */
4622         igb_vf_reset(adapter, vf);
4623
4624         /* set vf mac address */
4625         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4626
4627         /* enable transmit and receive for vf */
4628         reg = rd32(E1000_VFTE);
4629         wr32(E1000_VFTE, reg | (1 << vf));
4630         reg = rd32(E1000_VFRE);
4631         wr32(E1000_VFRE, reg | (1 << vf));
4632
4633         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4634
4635         /* reply to reset with ack and vf mac address */
4636         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4637         memcpy(addr, vf_mac, 6);
4638         igb_write_mbx(hw, msgbuf, 3, vf);
4639 }
4640
4641 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4642 {
4643         unsigned char *addr = (unsigned char *)&msg[1];
4644         int err = -1;
4645
4646         if (is_valid_ether_addr(addr))
4647                 err = igb_set_vf_mac(adapter, vf, addr);
4648
4649         return err;
4650 }
4651
4652 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4653 {
4654         struct e1000_hw *hw = &adapter->hw;
4655         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4656         u32 msg = E1000_VT_MSGTYPE_NACK;
4657
4658         /* if device isn't clear to send it shouldn't be reading either */
4659         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4660             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4661                 igb_write_mbx(hw, &msg, 1, vf);
4662                 vf_data->last_nack = jiffies;
4663         }
4664 }
4665
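/**
 * igb_rcv_msg_from_vf - read one mailbox message from a VF and dispatch it
 * @adapter: board private structure
 * @vf: VF index the message came from
 **/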
4666 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4667 {
4668         struct pci_dev *pdev = adapter->pdev;
4669         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4670         struct e1000_hw *hw = &adapter->hw;
4671         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4672         s32 retval;
4673
4674         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4675
4676         if (retval) {
4677                 /* if receive failed revoke VF CTS stats and restart init */
4678                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4679                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4680                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4681                         return;
4682                 goto out;
4683         }
4684
4685         /* this is a message we already processed, do nothing */
4686         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4687                 return;
4688
4689         /*
4690          * until the vf completes a reset it should not be
4691          * allowed to start any configuration.
4692          */
4693
4694         if (msgbuf[0] == E1000_VF_RESET) {
4695                 igb_vf_reset_msg(adapter, vf);
4696                 return;
4697         }
4698
4699         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4700                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4701                         return;
4702                 retval = -1;
4703                 goto out;
4704         }
4705
4706         switch ((msgbuf[0] & 0xFFFF)) {
4707         case E1000_VF_SET_MAC_ADDR:
4708                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4709                 break;
4710         case E1000_VF_SET_PROMISC:
4711                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4712                 break;
4713         case E1000_VF_SET_MULTICAST:
4714                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4715                 break;
4716         case E1000_VF_SET_LPE:
4717                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4718                 break;
4719         case E1000_VF_SET_VLAN:
4720                 if (adapter->vf_data[vf].pf_vlan)
4721                         retval = -1;
4722                 else
4723                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4724                 break;
4725         default:
4726                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4727                 retval = -1;
4728                 break;
4729         }
4730
4731         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4732 out:
4733         /* notify the VF of the results of what it sent us */
4734         if (retval)
4735                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4736         else
4737                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4738
4739         igb_write_mbx(hw, msgbuf, 1, vf);
4740 }
4741
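/**
 * igb_msg_task - service pending resets, messages and acks from all VFs
 * @adapter: board private structure
 **/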
4742 static void igb_msg_task(struct igb_adapter *adapter)
4743 {
4744         struct e1000_hw *hw = &adapter->hw;
4745         u32 vf;
4746
4747         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4748                 /* process any reset requests */
4749                 if (!igb_check_for_rst(hw, vf))
4750                         igb_vf_reset_event(adapter, vf);
4751
4752                 /* process any messages pending */
4753                 if (!igb_check_for_msg(hw, vf))
4754                         igb_rcv_msg_from_vf(adapter, vf);
4755
4756                 /* process any acks */
4757                 if (!igb_check_for_ack(hw, vf))
4758                         igb_rcv_ack_from_vf(adapter, vf);
4759         }
4760 }
4761
4762 /**
4763  *  igb_set_uta - Set unicast filter table address
4764  *  @adapter: board private structure
4765  *
4766  *  The unicast table address is a register array of 32-bit registers.
4767  *  The table is meant to be used in a way similar to how the MTA is used
4768  *  however due to certain limitations in the hardware it is necessary to
4769  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4770  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4771  **/
4772 static void igb_set_uta(struct igb_adapter *adapter)
4773 {
4774         struct e1000_hw *hw = &adapter->hw;
4775         int i;
4776
4777         /* The UTA table only exists on 82576 hardware and newer */
4778         if (hw->mac.type < e1000_82576)
4779                 return;
4780
4781         /* we only need to do this if VMDq is enabled */
4782         if (!adapter->vfs_allocated_count)
4783                 return;
4784
4785         for (i = 0; i < hw->mac.uta_reg_count; i++)
4786                 array_wr32(E1000_UTA, i, ~0);
4787 }
4788
4789 /**
4790  * igb_intr_msi - Interrupt Handler
4791  * @irq: interrupt number
4792  * @data: pointer to a network interface device structure
4793  **/
4794 static irqreturn_t igb_intr_msi(int irq, void *data)
4795 {
4796         struct igb_adapter *adapter = data;
4797         struct igb_q_vector *q_vector = adapter->q_vector[0];
4798         struct e1000_hw *hw = &adapter->hw;
4799         /* read ICR disables interrupts using IAM */
4800         u32 icr = rd32(E1000_ICR);
4801
4802         igb_write_itr(q_vector);
4803
4804         if (icr & E1000_ICR_DRSTA)
4805                 schedule_work(&adapter->reset_task);
4806
4807         if (icr & E1000_ICR_DOUTSYNC) {
4808                 /* HW is reporting DMA is out of sync */
4809                 adapter->stats.doosync++;
4810         }
4811
4812         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4813                 hw->mac.get_link_status = 1;
4814                 if (!test_bit(__IGB_DOWN, &adapter->state))
4815                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4816         }
4817
4818         napi_schedule(&q_vector->napi);
4819
4820         return IRQ_HANDLED;
4821 }
4822
4823 /**
4824  * igb_intr - Legacy Interrupt Handler
4825  * @irq: interrupt number
4826  * @data: pointer to a network interface device structure
4827  **/
4828 static irqreturn_t igb_intr(int irq, void *data)
4829 {
4830         struct igb_adapter *adapter = data;
4831         struct igb_q_vector *q_vector = adapter->q_vector[0];
4832         struct e1000_hw *hw = &adapter->hw;
4833         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4834          * need for the IMC write */
4835         u32 icr = rd32(E1000_ICR);
4836         if (!icr)
4837                 return IRQ_NONE;  /* Not our interrupt */
4838
4839         igb_write_itr(q_vector);
4840
4841         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4842          * not set, then the adapter didn't send an interrupt */
4843         if (!(icr & E1000_ICR_INT_ASSERTED))
4844                 return IRQ_NONE;
4845
4846         if (icr & E1000_ICR_DRSTA)
4847                 schedule_work(&adapter->reset_task);
4848
4849         if (icr & E1000_ICR_DOUTSYNC) {
4850                 /* HW is reporting DMA is out of sync */
4851                 adapter->stats.doosync++;
4852         }
4853
4854         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4855                 hw->mac.get_link_status = 1;
4856                 /* guard against interrupt when we're going down */
4857                 if (!test_bit(__IGB_DOWN, &adapter->state))
4858                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4859         }
4860
4861         napi_schedule(&q_vector->napi);
4862
4863         return IRQ_HANDLED;
4864 }
4865
4866 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4867 {
4868         struct igb_adapter *adapter = q_vector->adapter;
4869         struct e1000_hw *hw = &adapter->hw;
4870
4871         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4872             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4873                 if (!adapter->msix_entries)
4874                         igb_set_itr(adapter);
4875                 else
4876                         igb_update_ring_itr(q_vector);
4877         }
4878
4879         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4880                 if (adapter->msix_entries)
4881                         wr32(E1000_EIMS, q_vector->eims_value);
4882                 else
4883                         igb_irq_enable(adapter);
4884         }
4885 }
4886
4887 /**
4888  * igb_poll - NAPI Rx polling callback
4889  * @napi: napi polling structure
4890  * @budget: count of how many packets we should handle
4891  **/
4892 static int igb_poll(struct napi_struct *napi, int budget)
4893 {
4894         struct igb_q_vector *q_vector = container_of(napi,
4895                                                      struct igb_q_vector,
4896                                                      napi);
4897         int tx_clean_complete = 1, work_done = 0;
4898
4899 #ifdef CONFIG_IGB_DCA
4900         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4901                 igb_update_dca(q_vector);
4902 #endif
4903         if (q_vector->tx_ring)
4904                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4905
4906         if (q_vector->rx_ring)
4907                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4908
4909         if (!tx_clean_complete)
4910                 work_done = budget;
4911
4912         /* If not enough Rx work done, exit the polling mode */
4913         if (work_done < budget) {
4914                 napi_complete(napi);
4915                 igb_ring_irq_enable(q_vector);
4916         }
4917
4918         return work_done;
4919 }
4920
4921 /**
4922  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4923  * @adapter: board private structure
4924  * @shhwtstamps: timestamp structure to update
4925  * @regval: unsigned 64bit system time value.
4926  *
4927  * We need to convert the system time value stored in the RX/TXSTMP registers
4928  * into a hwtstamp which can be used by the upper level timestamping functions
4929  */
4930 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4931                                    struct skb_shared_hwtstamps *shhwtstamps,
4932                                    u64 regval)
4933 {
4934         u64 ns;
4935
4936         /*
4937          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
4938          * 24 to match clock shift we setup earlier.
4939          */
4940         if (adapter->hw.mac.type == e1000_82580)
4941                 regval <<= IGB_82580_TSYNC_SHIFT;
4942
4943         ns = timecounter_cyc2time(&adapter->clock, regval);
4944         timecompare_update(&adapter->compare, ns);
4945         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4946         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4947         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4948 }
4949
4950 /**
4951  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4952  * @q_vector: pointer to q_vector containing needed info
4953  * @skb: packet that was just sent
4954  *
4955  * If we were asked to do hardware stamping and such a time stamp is
4956  * available, then it must have been for this skb here because we only
4957  * allow one such packet into the queue.
4958  */
4959 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4960 {
4961         struct igb_adapter *adapter = q_vector->adapter;
4962         union skb_shared_tx *shtx = skb_tx(skb);
4963         struct e1000_hw *hw = &adapter->hw;
4964         struct skb_shared_hwtstamps shhwtstamps;
4965         u64 regval;
4966
4967         /* if skb does not support hw timestamp or TX stamp not valid exit */
4968         if (likely(!shtx->hardware) ||
4969             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4970                 return;
4971
4972         regval = rd32(E1000_TXSTMPL);
4973         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4974
4975         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4976         skb_tstamp_tx(skb, &shhwtstamps);
4977 }
4978
4979 /**
4980  * igb_clean_tx_irq - Reclaim resources after transmit completes
4981  * @q_vector: pointer to q_vector containing needed info
4982  * returns true if ring is completely cleaned
4983  **/
4984 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4985 {
4986         struct igb_adapter *adapter = q_vector->adapter;
4987         struct igb_ring *tx_ring = q_vector->tx_ring;
4988         struct net_device *netdev = tx_ring->netdev;
4989         struct e1000_hw *hw = &adapter->hw;
4990         struct igb_buffer *buffer_info;
4991         struct sk_buff *skb;
4992         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4993         unsigned int total_bytes = 0, total_packets = 0;
4994         unsigned int i, eop, count = 0;
4995         bool cleaned = false;
4996
4997         i = tx_ring->next_to_clean;
4998         eop = tx_ring->buffer_info[i].next_to_watch;
4999         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5000
5001         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5002                (count < tx_ring->count)) {
5003                 for (cleaned = false; !cleaned; count++) {
5004                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5005                         buffer_info = &tx_ring->buffer_info[i];
5006                         cleaned = (i == eop);
5007                         skb = buffer_info->skb;
5008
5009                         if (skb) {
5010                                 unsigned int segs, bytecount;
5011                                 /* gso_segs is currently only valid for tcp */
5012                                 segs = buffer_info->gso_segs;
5013                                 /* multiply data chunks by size of headers */
5014                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
5015                                             skb->len;
5016                                 total_packets += segs;
5017                                 total_bytes += bytecount;
5018
5019                                 igb_tx_hwtstamp(q_vector, skb);
5020                         }
5021
5022                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5023                         tx_desc->wb.status = 0;
5024
5025                         i++;
5026                         if (i == tx_ring->count)
5027                                 i = 0;
5028                 }
5029                 eop = tx_ring->buffer_info[i].next_to_watch;
5030                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5031         }
5032
5033         tx_ring->next_to_clean = i;
5034
5035         if (unlikely(count &&
5036                      netif_carrier_ok(netdev) &&
5037                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5038                 /* Make sure that anybody stopping the queue after this
5039                  * sees the new next_to_clean.
5040                  */
5041                 smp_mb();
5042                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5043                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5044                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5045                         tx_ring->tx_stats.restart_queue++;
5046                 }
5047         }
5048
5049         if (tx_ring->detect_tx_hung) {
5050                 /* Detect a transmit hang in hardware, this serializes the
5051                  * check with the clearing of time_stamp and movement of i */
5052                 tx_ring->detect_tx_hung = false;
5053                 if (tx_ring->buffer_info[i].time_stamp &&
5054                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5055                                (adapter->tx_timeout_factor * HZ)) &&
5056                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5057
5058                         /* detected Tx unit hang */
5059                         dev_err(&tx_ring->pdev->dev,
5060                                 "Detected Tx Unit Hang\n"
5061                                 "  Tx Queue             <%d>\n"
5062                                 "  TDH                  <%x>\n"
5063                                 "  TDT                  <%x>\n"
5064                                 "  next_to_use          <%x>\n"
5065                                 "  next_to_clean        <%x>\n"
5066                                 "buffer_info[next_to_clean]\n"
5067                                 "  time_stamp           <%lx>\n"
5068                                 "  next_to_watch        <%x>\n"
5069                                 "  jiffies              <%lx>\n"
5070                                 "  desc.status          <%x>\n",
5071                                 tx_ring->queue_index,
5072                                 readl(tx_ring->head),
5073                                 readl(tx_ring->tail),
5074                                 tx_ring->next_to_use,
5075                                 tx_ring->next_to_clean,
5076                                 tx_ring->buffer_info[eop].time_stamp,
5077                                 eop,
5078                                 jiffies,
5079                                 eop_desc->wb.status);
5080                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5081                 }
5082         }
5083         tx_ring->total_bytes += total_bytes;
5084         tx_ring->total_packets += total_packets;
5085         tx_ring->tx_stats.bytes += total_bytes;
5086         tx_ring->tx_stats.packets += total_packets;
5087         return (count < tx_ring->count);
5088 }
5089
5090 /**
5091  * igb_receive_skb - helper function to handle rx indications
5092  * @q_vector: structure containing interrupt and ring information
5093  * @skb: packet to send up
5094  * @vlan_tag: vlan tag for packet
5095  **/
5096 static void igb_receive_skb(struct igb_q_vector *q_vector,
5097                             struct sk_buff *skb,
5098                             u16 vlan_tag)
5099 {
5100         struct igb_adapter *adapter = q_vector->adapter;
5101
5102         if (vlan_tag && adapter->vlgrp)
5103                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5104                                  vlan_tag, skb);
5105         else
5106                 napi_gro_receive(&q_vector->napi, skb);
5107 }
5108
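/**
 * igb_rx_checksum_adv - set skb checksum state from the Rx descriptor status
 * @ring: ring the packet was received on
 * @status_err: descriptor status/error field
 * @skb: packet being passed up the stack
 **/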
5109 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5110                                        u32 status_err, struct sk_buff *skb)
5111 {
5112         skb->ip_summed = CHECKSUM_NONE;
5113
5114         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5115         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5116              (status_err & E1000_RXD_STAT_IXSM))
5117                 return;
5118
5119         /* TCP/UDP checksum error bit is set */
5120         if (status_err &
5121             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5122                 /*
5123                  * work around errata with sctp packets where the TCPE aka
5124                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5125                  * packets, (aka let the stack check the crc32c)
5126                  */
5127                 if ((skb->len == 60) &&
5128                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5129                         ring->rx_stats.csum_err++;
5130
5131                 /* let the stack verify checksum errors */
5132                 return;
5133         }
5134         /* It must be a TCP or UDP packet with a valid checksum */
5135         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5136                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5137
5138         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5139 }
5140
5141 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5142                                    struct sk_buff *skb)
5143 {
5144         struct igb_adapter *adapter = q_vector->adapter;
5145         struct e1000_hw *hw = &adapter->hw;
5146         u64 regval;
5147
5148         /*
5149          * If this bit is set, then the RX registers contain the time stamp. No
5150          * other packet will be time stamped until we read these registers, so
5151          * read the registers to make them available again. Because only one
5152          * packet can be time stamped at a time, we know that the register
5153          * values must belong to this one here and therefore we don't need to
5154          * compare any of the additional attributes stored for it.
5155          *
5156          * If nothing went wrong, then it should have a skb_shared_tx that we
5157          * can turn into a skb_shared_hwtstamps.
5158          */
5159         if (staterr & E1000_RXDADV_STAT_TSIP) {
5160                 u32 *stamp = (u32 *)skb->data;
5161                 regval = le32_to_cpu(*(stamp + 2));
5162                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5163                 skb_pull(skb, IGB_TS_HDR_LEN);
5164         } else {
5165                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5166                         return;
5167
5168                 regval = rd32(E1000_RXSTMPL);
5169                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5170         }
5171
5172         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5173 }
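
/**
 * igb_get_hlen - extract the header buffer length from an advanced Rx descriptor
 * @rx_ring: ring the descriptor belongs to
 * @rx_desc: advanced Rx descriptor to parse
 **/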
5174 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5175                                union e1000_adv_rx_desc *rx_desc)
5176 {
5177         /* HW will not DMA in data larger than the given buffer, even if it
5178          * parses the (NFS, of course) header to be larger.  In that case, it
5179          * fills the header buffer and spills the rest into the page.
5180          */
5181         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5182                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5183         if (hlen > rx_ring->rx_buffer_len)
5184                 hlen = rx_ring->rx_buffer_len;
5185         return hlen;
5186 }
5187
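/**
 * igb_clean_rx_irq_adv - process completed Rx descriptors and refill buffers
 * @q_vector: queue vector the Rx ring belongs to
 * @work_done: incremented for every packet processed
 * @budget: maximum number of packets to process in this poll
 **/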
5188 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5189                                  int *work_done, int budget)
5190 {
5191         struct igb_ring *rx_ring = q_vector->rx_ring;
5192         struct net_device *netdev = rx_ring->netdev;
5193         struct pci_dev *pdev = rx_ring->pdev;
5194         union e1000_adv_rx_desc *rx_desc , *next_rxd;
5195         struct igb_buffer *buffer_info , *next_buffer;
5196         struct sk_buff *skb;
5197         bool cleaned = false;
5198         int cleaned_count = 0;
5199         int current_node = numa_node_id();
5200         unsigned int total_bytes = 0, total_packets = 0;
5201         unsigned int i;
5202         u32 staterr;
5203         u16 length;
5204         u16 vlan_tag;
5205
5206         i = rx_ring->next_to_clean;
5207         buffer_info = &rx_ring->buffer_info[i];
5208         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5209         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5210
5211         while (staterr & E1000_RXD_STAT_DD) {
5212                 if (*work_done >= budget)
5213                         break;
5214                 (*work_done)++;
5215
5216                 skb = buffer_info->skb;
5217                 prefetch(skb->data - NET_IP_ALIGN);
5218                 buffer_info->skb = NULL;
5219
5220                 i++;
5221                 if (i == rx_ring->count)
5222                         i = 0;
5223
5224                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5225                 prefetch(next_rxd);
5226                 next_buffer = &rx_ring->buffer_info[i];
5227
5228                 length = le16_to_cpu(rx_desc->wb.upper.length);
5229                 cleaned = true;
5230                 cleaned_count++;
5231
5232                 if (buffer_info->dma) {
5233                         pci_unmap_single(pdev, buffer_info->dma,
5234                                          rx_ring->rx_buffer_len,
5235                                          PCI_DMA_FROMDEVICE);
5236                         buffer_info->dma = 0;
5237                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5238                                 skb_put(skb, length);
5239                                 goto send_up;
5240                         }
5241                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5242                 }
5243
5244                 if (length) {
5245                         pci_unmap_page(pdev, buffer_info->page_dma,
5246                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5247                         buffer_info->page_dma = 0;
5248
5249                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5250                                                 buffer_info->page,
5251                                                 buffer_info->page_offset,
5252                                                 length);
5253
5254                         if ((page_count(buffer_info->page) != 1) ||
5255                             (page_to_nid(buffer_info->page) != current_node))
5256                                 buffer_info->page = NULL;
5257                         else
5258                                 get_page(buffer_info->page);
5259
5260                         skb->len += length;
5261                         skb->data_len += length;
5262                         skb->truesize += length;
5263                 }
5264
5265                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5266                         buffer_info->skb = next_buffer->skb;
5267                         buffer_info->dma = next_buffer->dma;
5268                         next_buffer->skb = skb;
5269                         next_buffer->dma = 0;
5270                         goto next_desc;
5271                 }
5272 send_up:
5273                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5274                         dev_kfree_skb_irq(skb);
5275                         goto next_desc;
5276                 }
5277
5278                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5279                         igb_rx_hwtstamp(q_vector, staterr, skb);
5280                 total_bytes += skb->len;
5281                 total_packets++;
5282
5283                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5284
5285                 skb->protocol = eth_type_trans(skb, netdev);
5286                 skb_record_rx_queue(skb, rx_ring->queue_index);
5287
5288                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5289                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5290
5291                 igb_receive_skb(q_vector, skb, vlan_tag);
5292
5293 next_desc:
5294                 rx_desc->wb.upper.status_error = 0;
5295
5296                 /* return some buffers to hardware, one at a time is too slow */
5297                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5298                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5299                         cleaned_count = 0;
5300                 }
5301
5302                 /* use prefetched values */
5303                 rx_desc = next_rxd;
5304                 buffer_info = next_buffer;
5305                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5306         }
5307
5308         rx_ring->next_to_clean = i;
5309         cleaned_count = igb_desc_unused(rx_ring);
5310
5311         if (cleaned_count)
5312                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5313
5314         rx_ring->total_packets += total_packets;
5315         rx_ring->total_bytes += total_bytes;
5316         rx_ring->rx_stats.packets += total_packets;
5317         rx_ring->rx_stats.bytes += total_bytes;
5318         return cleaned;
5319 }
5320
5321 /**
5322  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5323  * @rx_ring: address of the ring structure to repopulate
 * @cleaned_count: number of buffers to replace
5324  **/
5325 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5326 {
5327         struct net_device *netdev = rx_ring->netdev;
5328         union e1000_adv_rx_desc *rx_desc;
5329         struct igb_buffer *buffer_info;
5330         struct sk_buff *skb;
5331         unsigned int i;
5332         int bufsz;
5333
5334         i = rx_ring->next_to_use;
5335         buffer_info = &rx_ring->buffer_info[i];
5336
5337         bufsz = rx_ring->rx_buffer_len;
5338
5339         while (cleaned_count--) {
5340                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5341
5342                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5343                         if (!buffer_info->page) {
5344                                 buffer_info->page = netdev_alloc_page(netdev);
5345                                 if (!buffer_info->page) {
5346                                         rx_ring->rx_stats.alloc_failed++;
5347                                         goto no_buffers;
5348                                 }
5349                                 buffer_info->page_offset = 0;
5350                         } else {
5351                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5352                         }
5353                         buffer_info->page_dma =
5354                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5355                                              buffer_info->page_offset,
5356                                              PAGE_SIZE / 2,
5357                                              PCI_DMA_FROMDEVICE);
5358                         if (pci_dma_mapping_error(rx_ring->pdev,
5359                                                   buffer_info->page_dma)) {
5360                                 buffer_info->page_dma = 0;
5361                                 rx_ring->rx_stats.alloc_failed++;
5362                                 goto no_buffers;
5363                         }
5364                 }
5365
5366                 skb = buffer_info->skb;
5367                 if (!skb) {
5368                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5369                         if (!skb) {
5370                                 rx_ring->rx_stats.alloc_failed++;
5371                                 goto no_buffers;
5372                         }
5373
5374                         buffer_info->skb = skb;
5375                 }
5376                 if (!buffer_info->dma) {
5377                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5378                                                           skb->data,
5379                                                           bufsz,
5380                                                           PCI_DMA_FROMDEVICE);
5381                         if (pci_dma_mapping_error(rx_ring->pdev,
5382                                                   buffer_info->dma)) {
5383                                 buffer_info->dma = 0;
5384                                 rx_ring->rx_stats.alloc_failed++;
5385                                 goto no_buffers;
5386                         }
5387                 }
5388                 /* Refresh the desc even if buffer_addrs didn't change because
5389                  * each write-back erases this info. */
5390                 if (bufsz < IGB_RXBUFFER_1024) {
5391                         rx_desc->read.pkt_addr =
5392                              cpu_to_le64(buffer_info->page_dma);
5393                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5394                 } else {
5395                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5396                         rx_desc->read.hdr_addr = 0;
5397                 }
5398
5399                 i++;
5400                 if (i == rx_ring->count)
5401                         i = 0;
5402                 buffer_info = &rx_ring->buffer_info[i];
5403         }
5404
5405 no_buffers:
5406         if (rx_ring->next_to_use != i) {
5407                 rx_ring->next_to_use = i;
5408                 if (i == 0)
5409                         i = (rx_ring->count - 1);
5410                 else
5411                         i--;
5412
5413                 /* Force memory writes to complete before letting h/w
5414                  * know there are new descriptors to fetch.  (Only
5415                  * applicable for weak-ordered memory model archs,
5416                  * such as IA-64). */
5417                 wmb();
5418                 writel(i, rx_ring->tail);
5419         }
5420 }
5421
5422 /**
5423  * igb_mii_ioctl - handle MII ioctl requests
5424  * @netdev: network interface device structure
5425  * @ifr: interface request structure containing the MII data
5426  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG, SIOCSMIIREG)
5427  **/
5428 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5429 {
5430         struct igb_adapter *adapter = netdev_priv(netdev);
5431         struct mii_ioctl_data *data = if_mii(ifr);
5432
5433         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5434                 return -EOPNOTSUPP;
5435
5436         switch (cmd) {
5437         case SIOCGMIIPHY:
5438                 data->phy_id = adapter->hw.phy.addr;
5439                 break;
5440         case SIOCGMIIREG:
5441                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5442                                      &data->val_out))
5443                         return -EIO;
5444                 break;
5445         case SIOCSMIIREG:
5446         default:
5447                 return -EOPNOTSUPP;
5448         }
5449         return 0;
5450 }
5451
5452 /**
5453  * igb_hwtstamp_ioctl - control hardware time stamping
5454  * @netdev: network interface device structure
5455  * @ifr: interface request structure containing a struct hwtstamp_config
5456  * @cmd: ioctl command (SIOCSHWTSTAMP)
5457  *
5458  * Outgoing time stamping can be enabled and disabled. Play nice and
5459  * disable it when requested, although it shouldn't cause any overhead
5460  * when no packet needs it. At most one packet in the queue may be
5461  * marked for time stamping, otherwise it would be impossible to tell
5462  * for sure to which packet the hardware time stamp belongs.
5463  *
5464  * Incoming time stamping has to be configured via the hardware
5465  * filters. Not all combinations are supported, in particular event
5466  * type has to be specified. Matching the kind of event packet is
5467  * not supported, with the exception of "all V2 events regardless of
5468  * layer 2 or 4".
5469  *
5470  **/
5471 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5472                               struct ifreq *ifr, int cmd)
5473 {
5474         struct igb_adapter *adapter = netdev_priv(netdev);
5475         struct e1000_hw *hw = &adapter->hw;
5476         struct hwtstamp_config config;
5477         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5478         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5479         u32 tsync_rx_cfg = 0;
5480         bool is_l4 = false;
5481         bool is_l2 = false;
5482         u32 regval;
5483
5484         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5485                 return -EFAULT;
5486
5487         /* reserved for future extensions */
5488         if (config.flags)
5489                 return -EINVAL;
5490
5491         switch (config.tx_type) {
5492         case HWTSTAMP_TX_OFF:
5493                 tsync_tx_ctl = 0;
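                /* fall through */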
5494         case HWTSTAMP_TX_ON:
5495                 break;
5496         default:
5497                 return -ERANGE;
5498         }
5499
5500         switch (config.rx_filter) {
5501         case HWTSTAMP_FILTER_NONE:
5502                 tsync_rx_ctl = 0;
5503                 break;
5504         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5505         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5506         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5507         case HWTSTAMP_FILTER_ALL:
5508                 /*
5509                  * register TSYNCRXCFG must be set, therefore it is not
5510                  * possible to time stamp both Sync and Delay_Req messages
5511                  * => fall back to time stamping all packets
5512                  */
5513                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5514                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5515                 break;
5516         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5517                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5518                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5519                 is_l4 = true;
5520                 break;
5521         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5522                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5523                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5524                 is_l4 = true;
5525                 break;
5526         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5527         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5528                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5529                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5530                 is_l2 = true;
5531                 is_l4 = true;
5532                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5533                 break;
5534         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5535         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5536                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5537                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5538                 is_l2 = true;
5539                 is_l4 = true;
5540                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5541                 break;
5542         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5543         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5544         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5545                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5546                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5547                 is_l2 = true;
5548                 break;
5549         default:
5550                 return -ERANGE;
5551         }
5552
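             /* the 82575 does not support hardware time stamping */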
5553         if (hw->mac.type == e1000_82575) {
5554                 if (tsync_rx_ctl || tsync_tx_ctl)
5555                         return -EINVAL;
5556                 return 0;
5557         }
5558
5559         /*
5560          * Per-packet timestamping only works if all packets are
5561          * timestamped, so enable timestamping in all packets as
5562          * long as one rx filter was configured.
5563          */
5564         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5565                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5566                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5567         }
5568
5569         /* enable/disable TX */
5570         regval = rd32(E1000_TSYNCTXCTL);
5571         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5572         regval |= tsync_tx_ctl;
5573         wr32(E1000_TSYNCTXCTL, regval);
5574
5575         /* enable/disable RX */
5576         regval = rd32(E1000_TSYNCRXCTL);
5577         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5578         regval |= tsync_rx_ctl;
5579         wr32(E1000_TSYNCRXCTL, regval);
5580
5581         /* define which PTP packets are time stamped */
5582         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5583
5584         /* define ethertype filter for timestamped packets */
5585         if (is_l2)
5586                 wr32(E1000_ETQF(3),
5587                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5588                                  E1000_ETQF_1588 | /* enable timestamping */
5589                                  ETH_P_1588));     /* 1588 eth protocol type */
5590         else
5591                 wr32(E1000_ETQF(3), 0);
5592
5593 #define PTP_PORT 319
5594         /* L4 Queue Filter[3]: filter by destination port and protocol */
5595         if (is_l4) {
5596                 u32 ftqf = (IPPROTO_UDP /* UDP */
5597                         | E1000_FTQF_VF_BP /* VF not compared */
5598                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5599                         | E1000_FTQF_MASK); /* mask all inputs */
5600                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5601
5602                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5603                 wr32(E1000_IMIREXT(3),
5604                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5605                 if (hw->mac.type == e1000_82576) {
5606                         /* enable source port check */
5607                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5608                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5609                 }
5610                 wr32(E1000_FTQF(3), ftqf);
5611         } else {
5612                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5613         }
5614         wrfl();
5615
5616         adapter->hwtstamp_config = config;
5617
5618         /* clear TX/RX time stamp registers, just to be sure */
5619         regval = rd32(E1000_TXSTMPH);
5620         regval = rd32(E1000_RXSTMPH);
5621
5622         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5623                 -EFAULT : 0;
5624 }
5625
5626 /**
5627  * igb_ioctl - handle ioctl calls for the device
5628  * @netdev: network interface device structure
5629  * @ifr: interface request data
5630  * @cmd: ioctl command
5631  **/
5632 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5633 {
5634         switch (cmd) {
5635         case SIOCGMIIPHY:
5636         case SIOCGMIIREG:
5637         case SIOCSMIIREG:
5638                 return igb_mii_ioctl(netdev, ifr, cmd);
5639         case SIOCSHWTSTAMP:
5640                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5641         default:
5642                 return -EOPNOTSUPP;
5643         }
5644 }
5645
5646 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5647 {
5648         struct igb_adapter *adapter = hw->back;
5649         u16 cap_offset;
5650
5651         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5652         if (!cap_offset)
5653                 return -E1000_ERR_CONFIG;
5654
5655         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5656
5657         return 0;
5658 }
5659
5660 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5661 {
5662         struct igb_adapter *adapter = hw->back;
5663         u16 cap_offset;
5664
5665         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5666         if (!cap_offset)
5667                 return -E1000_ERR_CONFIG;
5668
5669         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5670
5671         return 0;
5672 }
5673
5674 static void igb_vlan_rx_register(struct net_device *netdev,
5675                                  struct vlan_group *grp)
5676 {
5677         struct igb_adapter *adapter = netdev_priv(netdev);
5678         struct e1000_hw *hw = &adapter->hw;
5679         u32 ctrl, rctl;
5680
5681         igb_irq_disable(adapter);
5682         adapter->vlgrp = grp;
5683
5684         if (grp) {
5685                 /* enable VLAN tag insert/strip */
5686                 ctrl = rd32(E1000_CTRL);
5687                 ctrl |= E1000_CTRL_VME;
5688                 wr32(E1000_CTRL, ctrl);
5689
5690                 /* Disable CFI check */
5691                 rctl = rd32(E1000_RCTL);
5692                 rctl &= ~E1000_RCTL_CFIEN;
5693                 wr32(E1000_RCTL, rctl);
5694         } else {
5695                 /* disable VLAN tag insert/strip */
5696                 ctrl = rd32(E1000_CTRL);
5697                 ctrl &= ~E1000_CTRL_VME;
5698                 wr32(E1000_CTRL, ctrl);
5699         }
5700
5701         igb_rlpml_set(adapter);
5702
5703         if (!test_bit(__IGB_DOWN, &adapter->state))
5704                 igb_irq_enable(adapter);
5705 }
5706
5707 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5708 {
5709         struct igb_adapter *adapter = netdev_priv(netdev);
5710         struct e1000_hw *hw = &adapter->hw;
5711         int pf_id = adapter->vfs_allocated_count;
5712
5713         /* attempt to add filter to vlvf array */
5714         igb_vlvf_set(adapter, vid, true, pf_id);
5715
5716         /* add the filter since PF can receive vlans w/o entry in vlvf */
5717         igb_vfta_set(hw, vid, true);
5718 }
5719
5720 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5721 {
5722         struct igb_adapter *adapter = netdev_priv(netdev);
5723         struct e1000_hw *hw = &adapter->hw;
5724         int pf_id = adapter->vfs_allocated_count;
5725         s32 err;
5726
5727         igb_irq_disable(adapter);
5728         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5729
5730         if (!test_bit(__IGB_DOWN, &adapter->state))
5731                 igb_irq_enable(adapter);
5732
5733         /* remove vlan from VLVF table array */
5734         err = igb_vlvf_set(adapter, vid, false, pf_id);
5735
5736         /* if vid was not present in VLVF just remove it from table */
5737         if (err)
5738                 igb_vfta_set(hw, vid, false);
5739 }
5740
5741 static void igb_restore_vlan(struct igb_adapter *adapter)
5742 {
5743         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5744
5745         if (adapter->vlgrp) {
5746                 u16 vid;
5747                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5748                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5749                                 continue;
5750                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5751                 }
5752         }
5753 }
5754
5755 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5756 {
5757         struct pci_dev *pdev = adapter->pdev;
5758         struct e1000_mac_info *mac = &adapter->hw.mac;
5759
5760         mac->autoneg = 0;
5761
5762         switch (spddplx) {
5763         case SPEED_10 + DUPLEX_HALF:
5764                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5765                 break;
5766         case SPEED_10 + DUPLEX_FULL:
5767                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5768                 break;
5769         case SPEED_100 + DUPLEX_HALF:
5770                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5771                 break;
5772         case SPEED_100 + DUPLEX_FULL:
5773                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5774                 break;
5775         case SPEED_1000 + DUPLEX_FULL:
5776                 mac->autoneg = 1;
5777                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5778                 break;
5779         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5780         default:
5781                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5782                 return -EINVAL;
5783         }
5784         return 0;
5785 }
5786
5787 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5788 {
5789         struct net_device *netdev = pci_get_drvdata(pdev);
5790         struct igb_adapter *adapter = netdev_priv(netdev);
5791         struct e1000_hw *hw = &adapter->hw;
5792         u32 ctrl, rctl, status;
5793         u32 wufc = adapter->wol;
5794 #ifdef CONFIG_PM
5795         int retval = 0;
5796 #endif
5797
5798         netif_device_detach(netdev);
5799
5800         if (netif_running(netdev))
5801                 igb_close(netdev);
5802
5803         igb_clear_interrupt_scheme(adapter);
5804
5805 #ifdef CONFIG_PM
5806         retval = pci_save_state(pdev);
5807         if (retval)
5808                 return retval;
5809 #endif
5810
5811         status = rd32(E1000_STATUS);
5812         if (status & E1000_STATUS_LU)
5813                 wufc &= ~E1000_WUFC_LNKC;
5814
5815         if (wufc) {
5816                 igb_setup_rctl(adapter);
5817                 igb_set_rx_mode(netdev);
5818
5819                 /* turn on all-multi mode if wake on multicast is enabled */
5820                 if (wufc & E1000_WUFC_MC) {
5821                         rctl = rd32(E1000_RCTL);
5822                         rctl |= E1000_RCTL_MPE;
5823                         wr32(E1000_RCTL, rctl);
5824                 }
5825
5826                 ctrl = rd32(E1000_CTRL);
5827                 /* advertise wake from D3Cold */
5828                 #define E1000_CTRL_ADVD3WUC 0x00100000
5829                 /* phy power management enable */
5830                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5831                 ctrl |= E1000_CTRL_ADVD3WUC;
5832                 wr32(E1000_CTRL, ctrl);
5833
5834                 /* Allow time for pending master requests to run */
5835                 igb_disable_pcie_master(hw);
5836
5837                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5838                 wr32(E1000_WUFC, wufc);
5839         } else {
5840                 wr32(E1000_WUC, 0);
5841                 wr32(E1000_WUFC, 0);
5842         }
5843
5844         *enable_wake = wufc || adapter->en_mng_pt;
5845         if (!*enable_wake)
5846                 igb_power_down_link(adapter);
5847         else
5848                 igb_power_up_link(adapter);
5849
5850         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5851          * would have already happened in close and is redundant. */
5852         igb_release_hw_control(adapter);
5853
5854         pci_disable_device(pdev);
5855
5856         return 0;
5857 }
5858
5859 #ifdef CONFIG_PM
5860 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5861 {
5862         int retval;
5863         bool wake;
5864
5865         retval = __igb_shutdown(pdev, &wake);
5866         if (retval)
5867                 return retval;
5868
5869         if (wake) {
5870                 pci_prepare_to_sleep(pdev);
5871         } else {
5872                 pci_wake_from_d3(pdev, false);
5873                 pci_set_power_state(pdev, PCI_D3hot);
5874         }
5875
5876         return 0;
5877 }
5878
5879 static int igb_resume(struct pci_dev *pdev)
5880 {
5881         struct net_device *netdev = pci_get_drvdata(pdev);
5882         struct igb_adapter *adapter = netdev_priv(netdev);
5883         struct e1000_hw *hw = &adapter->hw;
5884         int err;
5885
5886         pci_set_power_state(pdev, PCI_D0);
5887         pci_restore_state(pdev);
5888         pci_save_state(pdev);
5889
5890         err = pci_enable_device_mem(pdev);
5891         if (err) {
5892                 dev_err(&pdev->dev,
5893                         "igb: Cannot enable PCI device from suspend\n");
5894                 return err;
5895         }
5896         pci_set_master(pdev);
5897
5898         pci_enable_wake(pdev, PCI_D3hot, 0);
5899         pci_enable_wake(pdev, PCI_D3cold, 0);
5900
5901         if (igb_init_interrupt_scheme(adapter)) {
5902                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5903                 return -ENOMEM;
5904         }
5905
5906         igb_reset(adapter);
5907
5908         /* let the f/w know that the h/w is now under the control of the
5909          * driver. */
5910         igb_get_hw_control(adapter);
5911
5912         wr32(E1000_WUS, ~0);
5913
5914         if (netif_running(netdev)) {
5915                 err = igb_open(netdev);
5916                 if (err)
5917                         return err;
5918         }
5919
5920         netif_device_attach(netdev);
5921
5922         return 0;
5923 }
5924 #endif
5925
5926 static void igb_shutdown(struct pci_dev *pdev)
5927 {
5928         bool wake;
5929
5930         __igb_shutdown(pdev, &wake);
5931
5932         if (system_state == SYSTEM_POWER_OFF) {
5933                 pci_wake_from_d3(pdev, wake);
5934                 pci_set_power_state(pdev, PCI_D3hot);
5935         }
5936 }
5937
5938 #ifdef CONFIG_NET_POLL_CONTROLLER
5939 /*
5940  * Polling 'interrupt' - used by things like netconsole to send skbs
5941  * without having to re-enable interrupts. It's not called while
5942  * the interrupt routine is executing.
5943  */
5944 static void igb_netpoll(struct net_device *netdev)
5945 {
5946         struct igb_adapter *adapter = netdev_priv(netdev);
5947         struct e1000_hw *hw = &adapter->hw;
5948         int i;
5949
5950         if (!adapter->msix_entries) {
5951                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5952                 igb_irq_disable(adapter);
5953                 napi_schedule(&q_vector->napi);
5954                 return;
5955         }
5956
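             /* MSI-X: mask each vector's interrupt and schedule its NAPI poll */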
5957         for (i = 0; i < adapter->num_q_vectors; i++) {
5958                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5959                 wr32(E1000_EIMC, q_vector->eims_value);
5960                 napi_schedule(&q_vector->napi);
5961         }
5962 }
5963 #endif /* CONFIG_NET_POLL_CONTROLLER */
5964
5965 /**
5966  * igb_io_error_detected - called when PCI error is detected
5967  * @pdev: Pointer to PCI device
5968  * @state: The current pci connection state
5969  *
5970  * This function is called after a PCI bus error affecting
5971  * this device has been detected.
5972  */
5973 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5974                                               pci_channel_state_t state)
5975 {
5976         struct net_device *netdev = pci_get_drvdata(pdev);
5977         struct igb_adapter *adapter = netdev_priv(netdev);
5978
5979         netif_device_detach(netdev);
5980
5981         if (state == pci_channel_io_perm_failure)
5982                 return PCI_ERS_RESULT_DISCONNECT;
5983
5984         if (netif_running(netdev))
5985                 igb_down(adapter);
5986         pci_disable_device(pdev);
5987
5988         /* Request a slot reset. */
5989         return PCI_ERS_RESULT_NEED_RESET;
5990 }
5991
5992 /**
5993  * igb_io_slot_reset - called after the pci bus has been reset.
5994  * @pdev: Pointer to PCI device
5995  *
5996  * Restart the card from scratch, as if from a cold-boot. Implementation
5997  * resembles the first half of the igb_resume routine.
5998  */
5999 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6000 {
6001         struct net_device *netdev = pci_get_drvdata(pdev);
6002         struct igb_adapter *adapter = netdev_priv(netdev);
6003         struct e1000_hw *hw = &adapter->hw;
6004         pci_ers_result_t result;
6005         int err;
6006
6007         if (pci_enable_device_mem(pdev)) {
6008                 dev_err(&pdev->dev,
6009                         "Cannot re-enable PCI device after reset.\n");
6010                 result = PCI_ERS_RESULT_DISCONNECT;
6011         } else {
6012                 pci_set_master(pdev);
6013                 pci_restore_state(pdev);
6014                 pci_save_state(pdev);
6015
6016                 pci_enable_wake(pdev, PCI_D3hot, 0);
6017                 pci_enable_wake(pdev, PCI_D3cold, 0);
6018
6019                 igb_reset(adapter);
6020                 wr32(E1000_WUS, ~0);
6021                 result = PCI_ERS_RESULT_RECOVERED;
6022         }
6023
6024         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6025         if (err) {
6026                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6027                         "failed 0x%0x\n", err);
6028                 /* non-fatal, continue */
6029         }
6030
6031         return result;
6032 }
6033
6034 /**
6035  * igb_io_resume - called when traffic can start flowing again.
6036  * @pdev: Pointer to PCI device
6037  *
6038  * This callback is called when the error recovery driver tells us that
6039  * it is OK to resume normal operation. Implementation resembles the
6040  * second half of the igb_resume routine.
6041  */
6042 static void igb_io_resume(struct pci_dev *pdev)
6043 {
6044         struct net_device *netdev = pci_get_drvdata(pdev);
6045         struct igb_adapter *adapter = netdev_priv(netdev);
6046
6047         if (netif_running(netdev)) {
6048                 if (igb_up(adapter)) {
6049                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6050                         return;
6051                 }
6052         }
6053
6054         netif_device_attach(netdev);
6055
6056         /* let the f/w know that the h/w is now under the control of the
6057          * driver. */
6058         igb_get_hw_control(adapter);
6059 }
6060
6061 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6062                              u8 qsel)
6063 {
6064         u32 rar_low, rar_high;
6065         struct e1000_hw *hw = &adapter->hw;
6066
6067         /* HW expects these in little endian so we reverse the byte order
6068          * from network order (big endian) to little endian
6069          */
6070         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6071                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6072         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6073
6074         /* Indicate to hardware the Address is Valid. */
6075         rar_high |= E1000_RAH_AV;
6076
6077         if (hw->mac.type == e1000_82575)
6078                 rar_high |= E1000_RAH_POOL_1 * qsel;
6079         else
6080                 rar_high |= E1000_RAH_POOL_1 << qsel;
6081
6082         wr32(E1000_RAL(index), rar_low);
6083         wrfl();
6084         wr32(E1000_RAH(index), rar_high);
6085         wrfl();
6086 }
6087
6088 static int igb_set_vf_mac(struct igb_adapter *adapter,
6089                           int vf, unsigned char *mac_addr)
6090 {
6091         struct e1000_hw *hw = &adapter->hw;
6092         /* VF MAC addresses start at the end of the receive address registers
6093          * and move towards the first, so a collision should not be possible */
6094         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6095
6096         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6097
6098         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6099
6100         return 0;
6101 }
6102
6103 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6104 {
6105         struct igb_adapter *adapter = netdev_priv(netdev);
6106         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6107                 return -EINVAL;
6108         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6109         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6110         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6111                                       " change effective.\n");
6112         if (test_bit(__IGB_DOWN, &adapter->state)) {
6113                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6114                          " but the PF device is not up.\n");
6115                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6116                          " attempting to use the VF device.\n");
6117         }
6118         return igb_set_vf_mac(adapter, vf, mac);
6119 }
6120
6121 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6122 {
6123         return -EOPNOTSUPP;
6124 }
6125
6126 static int igb_ndo_get_vf_config(struct net_device *netdev,
6127                                  int vf, struct ifla_vf_info *ivi)
6128 {
6129         struct igb_adapter *adapter = netdev_priv(netdev);
6130         if (vf >= adapter->vfs_allocated_count)
6131                 return -EINVAL;
6132         ivi->vf = vf;
6133         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6134         ivi->tx_rate = 0;
6135         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6136         ivi->qos = adapter->vf_data[vf].pf_qos;
6137         return 0;
6138 }
6139
6140 static void igb_vmm_control(struct igb_adapter *adapter)
6141 {
6142         struct e1000_hw *hw = &adapter->hw;
6143         u32 reg;
6144
6145         switch (hw->mac.type) {
6146         case e1000_82575:
6147         default:
6148                 /* replication is not supported for 82575 */
6149                 return;
6150         case e1000_82576:
6151                 /* notify HW that the MAC is adding vlan tags */
6152                 reg = rd32(E1000_DTXCTL);
6153                 reg |= E1000_DTXCTL_VLAN_ADDED;
6154                 wr32(E1000_DTXCTL, reg);
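                     /* fall through */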
6155         case e1000_82580:
6156                 /* enable replication vlan tag stripping */
6157                 reg = rd32(E1000_RPLOLR);
6158                 reg |= E1000_RPLOLR_STRVLAN;
6159                 wr32(E1000_RPLOLR, reg);
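                     /* fall through */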
6160         case e1000_i350:
6161                 /* none of the above registers are supported by i350 */
6162                 break;
6163         }
6164
6165         if (adapter->vfs_allocated_count) {
6166                 igb_vmdq_set_loopback_pf(hw, true);
6167                 igb_vmdq_set_replication_pf(hw, true);
6168         } else {
6169                 igb_vmdq_set_loopback_pf(hw, false);
6170                 igb_vmdq_set_replication_pf(hw, false);
6171         }
6172 }
6173
6174 /* igb_main.c */