1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2009 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
48 #include <linux/dca.h>
52 #define DRV_VERSION "2.1.0-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
59 static const struct e1000_info *igb_info_tbl[] = {
60 [board_82575] = &e1000_82575_info,
63 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
64 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
65 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
66 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
67 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
68 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
69 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
70 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
74 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
84 /* required last entry */
88 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
90 void igb_reset(struct igb_adapter *);
91 static int igb_setup_all_tx_resources(struct igb_adapter *);
92 static int igb_setup_all_rx_resources(struct igb_adapter *);
93 static void igb_free_all_tx_resources(struct igb_adapter *);
94 static void igb_free_all_rx_resources(struct igb_adapter *);
95 static void igb_setup_mrqc(struct igb_adapter *);
96 void igb_update_stats(struct igb_adapter *);
97 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
98 static void __devexit igb_remove(struct pci_dev *pdev);
99 static int igb_sw_init(struct igb_adapter *);
100 static int igb_open(struct net_device *);
101 static int igb_close(struct net_device *);
102 static void igb_configure_tx(struct igb_adapter *);
103 static void igb_configure_rx(struct igb_adapter *);
104 static void igb_clean_all_tx_rings(struct igb_adapter *);
105 static void igb_clean_all_rx_rings(struct igb_adapter *);
106 static void igb_clean_tx_ring(struct igb_ring *);
107 static void igb_clean_rx_ring(struct igb_ring *);
108 static void igb_set_rx_mode(struct net_device *);
109 static void igb_update_phy_info(unsigned long);
110 static void igb_watchdog(unsigned long);
111 static void igb_watchdog_task(struct work_struct *);
112 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
113 static struct net_device_stats *igb_get_stats(struct net_device *);
114 static int igb_change_mtu(struct net_device *, int);
115 static int igb_set_mac(struct net_device *, void *);
116 static void igb_set_uta(struct igb_adapter *adapter);
117 static irqreturn_t igb_intr(int irq, void *);
118 static irqreturn_t igb_intr_msi(int irq, void *);
119 static irqreturn_t igb_msix_other(int irq, void *);
120 static irqreturn_t igb_msix_ring(int irq, void *);
121 #ifdef CONFIG_IGB_DCA
122 static void igb_update_dca(struct igb_q_vector *);
123 static void igb_setup_dca(struct igb_adapter *);
124 #endif /* CONFIG_IGB_DCA */
125 static bool igb_clean_tx_irq(struct igb_q_vector *);
126 static int igb_poll(struct napi_struct *, int);
127 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
128 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
129 static void igb_tx_timeout(struct net_device *);
130 static void igb_reset_task(struct work_struct *);
131 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
132 static void igb_vlan_rx_add_vid(struct net_device *, u16);
133 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
134 static void igb_restore_vlan(struct igb_adapter *);
135 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
136 static void igb_ping_all_vfs(struct igb_adapter *);
137 static void igb_msg_task(struct igb_adapter *);
138 static void igb_vmm_control(struct igb_adapter *);
139 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
140 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
141 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
142 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
143 int vf, u16 vlan, u8 qos);
144 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
145 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
146 struct ifla_vf_info *ivi);
149 static int igb_suspend(struct pci_dev *, pm_message_t);
150 static int igb_resume(struct pci_dev *);
152 static void igb_shutdown(struct pci_dev *);
153 #ifdef CONFIG_IGB_DCA
154 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
155 static struct notifier_block dca_notifier = {
156 .notifier_call = igb_notify_dca,
161 #ifdef CONFIG_NET_POLL_CONTROLLER
162 /* for netdump / net console */
163 static void igb_netpoll(struct net_device *);
165 #ifdef CONFIG_PCI_IOV
166 static unsigned int max_vfs = 0;
167 module_param(max_vfs, uint, 0);
168 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
169 "per physical function");
170 #endif /* CONFIG_PCI_IOV */
172 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
173 pci_channel_state_t);
174 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
175 static void igb_io_resume(struct pci_dev *);
177 static struct pci_error_handlers igb_err_handler = {
178 .error_detected = igb_io_error_detected,
179 .slot_reset = igb_io_slot_reset,
180 .resume = igb_io_resume,
184 static struct pci_driver igb_driver = {
185 .name = igb_driver_name,
186 .id_table = igb_pci_tbl,
188 .remove = __devexit_p(igb_remove),
190 /* Power Managment Hooks */
191 .suspend = igb_suspend,
192 .resume = igb_resume,
194 .shutdown = igb_shutdown,
195 .err_handler = &igb_err_handler
198 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
199 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
200 MODULE_LICENSE("GPL");
201 MODULE_VERSION(DRV_VERSION);
204 * igb_read_clock - read raw cycle counter (to be used by time counter)
206 static cycle_t igb_read_clock(const struct cyclecounter *tc)
208 struct igb_adapter *adapter =
209 container_of(tc, struct igb_adapter, cycles);
210 struct e1000_hw *hw = &adapter->hw;
215 * The timestamp latches on lowest register read. For the 82580
216 * the lowest register is SYSTIMR instead of SYSTIML. However we never
217 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
219 if (hw->mac.type == e1000_82580) {
220 stamp = rd32(E1000_SYSTIMR) >> 8;
221 shift = IGB_82580_TSYNC_SHIFT;
224 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
225 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
230 * igb_get_hw_dev - return device
231 * used by hardware layer to print debugging information
233 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
235 struct igb_adapter *adapter = hw->back;
236 return adapter->netdev;
240 * igb_init_module - Driver Registration Routine
242 * igb_init_module is the first routine called when the driver is
243 * loaded. All it does is register with the PCI subsystem.
245 static int __init igb_init_module(void)
248 printk(KERN_INFO "%s - version %s\n",
249 igb_driver_string, igb_driver_version);
251 printk(KERN_INFO "%s\n", igb_copyright);
253 #ifdef CONFIG_IGB_DCA
254 dca_register_notify(&dca_notifier);
256 ret = pci_register_driver(&igb_driver);
260 module_init(igb_init_module);
263 * igb_exit_module - Driver Exit Cleanup Routine
265 * igb_exit_module is called just before the driver is removed
268 static void __exit igb_exit_module(void)
270 #ifdef CONFIG_IGB_DCA
271 dca_unregister_notify(&dca_notifier);
273 pci_unregister_driver(&igb_driver);
276 module_exit(igb_exit_module);
278 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
280 * igb_cache_ring_register - Descriptor ring to register mapping
281 * @adapter: board private structure to initialize
283 * Once we know the feature-set enabled for the device, we'll cache
284 * the register offset the descriptor ring is assigned to.
286 static void igb_cache_ring_register(struct igb_adapter *adapter)
289 u32 rbase_offset = adapter->vfs_allocated_count;
291 switch (adapter->hw.mac.type) {
293 /* The queues are allocated for virtualization such that VF 0
294 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
295 * In order to avoid collision we start at the first free queue
296 * and continue consuming queues in the same sequence
298 if (adapter->vfs_allocated_count) {
299 for (; i < adapter->rss_queues; i++)
300 adapter->rx_ring[i]->reg_idx = rbase_offset +
302 for (; j < adapter->rss_queues; j++)
303 adapter->tx_ring[j]->reg_idx = rbase_offset +
310 for (; i < adapter->num_rx_queues; i++)
311 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
312 for (; j < adapter->num_tx_queues; j++)
313 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
318 static void igb_free_queues(struct igb_adapter *adapter)
322 for (i = 0; i < adapter->num_tx_queues; i++) {
323 kfree(adapter->tx_ring[i]);
324 adapter->tx_ring[i] = NULL;
326 for (i = 0; i < adapter->num_rx_queues; i++) {
327 kfree(adapter->rx_ring[i]);
328 adapter->rx_ring[i] = NULL;
330 adapter->num_rx_queues = 0;
331 adapter->num_tx_queues = 0;
335 * igb_alloc_queues - Allocate memory for all rings
336 * @adapter: board private structure to initialize
338 * We allocate one ring per queue at run-time since we don't know the
339 * number of queues at compile-time.
341 static int igb_alloc_queues(struct igb_adapter *adapter)
343 struct igb_ring *ring;
346 for (i = 0; i < adapter->num_tx_queues; i++) {
347 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
350 ring->count = adapter->tx_ring_count;
351 ring->queue_index = i;
352 ring->pdev = adapter->pdev;
353 ring->netdev = adapter->netdev;
354 /* For 82575, context index must be unique per ring. */
355 if (adapter->hw.mac.type == e1000_82575)
356 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
357 adapter->tx_ring[i] = ring;
360 for (i = 0; i < adapter->num_rx_queues; i++) {
361 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
364 ring->count = adapter->rx_ring_count;
365 ring->queue_index = i;
366 ring->pdev = adapter->pdev;
367 ring->netdev = adapter->netdev;
368 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
369 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
370 /* set flag indicating ring supports SCTP checksum offload */
371 if (adapter->hw.mac.type >= e1000_82576)
372 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
373 adapter->rx_ring[i] = ring;
376 igb_cache_ring_register(adapter);
381 igb_free_queues(adapter);
386 #define IGB_N0_QUEUE -1
387 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
390 struct igb_adapter *adapter = q_vector->adapter;
391 struct e1000_hw *hw = &adapter->hw;
393 int rx_queue = IGB_N0_QUEUE;
394 int tx_queue = IGB_N0_QUEUE;
396 if (q_vector->rx_ring)
397 rx_queue = q_vector->rx_ring->reg_idx;
398 if (q_vector->tx_ring)
399 tx_queue = q_vector->tx_ring->reg_idx;
401 switch (hw->mac.type) {
403 /* The 82575 assigns vectors using a bitmask, which matches the
404 bitmask for the EICR/EIMS/EIMC registers. To assign one
405 or more queues to a vector, we write the appropriate bits
406 into the MSIXBM register for that vector. */
407 if (rx_queue > IGB_N0_QUEUE)
408 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
409 if (tx_queue > IGB_N0_QUEUE)
410 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
411 if (!adapter->msix_entries && msix_vector == 0)
412 msixbm |= E1000_EIMS_OTHER;
413 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
414 q_vector->eims_value = msixbm;
417 /* 82576 uses a table-based method for assigning vectors.
418 Each queue has a single entry in the table to which we write
419 a vector number along with a "valid" bit. Sadly, the layout
420 of the table is somewhat counterintuitive. */
421 if (rx_queue > IGB_N0_QUEUE) {
422 index = (rx_queue & 0x7);
423 ivar = array_rd32(E1000_IVAR0, index);
425 /* vector goes into low byte of register */
426 ivar = ivar & 0xFFFFFF00;
427 ivar |= msix_vector | E1000_IVAR_VALID;
429 /* vector goes into third byte of register */
430 ivar = ivar & 0xFF00FFFF;
431 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
433 array_wr32(E1000_IVAR0, index, ivar);
435 if (tx_queue > IGB_N0_QUEUE) {
436 index = (tx_queue & 0x7);
437 ivar = array_rd32(E1000_IVAR0, index);
439 /* vector goes into second byte of register */
440 ivar = ivar & 0xFFFF00FF;
441 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
443 /* vector goes into high byte of register */
444 ivar = ivar & 0x00FFFFFF;
445 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
447 array_wr32(E1000_IVAR0, index, ivar);
449 q_vector->eims_value = 1 << msix_vector;
453 /* 82580 uses the same table-based approach as 82576 but has fewer
454 entries as a result we carry over for queues greater than 4. */
455 if (rx_queue > IGB_N0_QUEUE) {
456 index = (rx_queue >> 1);
457 ivar = array_rd32(E1000_IVAR0, index);
458 if (rx_queue & 0x1) {
459 /* vector goes into third byte of register */
460 ivar = ivar & 0xFF00FFFF;
461 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
463 /* vector goes into low byte of register */
464 ivar = ivar & 0xFFFFFF00;
465 ivar |= msix_vector | E1000_IVAR_VALID;
467 array_wr32(E1000_IVAR0, index, ivar);
469 if (tx_queue > IGB_N0_QUEUE) {
470 index = (tx_queue >> 1);
471 ivar = array_rd32(E1000_IVAR0, index);
472 if (tx_queue & 0x1) {
473 /* vector goes into high byte of register */
474 ivar = ivar & 0x00FFFFFF;
475 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
477 /* vector goes into second byte of register */
478 ivar = ivar & 0xFFFF00FF;
479 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
481 array_wr32(E1000_IVAR0, index, ivar);
483 q_vector->eims_value = 1 << msix_vector;
490 /* add q_vector eims value to global eims_enable_mask */
491 adapter->eims_enable_mask |= q_vector->eims_value;
493 /* configure q_vector to set itr on first interrupt */
494 q_vector->set_itr = 1;
498 * igb_configure_msix - Configure MSI-X hardware
500 * igb_configure_msix sets up the hardware to properly
501 * generate MSI-X interrupts.
503 static void igb_configure_msix(struct igb_adapter *adapter)
507 struct e1000_hw *hw = &adapter->hw;
509 adapter->eims_enable_mask = 0;
511 /* set vector for other causes, i.e. link changes */
512 switch (hw->mac.type) {
514 tmp = rd32(E1000_CTRL_EXT);
515 /* enable MSI-X PBA support*/
516 tmp |= E1000_CTRL_EXT_PBA_CLR;
518 /* Auto-Mask interrupts upon ICR read. */
519 tmp |= E1000_CTRL_EXT_EIAME;
520 tmp |= E1000_CTRL_EXT_IRCA;
522 wr32(E1000_CTRL_EXT, tmp);
524 /* enable msix_other interrupt */
525 array_wr32(E1000_MSIXBM(0), vector++,
527 adapter->eims_other = E1000_EIMS_OTHER;
534 /* Turn on MSI-X capability first, or our settings
535 * won't stick. And it will take days to debug. */
536 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
537 E1000_GPIE_PBA | E1000_GPIE_EIAME |
540 /* enable msix_other interrupt */
541 adapter->eims_other = 1 << vector;
542 tmp = (vector++ | E1000_IVAR_VALID) << 8;
544 wr32(E1000_IVAR_MISC, tmp);
547 /* do nothing, since nothing else supports MSI-X */
549 } /* switch (hw->mac.type) */
551 adapter->eims_enable_mask |= adapter->eims_other;
553 for (i = 0; i < adapter->num_q_vectors; i++)
554 igb_assign_vector(adapter->q_vector[i], vector++);
560 * igb_request_msix - Initialize MSI-X interrupts
562 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
565 static int igb_request_msix(struct igb_adapter *adapter)
567 struct net_device *netdev = adapter->netdev;
568 struct e1000_hw *hw = &adapter->hw;
569 int i, err = 0, vector = 0;
571 err = request_irq(adapter->msix_entries[vector].vector,
572 igb_msix_other, 0, netdev->name, adapter);
577 for (i = 0; i < adapter->num_q_vectors; i++) {
578 struct igb_q_vector *q_vector = adapter->q_vector[i];
580 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
582 if (q_vector->rx_ring && q_vector->tx_ring)
583 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
584 q_vector->rx_ring->queue_index);
585 else if (q_vector->tx_ring)
586 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
587 q_vector->tx_ring->queue_index);
588 else if (q_vector->rx_ring)
589 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
590 q_vector->rx_ring->queue_index);
592 sprintf(q_vector->name, "%s-unused", netdev->name);
594 err = request_irq(adapter->msix_entries[vector].vector,
595 igb_msix_ring, 0, q_vector->name,
602 igb_configure_msix(adapter);
608 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
610 if (adapter->msix_entries) {
611 pci_disable_msix(adapter->pdev);
612 kfree(adapter->msix_entries);
613 adapter->msix_entries = NULL;
614 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
615 pci_disable_msi(adapter->pdev);
620 * igb_free_q_vectors - Free memory allocated for interrupt vectors
621 * @adapter: board private structure to initialize
623 * This function frees the memory allocated to the q_vectors. In addition if
624 * NAPI is enabled it will delete any references to the NAPI struct prior
625 * to freeing the q_vector.
627 static void igb_free_q_vectors(struct igb_adapter *adapter)
631 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
632 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
633 adapter->q_vector[v_idx] = NULL;
636 netif_napi_del(&q_vector->napi);
639 adapter->num_q_vectors = 0;
643 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
645 * This function resets the device so that it has 0 rx queues, tx queues, and
646 * MSI-X interrupts allocated.
648 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
650 igb_free_queues(adapter);
651 igb_free_q_vectors(adapter);
652 igb_reset_interrupt_capability(adapter);
656 * igb_set_interrupt_capability - set MSI or MSI-X if supported
658 * Attempt to configure interrupts using the best available
659 * capabilities of the hardware and kernel.
661 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
666 /* Number of supported queues. */
667 adapter->num_rx_queues = adapter->rss_queues;
668 adapter->num_tx_queues = adapter->rss_queues;
670 /* start with one vector for every rx queue */
671 numvecs = adapter->num_rx_queues;
673 /* if tx handler is separate add 1 for every tx queue */
674 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
675 numvecs += adapter->num_tx_queues;
677 /* store the number of vectors reserved for queues */
678 adapter->num_q_vectors = numvecs;
680 /* add 1 vector for link status interrupts */
682 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
684 if (!adapter->msix_entries)
687 for (i = 0; i < numvecs; i++)
688 adapter->msix_entries[i].entry = i;
690 err = pci_enable_msix(adapter->pdev,
691 adapter->msix_entries,
696 igb_reset_interrupt_capability(adapter);
698 /* If we can't do MSI-X, try MSI */
700 #ifdef CONFIG_PCI_IOV
701 /* disable SR-IOV for non MSI-X configurations */
702 if (adapter->vf_data) {
703 struct e1000_hw *hw = &adapter->hw;
704 /* disable iov and allow time for transactions to clear */
705 pci_disable_sriov(adapter->pdev);
708 kfree(adapter->vf_data);
709 adapter->vf_data = NULL;
710 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
712 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
715 adapter->vfs_allocated_count = 0;
716 adapter->rss_queues = 1;
717 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
718 adapter->num_rx_queues = 1;
719 adapter->num_tx_queues = 1;
720 adapter->num_q_vectors = 1;
721 if (!pci_enable_msi(adapter->pdev))
722 adapter->flags |= IGB_FLAG_HAS_MSI;
724 /* Notify the stack of the (possibly) reduced Tx Queue count. */
725 adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
730 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
731 * @adapter: board private structure to initialize
733 * We allocate one q_vector per queue interrupt. If allocation fails we
736 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
738 struct igb_q_vector *q_vector;
739 struct e1000_hw *hw = &adapter->hw;
742 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
743 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
746 q_vector->adapter = adapter;
747 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
748 q_vector->itr_val = IGB_START_ITR;
749 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
750 adapter->q_vector[v_idx] = q_vector;
755 igb_free_q_vectors(adapter);
759 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
760 int ring_idx, int v_idx)
762 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
764 q_vector->rx_ring = adapter->rx_ring[ring_idx];
765 q_vector->rx_ring->q_vector = q_vector;
766 q_vector->itr_val = adapter->rx_itr_setting;
767 if (q_vector->itr_val && q_vector->itr_val <= 3)
768 q_vector->itr_val = IGB_START_ITR;
771 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
772 int ring_idx, int v_idx)
774 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
776 q_vector->tx_ring = adapter->tx_ring[ring_idx];
777 q_vector->tx_ring->q_vector = q_vector;
778 q_vector->itr_val = adapter->tx_itr_setting;
779 if (q_vector->itr_val && q_vector->itr_val <= 3)
780 q_vector->itr_val = IGB_START_ITR;
784 * igb_map_ring_to_vector - maps allocated queues to vectors
786 * This function maps the recently allocated queues to vectors.
788 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
793 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
794 (adapter->num_q_vectors < adapter->num_tx_queues))
797 if (adapter->num_q_vectors >=
798 (adapter->num_rx_queues + adapter->num_tx_queues)) {
799 for (i = 0; i < adapter->num_rx_queues; i++)
800 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
801 for (i = 0; i < adapter->num_tx_queues; i++)
802 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
804 for (i = 0; i < adapter->num_rx_queues; i++) {
805 if (i < adapter->num_tx_queues)
806 igb_map_tx_ring_to_vector(adapter, i, v_idx);
807 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
809 for (; i < adapter->num_tx_queues; i++)
810 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
816 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
818 * This function initializes the interrupts and allocates all of the queues.
820 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
822 struct pci_dev *pdev = adapter->pdev;
825 igb_set_interrupt_capability(adapter);
827 err = igb_alloc_q_vectors(adapter);
829 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
830 goto err_alloc_q_vectors;
833 err = igb_alloc_queues(adapter);
835 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
836 goto err_alloc_queues;
839 err = igb_map_ring_to_vector(adapter);
841 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
848 igb_free_queues(adapter);
850 igb_free_q_vectors(adapter);
852 igb_reset_interrupt_capability(adapter);
857 * igb_request_irq - initialize interrupts
859 * Attempts to configure interrupts using the best available
860 * capabilities of the hardware and kernel.
862 static int igb_request_irq(struct igb_adapter *adapter)
864 struct net_device *netdev = adapter->netdev;
865 struct pci_dev *pdev = adapter->pdev;
868 if (adapter->msix_entries) {
869 err = igb_request_msix(adapter);
872 /* fall back to MSI */
873 igb_clear_interrupt_scheme(adapter);
874 if (!pci_enable_msi(adapter->pdev))
875 adapter->flags |= IGB_FLAG_HAS_MSI;
876 igb_free_all_tx_resources(adapter);
877 igb_free_all_rx_resources(adapter);
878 adapter->num_tx_queues = 1;
879 adapter->num_rx_queues = 1;
880 adapter->num_q_vectors = 1;
881 err = igb_alloc_q_vectors(adapter);
884 "Unable to allocate memory for vectors\n");
887 err = igb_alloc_queues(adapter);
890 "Unable to allocate memory for queues\n");
891 igb_free_q_vectors(adapter);
894 igb_setup_all_tx_resources(adapter);
895 igb_setup_all_rx_resources(adapter);
897 igb_assign_vector(adapter->q_vector[0], 0);
900 if (adapter->flags & IGB_FLAG_HAS_MSI) {
901 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
902 netdev->name, adapter);
906 /* fall back to legacy interrupts */
907 igb_reset_interrupt_capability(adapter);
908 adapter->flags &= ~IGB_FLAG_HAS_MSI;
911 err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
912 netdev->name, adapter);
915 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
922 static void igb_free_irq(struct igb_adapter *adapter)
924 if (adapter->msix_entries) {
927 free_irq(adapter->msix_entries[vector++].vector, adapter);
929 for (i = 0; i < adapter->num_q_vectors; i++) {
930 struct igb_q_vector *q_vector = adapter->q_vector[i];
931 free_irq(adapter->msix_entries[vector++].vector,
935 free_irq(adapter->pdev->irq, adapter);
940 * igb_irq_disable - Mask off interrupt generation on the NIC
941 * @adapter: board private structure
943 static void igb_irq_disable(struct igb_adapter *adapter)
945 struct e1000_hw *hw = &adapter->hw;
948 * we need to be careful when disabling interrupts. The VFs are also
949 * mapped into these registers and so clearing the bits can cause
950 * issues on the VF drivers so we only need to clear what we set
952 if (adapter->msix_entries) {
953 u32 regval = rd32(E1000_EIAM);
954 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
955 wr32(E1000_EIMC, adapter->eims_enable_mask);
956 regval = rd32(E1000_EIAC);
957 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
963 synchronize_irq(adapter->pdev->irq);
967 * igb_irq_enable - Enable default interrupt generation settings
968 * @adapter: board private structure
970 static void igb_irq_enable(struct igb_adapter *adapter)
972 struct e1000_hw *hw = &adapter->hw;
974 if (adapter->msix_entries) {
975 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
976 u32 regval = rd32(E1000_EIAC);
977 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
978 regval = rd32(E1000_EIAM);
979 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
980 wr32(E1000_EIMS, adapter->eims_enable_mask);
981 if (adapter->vfs_allocated_count) {
982 wr32(E1000_MBVFIMR, 0xFF);
983 ims |= E1000_IMS_VMMB;
985 if (adapter->hw.mac.type == e1000_82580)
986 ims |= E1000_IMS_DRSTA;
988 wr32(E1000_IMS, ims);
990 wr32(E1000_IMS, IMS_ENABLE_MASK |
992 wr32(E1000_IAM, IMS_ENABLE_MASK |
997 static void igb_update_mng_vlan(struct igb_adapter *adapter)
999 struct e1000_hw *hw = &adapter->hw;
1000 u16 vid = adapter->hw.mng_cookie.vlan_id;
1001 u16 old_vid = adapter->mng_vlan_id;
1003 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1004 /* add VID to filter table */
1005 igb_vfta_set(hw, vid, true);
1006 adapter->mng_vlan_id = vid;
1008 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1011 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1013 !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1014 /* remove VID from filter table */
1015 igb_vfta_set(hw, old_vid, false);
1020 * igb_release_hw_control - release control of the h/w to f/w
1021 * @adapter: address of board private structure
1023 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1024 * For ASF and Pass Through versions of f/w this means that the
1025 * driver is no longer loaded.
1028 static void igb_release_hw_control(struct igb_adapter *adapter)
1030 struct e1000_hw *hw = &adapter->hw;
1033 /* Let firmware take over control of h/w */
1034 ctrl_ext = rd32(E1000_CTRL_EXT);
1035 wr32(E1000_CTRL_EXT,
1036 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1040 * igb_get_hw_control - get control of the h/w from f/w
1041 * @adapter: address of board private structure
1043 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1044 * For ASF and Pass Through versions of f/w this means that
1045 * the driver is loaded.
1048 static void igb_get_hw_control(struct igb_adapter *adapter)
1050 struct e1000_hw *hw = &adapter->hw;
1053 /* Let firmware know the driver has taken over */
1054 ctrl_ext = rd32(E1000_CTRL_EXT);
1055 wr32(E1000_CTRL_EXT,
1056 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1060 * igb_configure - configure the hardware for RX and TX
1061 * @adapter: private board structure
1063 static void igb_configure(struct igb_adapter *adapter)
1065 struct net_device *netdev = adapter->netdev;
1068 igb_get_hw_control(adapter);
1069 igb_set_rx_mode(netdev);
1071 igb_restore_vlan(adapter);
1073 igb_setup_tctl(adapter);
1074 igb_setup_mrqc(adapter);
1075 igb_setup_rctl(adapter);
1077 igb_configure_tx(adapter);
1078 igb_configure_rx(adapter);
1080 igb_rx_fifo_flush_82575(&adapter->hw);
1082 /* call igb_desc_unused which always leaves
1083 * at least 1 descriptor unused to make sure
1084 * next_to_use != next_to_clean */
1085 for (i = 0; i < adapter->num_rx_queues; i++) {
1086 struct igb_ring *ring = adapter->rx_ring[i];
1087 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1092 * igb_power_up_link - Power up the phy/serdes link
1093 * @adapter: address of board private structure
1095 void igb_power_up_link(struct igb_adapter *adapter)
1097 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1098 igb_power_up_phy_copper(&adapter->hw);
1100 igb_power_up_serdes_link_82575(&adapter->hw);
1104 * igb_power_down_link - Power down the phy/serdes link
1105 * @adapter: address of board private structure
1107 static void igb_power_down_link(struct igb_adapter *adapter)
1109 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1110 igb_power_down_phy_copper_82575(&adapter->hw);
1112 igb_shutdown_serdes_link_82575(&adapter->hw);
1116 * igb_up - Open the interface and prepare it to handle traffic
1117 * @adapter: board private structure
1119 int igb_up(struct igb_adapter *adapter)
1121 struct e1000_hw *hw = &adapter->hw;
1124 /* hardware has been reset, we need to reload some things */
1125 igb_configure(adapter);
1127 clear_bit(__IGB_DOWN, &adapter->state);
1129 for (i = 0; i < adapter->num_q_vectors; i++) {
1130 struct igb_q_vector *q_vector = adapter->q_vector[i];
1131 napi_enable(&q_vector->napi);
1133 if (adapter->msix_entries)
1134 igb_configure_msix(adapter);
1136 igb_assign_vector(adapter->q_vector[0], 0);
1138 /* Clear any pending interrupts. */
1140 igb_irq_enable(adapter);
1142 /* notify VFs that reset has been completed */
1143 if (adapter->vfs_allocated_count) {
1144 u32 reg_data = rd32(E1000_CTRL_EXT);
1145 reg_data |= E1000_CTRL_EXT_PFRSTD;
1146 wr32(E1000_CTRL_EXT, reg_data);
1149 netif_tx_start_all_queues(adapter->netdev);
1151 /* start the watchdog. */
1152 hw->mac.get_link_status = 1;
1153 schedule_work(&adapter->watchdog_task);
1158 void igb_down(struct igb_adapter *adapter)
1160 struct net_device *netdev = adapter->netdev;
1161 struct e1000_hw *hw = &adapter->hw;
1165 /* signal that we're down so the interrupt handler does not
1166 * reschedule our watchdog timer */
1167 set_bit(__IGB_DOWN, &adapter->state);
1169 /* disable receives in the hardware */
1170 rctl = rd32(E1000_RCTL);
1171 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1172 /* flush and sleep below */
1174 netif_tx_stop_all_queues(netdev);
1176 /* disable transmits in the hardware */
1177 tctl = rd32(E1000_TCTL);
1178 tctl &= ~E1000_TCTL_EN;
1179 wr32(E1000_TCTL, tctl);
1180 /* flush both disables and wait for them to finish */
1184 for (i = 0; i < adapter->num_q_vectors; i++) {
1185 struct igb_q_vector *q_vector = adapter->q_vector[i];
1186 napi_disable(&q_vector->napi);
1189 igb_irq_disable(adapter);
1191 del_timer_sync(&adapter->watchdog_timer);
1192 del_timer_sync(&adapter->phy_info_timer);
1194 netif_carrier_off(netdev);
1196 /* record the stats before reset*/
1197 igb_update_stats(adapter);
1199 adapter->link_speed = 0;
1200 adapter->link_duplex = 0;
1202 if (!pci_channel_offline(adapter->pdev))
1204 igb_clean_all_tx_rings(adapter);
1205 igb_clean_all_rx_rings(adapter);
1206 #ifdef CONFIG_IGB_DCA
1208 /* since we reset the hardware DCA settings were cleared */
1209 igb_setup_dca(adapter);
1213 void igb_reinit_locked(struct igb_adapter *adapter)
1215 WARN_ON(in_interrupt());
1216 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1220 clear_bit(__IGB_RESETTING, &adapter->state);
1223 void igb_reset(struct igb_adapter *adapter)
1225 struct pci_dev *pdev = adapter->pdev;
1226 struct e1000_hw *hw = &adapter->hw;
1227 struct e1000_mac_info *mac = &hw->mac;
1228 struct e1000_fc_info *fc = &hw->fc;
1229 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1232 /* Repartition Pba for greater than 9k mtu
1233 * To take effect CTRL.RST is required.
1235 switch (mac->type) {
1238 pba = rd32(E1000_RXPBS);
1239 pba = igb_rxpbs_adjust_82580(pba);
1242 pba = rd32(E1000_RXPBS);
1243 pba &= E1000_RXPBS_SIZE_MASK_82576;
1247 pba = E1000_PBA_34K;
1251 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1252 (mac->type < e1000_82576)) {
1253 /* adjust PBA for jumbo frames */
1254 wr32(E1000_PBA, pba);
1256 /* To maintain wire speed transmits, the Tx FIFO should be
1257 * large enough to accommodate two full transmit packets,
1258 * rounded up to the next 1KB and expressed in KB. Likewise,
1259 * the Rx FIFO should be large enough to accommodate at least
1260 * one full receive packet and is similarly rounded up and
1261 * expressed in KB. */
1262 pba = rd32(E1000_PBA);
1263 /* upper 16 bits has Tx packet buffer allocation size in KB */
1264 tx_space = pba >> 16;
1265 /* lower 16 bits has Rx packet buffer allocation size in KB */
1267 /* the tx fifo also stores 16 bytes of information about the tx
1268 * but don't include ethernet FCS because hardware appends it */
1269 min_tx_space = (adapter->max_frame_size +
1270 sizeof(union e1000_adv_tx_desc) -
1272 min_tx_space = ALIGN(min_tx_space, 1024);
1273 min_tx_space >>= 10;
1274 /* software strips receive CRC, so leave room for it */
1275 min_rx_space = adapter->max_frame_size;
1276 min_rx_space = ALIGN(min_rx_space, 1024);
1277 min_rx_space >>= 10;
1279 /* If current Tx allocation is less than the min Tx FIFO size,
1280 * and the min Tx FIFO size is less than the current Rx FIFO
1281 * allocation, take space away from current Rx allocation */
1282 if (tx_space < min_tx_space &&
1283 ((min_tx_space - tx_space) < pba)) {
1284 pba = pba - (min_tx_space - tx_space);
1286 /* if short on rx space, rx wins and must trump tx
1288 if (pba < min_rx_space)
1291 wr32(E1000_PBA, pba);
1294 /* flow control settings */
1295 /* The high water mark must be low enough to fit one full frame
1296 * (or the size used for early receive) above it in the Rx FIFO.
1297 * Set it to the lower of:
1298 * - 90% of the Rx FIFO size, or
1299 * - the full Rx FIFO size minus one full frame */
1300 hwm = min(((pba << 10) * 9 / 10),
1301 ((pba << 10) - 2 * adapter->max_frame_size));
1303 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1304 fc->low_water = fc->high_water - 16;
1305 fc->pause_time = 0xFFFF;
1307 fc->current_mode = fc->requested_mode;
1309 /* disable receive for all VFs and wait one second */
1310 if (adapter->vfs_allocated_count) {
1312 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1313 adapter->vf_data[i].flags = 0;
1315 /* ping all the active vfs to let them know we are going down */
1316 igb_ping_all_vfs(adapter);
1318 /* disable transmits and receives */
1319 wr32(E1000_VFRE, 0);
1320 wr32(E1000_VFTE, 0);
1323 /* Allow time for pending master requests to run */
1324 hw->mac.ops.reset_hw(hw);
1327 if (hw->mac.ops.init_hw(hw))
1328 dev_err(&pdev->dev, "Hardware Error\n");
1330 if (hw->mac.type == e1000_82580) {
1331 u32 reg = rd32(E1000_PCIEMISC);
1332 wr32(E1000_PCIEMISC,
1333 reg & ~E1000_PCIEMISC_LX_DECISION);
1335 if (!netif_running(adapter->netdev))
1336 igb_power_down_link(adapter);
1338 igb_update_mng_vlan(adapter);
1340 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1341 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1343 igb_get_phy_info(hw);
1346 static const struct net_device_ops igb_netdev_ops = {
1347 .ndo_open = igb_open,
1348 .ndo_stop = igb_close,
1349 .ndo_start_xmit = igb_xmit_frame_adv,
1350 .ndo_get_stats = igb_get_stats,
1351 .ndo_set_rx_mode = igb_set_rx_mode,
1352 .ndo_set_multicast_list = igb_set_rx_mode,
1353 .ndo_set_mac_address = igb_set_mac,
1354 .ndo_change_mtu = igb_change_mtu,
1355 .ndo_do_ioctl = igb_ioctl,
1356 .ndo_tx_timeout = igb_tx_timeout,
1357 .ndo_validate_addr = eth_validate_addr,
1358 .ndo_vlan_rx_register = igb_vlan_rx_register,
1359 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1360 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1361 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1362 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1363 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1364 .ndo_get_vf_config = igb_ndo_get_vf_config,
1365 #ifdef CONFIG_NET_POLL_CONTROLLER
1366 .ndo_poll_controller = igb_netpoll,
1371 * igb_probe - Device Initialization Routine
1372 * @pdev: PCI device information struct
1373 * @ent: entry in igb_pci_tbl
1375 * Returns 0 on success, negative on failure
1377 * igb_probe initializes an adapter identified by a pci_dev structure.
1378 * The OS initialization, configuring of the adapter private structure,
1379 * and a hardware reset occur.
1381 static int __devinit igb_probe(struct pci_dev *pdev,
1382 const struct pci_device_id *ent)
1384 struct net_device *netdev;
1385 struct igb_adapter *adapter;
1386 struct e1000_hw *hw;
1387 u16 eeprom_data = 0;
1388 static int global_quad_port_a; /* global quad port a indication */
1389 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1390 unsigned long mmio_start, mmio_len;
1391 int err, pci_using_dac;
1392 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1395 err = pci_enable_device_mem(pdev);
1400 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1402 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1406 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1408 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1410 dev_err(&pdev->dev, "No usable DMA "
1411 "configuration, aborting\n");
1417 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1423 pci_enable_pcie_error_reporting(pdev);
1425 pci_set_master(pdev);
1426 pci_save_state(pdev);
1429 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1430 IGB_ABS_MAX_TX_QUEUES);
1432 goto err_alloc_etherdev;
1434 SET_NETDEV_DEV(netdev, &pdev->dev);
1436 pci_set_drvdata(pdev, netdev);
1437 adapter = netdev_priv(netdev);
1438 adapter->netdev = netdev;
1439 adapter->pdev = pdev;
1442 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1444 mmio_start = pci_resource_start(pdev, 0);
1445 mmio_len = pci_resource_len(pdev, 0);
1448 hw->hw_addr = ioremap(mmio_start, mmio_len);
1452 netdev->netdev_ops = &igb_netdev_ops;
1453 igb_set_ethtool_ops(netdev);
1454 netdev->watchdog_timeo = 5 * HZ;
1456 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1458 netdev->mem_start = mmio_start;
1459 netdev->mem_end = mmio_start + mmio_len;
1461 /* PCI config space info */
1462 hw->vendor_id = pdev->vendor;
1463 hw->device_id = pdev->device;
1464 hw->revision_id = pdev->revision;
1465 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1466 hw->subsystem_device_id = pdev->subsystem_device;
1468 /* Copy the default MAC, PHY and NVM function pointers */
1469 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1470 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1471 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1472 /* Initialize skew-specific constants */
1473 err = ei->get_invariants(hw);
1477 /* setup the private structure */
1478 err = igb_sw_init(adapter);
1482 igb_get_bus_info_pcie(hw);
1484 hw->phy.autoneg_wait_to_complete = false;
1486 /* Copper options */
1487 if (hw->phy.media_type == e1000_media_type_copper) {
1488 hw->phy.mdix = AUTO_ALL_MODES;
1489 hw->phy.disable_polarity_correction = false;
1490 hw->phy.ms_type = e1000_ms_hw_default;
1493 if (igb_check_reset_block(hw))
1494 dev_info(&pdev->dev,
1495 "PHY reset is blocked due to SOL/IDER session.\n");
1497 netdev->features = NETIF_F_SG |
1499 NETIF_F_HW_VLAN_TX |
1500 NETIF_F_HW_VLAN_RX |
1501 NETIF_F_HW_VLAN_FILTER;
1503 netdev->features |= NETIF_F_IPV6_CSUM;
1504 netdev->features |= NETIF_F_TSO;
1505 netdev->features |= NETIF_F_TSO6;
1506 netdev->features |= NETIF_F_GRO;
1508 netdev->vlan_features |= NETIF_F_TSO;
1509 netdev->vlan_features |= NETIF_F_TSO6;
1510 netdev->vlan_features |= NETIF_F_IP_CSUM;
1511 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1512 netdev->vlan_features |= NETIF_F_SG;
1515 netdev->features |= NETIF_F_HIGHDMA;
1517 if (hw->mac.type >= e1000_82576)
1518 netdev->features |= NETIF_F_SCTP_CSUM;
1520 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1522 /* before reading the NVM, reset the controller to put the device in a
1523 * known good starting state */
1524 hw->mac.ops.reset_hw(hw);
1526 /* make sure the NVM is good */
1527 if (igb_validate_nvm_checksum(hw) < 0) {
1528 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1533 /* copy the MAC address out of the NVM */
1534 if (hw->mac.ops.read_mac_addr(hw))
1535 dev_err(&pdev->dev, "NVM Read Error\n");
1537 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1538 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1540 if (!is_valid_ether_addr(netdev->perm_addr)) {
1541 dev_err(&pdev->dev, "Invalid MAC Address\n");
1546 setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1547 (unsigned long) adapter);
1548 setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1549 (unsigned long) adapter);
1551 INIT_WORK(&adapter->reset_task, igb_reset_task);
1552 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1554 /* Initialize link properties that are user-changeable */
1555 adapter->fc_autoneg = true;
1556 hw->mac.autoneg = true;
1557 hw->phy.autoneg_advertised = 0x2f;
1559 hw->fc.requested_mode = e1000_fc_default;
1560 hw->fc.current_mode = e1000_fc_default;
1562 igb_validate_mdi_setting(hw);
1564 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1565 * enable the ACPI Magic Packet filter
1568 if (hw->bus.func == 0)
1569 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1570 else if (hw->mac.type == e1000_82580)
1571 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1572 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1574 else if (hw->bus.func == 1)
1575 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1577 if (eeprom_data & eeprom_apme_mask)
1578 adapter->eeprom_wol |= E1000_WUFC_MAG;
1580 /* now that we have the eeprom settings, apply the special cases where
1581 * the eeprom may be wrong or the board simply won't support wake on
1582 * lan on a particular port */
1583 switch (pdev->device) {
1584 case E1000_DEV_ID_82575GB_QUAD_COPPER:
1585 adapter->eeprom_wol = 0;
1587 case E1000_DEV_ID_82575EB_FIBER_SERDES:
1588 case E1000_DEV_ID_82576_FIBER:
1589 case E1000_DEV_ID_82576_SERDES:
1590 /* Wake events only supported on port A for dual fiber
1591 * regardless of eeprom setting */
1592 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1593 adapter->eeprom_wol = 0;
1595 case E1000_DEV_ID_82576_QUAD_COPPER:
1596 /* if quad port adapter, disable WoL on all but port A */
1597 if (global_quad_port_a != 0)
1598 adapter->eeprom_wol = 0;
1600 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1601 /* Reset for multiple quad port adapters */
1602 if (++global_quad_port_a == 4)
1603 global_quad_port_a = 0;
1607 /* initialize the wol settings based on the eeprom settings */
1608 adapter->wol = adapter->eeprom_wol;
1609 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1611 /* reset the hardware with the new settings */
1614 /* let the f/w know that the h/w is now under the control of the
1616 igb_get_hw_control(adapter);
1618 strcpy(netdev->name, "eth%d");
1619 err = register_netdev(netdev);
1623 /* carrier off reporting is important to ethtool even BEFORE open */
1624 netif_carrier_off(netdev);
1626 #ifdef CONFIG_IGB_DCA
1627 if (dca_add_requester(&pdev->dev) == 0) {
1628 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1629 dev_info(&pdev->dev, "DCA enabled\n");
1630 igb_setup_dca(adapter);
1634 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1635 /* print bus type/speed/width info */
1636 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1638 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1640 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1641 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1642 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1646 igb_read_part_num(hw, &part_num);
1647 dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1648 (part_num >> 8), (part_num & 0xff));
1650 dev_info(&pdev->dev,
1651 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1652 adapter->msix_entries ? "MSI-X" :
1653 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1654 adapter->num_rx_queues, adapter->num_tx_queues);
1659 igb_release_hw_control(adapter);
1661 if (!igb_check_reset_block(hw))
1664 if (hw->flash_address)
1665 iounmap(hw->flash_address);
1667 igb_clear_interrupt_scheme(adapter);
1668 iounmap(hw->hw_addr);
1670 free_netdev(netdev);
1672 pci_release_selected_regions(pdev,
1673 pci_select_bars(pdev, IORESOURCE_MEM));
1676 pci_disable_device(pdev);
1681 * igb_remove - Device Removal Routine
1682 * @pdev: PCI device information struct
1684 * igb_remove is called by the PCI subsystem to alert the driver
1685 * that it should release a PCI device. The could be caused by a
1686 * Hot-Plug event, or because the driver is going to be removed from
1689 static void __devexit igb_remove(struct pci_dev *pdev)
1691 struct net_device *netdev = pci_get_drvdata(pdev);
1692 struct igb_adapter *adapter = netdev_priv(netdev);
1693 struct e1000_hw *hw = &adapter->hw;
1695 /* flush_scheduled work may reschedule our watchdog task, so
1696 * explicitly disable watchdog tasks from being rescheduled */
1697 set_bit(__IGB_DOWN, &adapter->state);
1698 del_timer_sync(&adapter->watchdog_timer);
1699 del_timer_sync(&adapter->phy_info_timer);
1701 flush_scheduled_work();
1703 #ifdef CONFIG_IGB_DCA
1704 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1705 dev_info(&pdev->dev, "DCA disabled\n");
1706 dca_remove_requester(&pdev->dev);
1707 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1708 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1712 /* Release control of h/w to f/w. If f/w is AMT enabled, this
1713 * would have already happened in close and is redundant. */
1714 igb_release_hw_control(adapter);
1716 unregister_netdev(netdev);
1718 igb_clear_interrupt_scheme(adapter);
1720 #ifdef CONFIG_PCI_IOV
1721 /* reclaim resources allocated to VFs */
1722 if (adapter->vf_data) {
1723 /* disable iov and allow time for transactions to clear */
1724 pci_disable_sriov(pdev);
1727 kfree(adapter->vf_data);
1728 adapter->vf_data = NULL;
1729 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1731 dev_info(&pdev->dev, "IOV Disabled\n");
1735 iounmap(hw->hw_addr);
1736 if (hw->flash_address)
1737 iounmap(hw->flash_address);
1738 pci_release_selected_regions(pdev,
1739 pci_select_bars(pdev, IORESOURCE_MEM));
1741 free_netdev(netdev);
1743 pci_disable_pcie_error_reporting(pdev);
1745 pci_disable_device(pdev);
1749 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1750 * @adapter: board private structure to initialize
1752 * This function initializes the vf specific data storage and then attempts to
1753 * allocate the VFs. The reason for ordering it this way is because it is much
1754 * mor expensive time wise to disable SR-IOV than it is to allocate and free
1755 * the memory for the VFs.
1757 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1759 #ifdef CONFIG_PCI_IOV
1760 struct pci_dev *pdev = adapter->pdev;
1762 if (adapter->vfs_allocated_count > 7)
1763 adapter->vfs_allocated_count = 7;
1765 if (adapter->vfs_allocated_count) {
1766 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1767 sizeof(struct vf_data_storage),
1769 /* if allocation failed then we do not support SR-IOV */
1770 if (!adapter->vf_data) {
1771 adapter->vfs_allocated_count = 0;
1772 dev_err(&pdev->dev, "Unable to allocate memory for VF "
1777 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1778 kfree(adapter->vf_data);
1779 adapter->vf_data = NULL;
1780 #endif /* CONFIG_PCI_IOV */
1781 adapter->vfs_allocated_count = 0;
1782 #ifdef CONFIG_PCI_IOV
1784 unsigned char mac_addr[ETH_ALEN];
1786 dev_info(&pdev->dev, "%d vfs allocated\n",
1787 adapter->vfs_allocated_count);
1788 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1789 random_ether_addr(mac_addr);
1790 igb_set_vf_mac(adapter, i, mac_addr);
1793 #endif /* CONFIG_PCI_IOV */
1798 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1799 * @adapter: board private structure to initialize
1801 * igb_init_hw_timer initializes the function pointer and values for the hw
1802 * timer found in hardware.
1804 static void igb_init_hw_timer(struct igb_adapter *adapter)
1806 struct e1000_hw *hw = &adapter->hw;
1808 switch (hw->mac.type) {
1811 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1812 adapter->cycles.read = igb_read_clock;
1813 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1814 adapter->cycles.mult = 1;
1816 * The 82580 timesync updates the system timer every 8ns by 8ns
1817 * and the value cannot be shifted. Instead we need to shift
1818 * the registers to generate a 64bit timer value. As a result
1819 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1820 * 24 in order to generate a larger value for synchronization.
1822 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1823 /* disable system timer temporarily by setting bit 31 */
1824 wr32(E1000_TSAUXC, 0x80000000);
1827 /* Set registers so that rollover occurs soon to test this. */
1828 wr32(E1000_SYSTIMR, 0x00000000);
1829 wr32(E1000_SYSTIML, 0x80000000);
1830 wr32(E1000_SYSTIMH, 0x000000FF);
1833 /* enable system timer by clearing bit 31 */
1834 wr32(E1000_TSAUXC, 0x0);
1837 timecounter_init(&adapter->clock,
1839 ktime_to_ns(ktime_get_real()));
1841 * Synchronize our NIC clock against system wall clock. NIC
1842 * time stamp reading requires ~3us per sample, each sample
1843 * was pretty stable even under load => only require 10
1844 * samples for each offset comparison.
1846 memset(&adapter->compare, 0, sizeof(adapter->compare));
1847 adapter->compare.source = &adapter->clock;
1848 adapter->compare.target = ktime_get_real;
1849 adapter->compare.num_samples = 10;
1850 timecompare_update(&adapter->compare, 0);
1854 * Initialize hardware timer: we keep it running just in case
1855 * that some program needs it later on.
1857 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1858 adapter->cycles.read = igb_read_clock;
1859 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1860 adapter->cycles.mult = 1;
1862 * Scale the NIC clock cycle by a large factor so that
1863 * relatively small clock corrections can be added or
1864 * substracted at each clock tick. The drawbacks of a large
1865 * factor are a) that the clock register overflows more quickly
1866 * (not such a big deal) and b) that the increment per tick has
1867 * to fit into 24 bits. As a result we need to use a shift of
1868 * 19 so we can fit a value of 16 into the TIMINCA register.
1870 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1872 (1 << E1000_TIMINCA_16NS_SHIFT) |
1873 (16 << IGB_82576_TSYNC_SHIFT));
1875 /* Set registers so that rollover occurs soon to test this. */
1876 wr32(E1000_SYSTIML, 0x00000000);
1877 wr32(E1000_SYSTIMH, 0xFF800000);
1880 timecounter_init(&adapter->clock,
1882 ktime_to_ns(ktime_get_real()));
1884 * Synchronize our NIC clock against system wall clock. NIC
1885 * time stamp reading requires ~3us per sample, each sample
1886 * was pretty stable even under load => only require 10
1887 * samples for each offset comparison.
1889 memset(&adapter->compare, 0, sizeof(adapter->compare));
1890 adapter->compare.source = &adapter->clock;
1891 adapter->compare.target = ktime_get_real;
1892 adapter->compare.num_samples = 10;
1893 timecompare_update(&adapter->compare, 0);
1896 /* 82575 does not support timesync */
1904 * igb_sw_init - Initialize general software structures (struct igb_adapter)
1905 * @adapter: board private structure to initialize
1907 * igb_sw_init initializes the Adapter private data structure.
1908 * Fields are initialized based on PCI device information and
1909 * OS network device settings (MTU size).
1911 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1913 struct e1000_hw *hw = &adapter->hw;
1914 struct net_device *netdev = adapter->netdev;
1915 struct pci_dev *pdev = adapter->pdev;
1917 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1919 adapter->tx_ring_count = IGB_DEFAULT_TXD;
1920 adapter->rx_ring_count = IGB_DEFAULT_RXD;
1921 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1922 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1924 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1925 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1927 #ifdef CONFIG_PCI_IOV
1928 if (hw->mac.type == e1000_82576)
1929 adapter->vfs_allocated_count = max_vfs;
1931 #endif /* CONFIG_PCI_IOV */
1932 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1935 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
1936 * then we should combine the queues into a queue pair in order to
1937 * conserve interrupts due to limited supply
1939 if ((adapter->rss_queues > 4) ||
1940 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1941 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1943 /* This call may decrease the number of queues */
1944 if (igb_init_interrupt_scheme(adapter)) {
1945 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1949 igb_init_hw_timer(adapter);
1950 igb_probe_vfs(adapter);
1952 /* Explicitly disable IRQ since the NIC can be in any state. */
1953 igb_irq_disable(adapter);
1955 set_bit(__IGB_DOWN, &adapter->state);
1960 * igb_open - Called when a network interface is made active
1961 * @netdev: network interface device structure
1963 * Returns 0 on success, negative value on failure
1965 * The open entry point is called when a network interface is made
1966 * active by the system (IFF_UP). At this point all resources needed
1967 * for transmit and receive operations are allocated, the interrupt
1968 * handler is registered with the OS, the watchdog timer is started,
1969 * and the stack is notified that the interface is ready.
1971 static int igb_open(struct net_device *netdev)
1973 struct igb_adapter *adapter = netdev_priv(netdev);
1974 struct e1000_hw *hw = &adapter->hw;
1978 /* disallow open during test */
1979 if (test_bit(__IGB_TESTING, &adapter->state))
1982 netif_carrier_off(netdev);
1984 /* allocate transmit descriptors */
1985 err = igb_setup_all_tx_resources(adapter);
1989 /* allocate receive descriptors */
1990 err = igb_setup_all_rx_resources(adapter);
1994 igb_power_up_link(adapter);
1996 /* before we allocate an interrupt, we must be ready to handle it.
1997 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1998 * as soon as we call pci_request_irq, so we have to setup our
1999 * clean_rx handler before we do so. */
2000 igb_configure(adapter);
2002 err = igb_request_irq(adapter);
2006 /* From here on the code is the same as igb_up() */
2007 clear_bit(__IGB_DOWN, &adapter->state);
2009 for (i = 0; i < adapter->num_q_vectors; i++) {
2010 struct igb_q_vector *q_vector = adapter->q_vector[i];
2011 napi_enable(&q_vector->napi);
2014 /* Clear any pending interrupts. */
2017 igb_irq_enable(adapter);
2019 /* notify VFs that reset has been completed */
2020 if (adapter->vfs_allocated_count) {
2021 u32 reg_data = rd32(E1000_CTRL_EXT);
2022 reg_data |= E1000_CTRL_EXT_PFRSTD;
2023 wr32(E1000_CTRL_EXT, reg_data);
2026 netif_tx_start_all_queues(netdev);
2028 /* start the watchdog. */
2029 hw->mac.get_link_status = 1;
2030 schedule_work(&adapter->watchdog_task);
2035 igb_release_hw_control(adapter);
2036 igb_power_down_link(adapter);
2037 igb_free_all_rx_resources(adapter);
2039 igb_free_all_tx_resources(adapter);
2047 * igb_close - Disables a network interface
2048 * @netdev: network interface device structure
2050 * Returns 0, this is not allowed to fail
2052 * The close entry point is called when an interface is de-activated
2053 * by the OS. The hardware is still under the driver's control, but
2054 * needs to be disabled. A global MAC reset is issued to stop the
2055 * hardware, and all transmit and receive resources are freed.
2057 static int igb_close(struct net_device *netdev)
2059 struct igb_adapter *adapter = netdev_priv(netdev);
2061 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2064 igb_free_irq(adapter);
2066 igb_free_all_tx_resources(adapter);
2067 igb_free_all_rx_resources(adapter);
2073 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2074 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2076 * Return 0 on success, negative on failure
2078 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2080 struct pci_dev *pdev = tx_ring->pdev;
2083 size = sizeof(struct igb_buffer) * tx_ring->count;
2084 tx_ring->buffer_info = vmalloc(size);
2085 if (!tx_ring->buffer_info)
2087 memset(tx_ring->buffer_info, 0, size);
2089 /* round up to nearest 4K */
2090 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2091 tx_ring->size = ALIGN(tx_ring->size, 4096);
2093 tx_ring->desc = pci_alloc_consistent(pdev,
2100 tx_ring->next_to_use = 0;
2101 tx_ring->next_to_clean = 0;
2105 vfree(tx_ring->buffer_info);
2107 "Unable to allocate memory for the transmit descriptor ring\n");
2112 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2113 * (Descriptors) for all queues
2114 * @adapter: board private structure
2116 * Return 0 on success, negative on failure
2118 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2120 struct pci_dev *pdev = adapter->pdev;
2123 for (i = 0; i < adapter->num_tx_queues; i++) {
2124 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2127 "Allocation for Tx Queue %u failed\n", i);
2128 for (i--; i >= 0; i--)
2129 igb_free_tx_resources(adapter->tx_ring[i]);
2134 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2135 int r_idx = i % adapter->num_tx_queues;
2136 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2142 * igb_setup_tctl - configure the transmit control registers
2143 * @adapter: Board private structure
2145 void igb_setup_tctl(struct igb_adapter *adapter)
2147 struct e1000_hw *hw = &adapter->hw;
2150 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2151 wr32(E1000_TXDCTL(0), 0);
2153 /* Program the Transmit Control Register */
2154 tctl = rd32(E1000_TCTL);
2155 tctl &= ~E1000_TCTL_CT;
2156 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2157 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2159 igb_config_collision_dist(hw);
2161 /* Enable transmits */
2162 tctl |= E1000_TCTL_EN;
2164 wr32(E1000_TCTL, tctl);
2168 * igb_configure_tx_ring - Configure transmit ring after Reset
2169 * @adapter: board private structure
2170 * @ring: tx ring to configure
2172 * Configure a transmit ring after a reset.
2174 void igb_configure_tx_ring(struct igb_adapter *adapter,
2175 struct igb_ring *ring)
2177 struct e1000_hw *hw = &adapter->hw;
2179 u64 tdba = ring->dma;
2180 int reg_idx = ring->reg_idx;
2182 /* disable the queue */
2183 txdctl = rd32(E1000_TXDCTL(reg_idx));
2184 wr32(E1000_TXDCTL(reg_idx),
2185 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2189 wr32(E1000_TDLEN(reg_idx),
2190 ring->count * sizeof(union e1000_adv_tx_desc));
2191 wr32(E1000_TDBAL(reg_idx),
2192 tdba & 0x00000000ffffffffULL);
2193 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2195 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2196 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2197 writel(0, ring->head);
2198 writel(0, ring->tail);
2200 txdctl |= IGB_TX_PTHRESH;
2201 txdctl |= IGB_TX_HTHRESH << 8;
2202 txdctl |= IGB_TX_WTHRESH << 16;
2204 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2205 wr32(E1000_TXDCTL(reg_idx), txdctl);
2209 * igb_configure_tx - Configure transmit Unit after Reset
2210 * @adapter: board private structure
2212 * Configure the Tx unit of the MAC after a reset.
2214 static void igb_configure_tx(struct igb_adapter *adapter)
2218 for (i = 0; i < adapter->num_tx_queues; i++)
2219 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2223 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2224 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2226 * Returns 0 on success, negative on failure
2228 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2230 struct pci_dev *pdev = rx_ring->pdev;
2233 size = sizeof(struct igb_buffer) * rx_ring->count;
2234 rx_ring->buffer_info = vmalloc(size);
2235 if (!rx_ring->buffer_info)
2237 memset(rx_ring->buffer_info, 0, size);
2239 desc_len = sizeof(union e1000_adv_rx_desc);
2241 /* Round up to nearest 4K */
2242 rx_ring->size = rx_ring->count * desc_len;
2243 rx_ring->size = ALIGN(rx_ring->size, 4096);
2245 rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2251 rx_ring->next_to_clean = 0;
2252 rx_ring->next_to_use = 0;
2257 vfree(rx_ring->buffer_info);
2258 rx_ring->buffer_info = NULL;
2259 dev_err(&pdev->dev, "Unable to allocate memory for "
2260 "the receive descriptor ring\n");
2265 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2266 * (Descriptors) for all queues
2267 * @adapter: board private structure
2269 * Return 0 on success, negative on failure
2271 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2273 struct pci_dev *pdev = adapter->pdev;
2276 for (i = 0; i < adapter->num_rx_queues; i++) {
2277 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2280 "Allocation for Rx Queue %u failed\n", i);
2281 for (i--; i >= 0; i--)
2282 igb_free_rx_resources(adapter->rx_ring[i]);
2291 * igb_setup_mrqc - configure the multiple receive queue control registers
2292 * @adapter: Board private structure
2294 static void igb_setup_mrqc(struct igb_adapter *adapter)
2296 struct e1000_hw *hw = &adapter->hw;
2298 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2303 static const u8 rsshash[40] = {
2304 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2305 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2306 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2307 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2309 /* Fill out hash function seeds */
2310 for (j = 0; j < 10; j++) {
2311 u32 rsskey = rsshash[(j * 4)];
2312 rsskey |= rsshash[(j * 4) + 1] << 8;
2313 rsskey |= rsshash[(j * 4) + 2] << 16;
2314 rsskey |= rsshash[(j * 4) + 3] << 24;
2315 array_wr32(E1000_RSSRK(0), j, rsskey);
2318 num_rx_queues = adapter->rss_queues;
2320 if (adapter->vfs_allocated_count) {
2321 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2322 switch (hw->mac.type) {
2339 if (hw->mac.type == e1000_82575)
2343 for (j = 0; j < (32 * 4); j++) {
2344 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2346 reta.bytes[j & 3] |= num_rx_queues << shift2;
2348 wr32(E1000_RETA(j >> 2), reta.dword);
2352 * Disable raw packet checksumming so that RSS hash is placed in
2353 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2354 * offloads as they are enabled by default
2356 rxcsum = rd32(E1000_RXCSUM);
2357 rxcsum |= E1000_RXCSUM_PCSD;
2359 if (adapter->hw.mac.type >= e1000_82576)
2360 /* Enable Receive Checksum Offload for SCTP */
2361 rxcsum |= E1000_RXCSUM_CRCOFL;
2363 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2364 wr32(E1000_RXCSUM, rxcsum);
2366 /* If VMDq is enabled then we set the appropriate mode for that, else
2367 * we default to RSS so that an RSS hash is calculated per packet even
2368 * if we are only using one queue */
2369 if (adapter->vfs_allocated_count) {
2370 if (hw->mac.type > e1000_82575) {
2371 /* Set the default pool for the PF's first queue */
2372 u32 vtctl = rd32(E1000_VT_CTL);
2373 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2374 E1000_VT_CTL_DISABLE_DEF_POOL);
2375 vtctl |= adapter->vfs_allocated_count <<
2376 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2377 wr32(E1000_VT_CTL, vtctl);
2379 if (adapter->rss_queues > 1)
2380 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2382 mrqc = E1000_MRQC_ENABLE_VMDQ;
2384 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2386 igb_vmm_control(adapter);
2388 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2389 E1000_MRQC_RSS_FIELD_IPV4_TCP);
2390 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2391 E1000_MRQC_RSS_FIELD_IPV6_TCP);
2392 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2393 E1000_MRQC_RSS_FIELD_IPV6_UDP);
2394 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2395 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2397 wr32(E1000_MRQC, mrqc);
2401 * igb_setup_rctl - configure the receive control registers
2402 * @adapter: Board private structure
2404 void igb_setup_rctl(struct igb_adapter *adapter)
2406 struct e1000_hw *hw = &adapter->hw;
2409 rctl = rd32(E1000_RCTL);
2411 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2412 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2414 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2415 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2418 * enable stripping of CRC. It's unlikely this will break BMC
2419 * redirection as it did with e1000. Newer features require
2420 * that the HW strips the CRC.
2422 rctl |= E1000_RCTL_SECRC;
2424 /* disable store bad packets and clear size bits. */
2425 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2427 /* enable LPE to prevent packets larger than max_frame_size */
2428 rctl |= E1000_RCTL_LPE;
2430 /* disable queue 0 to prevent tail write w/o re-config */
2431 wr32(E1000_RXDCTL(0), 0);
2433 /* Attention!!! For SR-IOV PF driver operations you must enable
2434 * queue drop for all VF and PF queues to prevent head of line blocking
2435 * if an un-trusted VF does not provide descriptors to hardware.
2437 if (adapter->vfs_allocated_count) {
2438 /* set all queue drop enable bits */
2439 wr32(E1000_QDE, ALL_QUEUES);
2442 wr32(E1000_RCTL, rctl);
2445 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2448 struct e1000_hw *hw = &adapter->hw;
2451 /* if it isn't the PF check to see if VFs are enabled and
2452 * increase the size to support vlan tags */
2453 if (vfn < adapter->vfs_allocated_count &&
2454 adapter->vf_data[vfn].vlans_enabled)
2455 size += VLAN_TAG_SIZE;
2457 vmolr = rd32(E1000_VMOLR(vfn));
2458 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2459 vmolr |= size | E1000_VMOLR_LPE;
2460 wr32(E1000_VMOLR(vfn), vmolr);
2466 * igb_rlpml_set - set maximum receive packet size
2467 * @adapter: board private structure
2469 * Configure maximum receivable packet size.
2471 static void igb_rlpml_set(struct igb_adapter *adapter)
2473 u32 max_frame_size = adapter->max_frame_size;
2474 struct e1000_hw *hw = &adapter->hw;
2475 u16 pf_id = adapter->vfs_allocated_count;
2478 max_frame_size += VLAN_TAG_SIZE;
2480 /* if vfs are enabled we set RLPML to the largest possible request
2481 * size and set the VMOLR RLPML to the size we need */
2483 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2484 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2487 wr32(E1000_RLPML, max_frame_size);
2490 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2493 struct e1000_hw *hw = &adapter->hw;
2497 * This register exists only on 82576 and newer so if we are older then
2498 * we should exit and do nothing
2500 if (hw->mac.type < e1000_82576)
2503 vmolr = rd32(E1000_VMOLR(vfn));
2504 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2506 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2508 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2510 /* clear all bits that might not be set */
2511 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2513 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2514 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2516 * for VMDq only allow the VFs and pool 0 to accept broadcast and
2519 if (vfn <= adapter->vfs_allocated_count)
2520 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2522 wr32(E1000_VMOLR(vfn), vmolr);
2526 * igb_configure_rx_ring - Configure a receive ring after Reset
2527 * @adapter: board private structure
2528 * @ring: receive ring to be configured
2530 * Configure the Rx unit of the MAC after a reset.
2532 void igb_configure_rx_ring(struct igb_adapter *adapter,
2533 struct igb_ring *ring)
2535 struct e1000_hw *hw = &adapter->hw;
2536 u64 rdba = ring->dma;
2537 int reg_idx = ring->reg_idx;
2540 /* disable the queue */
2541 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2542 wr32(E1000_RXDCTL(reg_idx),
2543 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2545 /* Set DMA base address registers */
2546 wr32(E1000_RDBAL(reg_idx),
2547 rdba & 0x00000000ffffffffULL);
2548 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2549 wr32(E1000_RDLEN(reg_idx),
2550 ring->count * sizeof(union e1000_adv_rx_desc));
2552 /* initialize head and tail */
2553 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2554 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2555 writel(0, ring->head);
2556 writel(0, ring->tail);
2558 /* set descriptor configuration */
2559 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2560 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2561 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2562 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2563 srrctl |= IGB_RXBUFFER_16384 >>
2564 E1000_SRRCTL_BSIZEPKT_SHIFT;
2566 srrctl |= (PAGE_SIZE / 2) >>
2567 E1000_SRRCTL_BSIZEPKT_SHIFT;
2569 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2571 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2572 E1000_SRRCTL_BSIZEPKT_SHIFT;
2573 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2575 if (hw->mac.type == e1000_82580)
2576 srrctl |= E1000_SRRCTL_TIMESTAMP;
2577 /* Only set Drop Enable if we are supporting multiple queues */
2578 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2579 srrctl |= E1000_SRRCTL_DROP_EN;
2581 wr32(E1000_SRRCTL(reg_idx), srrctl);
2583 /* set filtering for VMDQ pools */
2584 igb_set_vmolr(adapter, reg_idx & 0x7, true);
2586 /* enable receive descriptor fetching */
2587 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2588 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2589 rxdctl &= 0xFFF00000;
2590 rxdctl |= IGB_RX_PTHRESH;
2591 rxdctl |= IGB_RX_HTHRESH << 8;
2592 rxdctl |= IGB_RX_WTHRESH << 16;
2593 wr32(E1000_RXDCTL(reg_idx), rxdctl);
2597 * igb_configure_rx - Configure receive Unit after Reset
2598 * @adapter: board private structure
2600 * Configure the Rx unit of the MAC after a reset.
2602 static void igb_configure_rx(struct igb_adapter *adapter)
2606 /* set UTA to appropriate mode */
2607 igb_set_uta(adapter);
2609 /* set the correct pool for the PF default MAC address in entry 0 */
2610 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2611 adapter->vfs_allocated_count);
2613 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2614 * the Base and Length of the Rx Descriptor Ring */
2615 for (i = 0; i < adapter->num_rx_queues; i++)
2616 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2620 * igb_free_tx_resources - Free Tx Resources per Queue
2621 * @tx_ring: Tx descriptor ring for a specific queue
2623 * Free all transmit software resources
2625 void igb_free_tx_resources(struct igb_ring *tx_ring)
2627 igb_clean_tx_ring(tx_ring);
2629 vfree(tx_ring->buffer_info);
2630 tx_ring->buffer_info = NULL;
2632 /* if not set, then don't free */
2636 pci_free_consistent(tx_ring->pdev, tx_ring->size,
2637 tx_ring->desc, tx_ring->dma);
2639 tx_ring->desc = NULL;
2643 * igb_free_all_tx_resources - Free Tx Resources for All Queues
2644 * @adapter: board private structure
2646 * Free all transmit software resources
2648 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2652 for (i = 0; i < adapter->num_tx_queues; i++)
2653 igb_free_tx_resources(adapter->tx_ring[i]);
2656 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2657 struct igb_buffer *buffer_info)
2659 if (buffer_info->dma) {
2660 if (buffer_info->mapped_as_page)
2661 pci_unmap_page(tx_ring->pdev,
2663 buffer_info->length,
2666 pci_unmap_single(tx_ring->pdev,
2668 buffer_info->length,
2670 buffer_info->dma = 0;
2672 if (buffer_info->skb) {
2673 dev_kfree_skb_any(buffer_info->skb);
2674 buffer_info->skb = NULL;
2676 buffer_info->time_stamp = 0;
2677 buffer_info->length = 0;
2678 buffer_info->next_to_watch = 0;
2679 buffer_info->mapped_as_page = false;
2683 * igb_clean_tx_ring - Free Tx Buffers
2684 * @tx_ring: ring to be cleaned
2686 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2688 struct igb_buffer *buffer_info;
2692 if (!tx_ring->buffer_info)
2694 /* Free all the Tx ring sk_buffs */
2696 for (i = 0; i < tx_ring->count; i++) {
2697 buffer_info = &tx_ring->buffer_info[i];
2698 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2701 size = sizeof(struct igb_buffer) * tx_ring->count;
2702 memset(tx_ring->buffer_info, 0, size);
2704 /* Zero out the descriptor ring */
2705 memset(tx_ring->desc, 0, tx_ring->size);
2707 tx_ring->next_to_use = 0;
2708 tx_ring->next_to_clean = 0;
2712 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2713 * @adapter: board private structure
2715 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2719 for (i = 0; i < adapter->num_tx_queues; i++)
2720 igb_clean_tx_ring(adapter->tx_ring[i]);
2724 * igb_free_rx_resources - Free Rx Resources
2725 * @rx_ring: ring to clean the resources from
2727 * Free all receive software resources
2729 void igb_free_rx_resources(struct igb_ring *rx_ring)
2731 igb_clean_rx_ring(rx_ring);
2733 vfree(rx_ring->buffer_info);
2734 rx_ring->buffer_info = NULL;
2736 /* if not set, then don't free */
2740 pci_free_consistent(rx_ring->pdev, rx_ring->size,
2741 rx_ring->desc, rx_ring->dma);
2743 rx_ring->desc = NULL;
2747 * igb_free_all_rx_resources - Free Rx Resources for All Queues
2748 * @adapter: board private structure
2750 * Free all receive software resources
2752 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2756 for (i = 0; i < adapter->num_rx_queues; i++)
2757 igb_free_rx_resources(adapter->rx_ring[i]);
2761 * igb_clean_rx_ring - Free Rx Buffers per Queue
2762 * @rx_ring: ring to free buffers from
2764 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2766 struct igb_buffer *buffer_info;
2770 if (!rx_ring->buffer_info)
2773 /* Free all the Rx ring sk_buffs */
2774 for (i = 0; i < rx_ring->count; i++) {
2775 buffer_info = &rx_ring->buffer_info[i];
2776 if (buffer_info->dma) {
2777 pci_unmap_single(rx_ring->pdev,
2779 rx_ring->rx_buffer_len,
2780 PCI_DMA_FROMDEVICE);
2781 buffer_info->dma = 0;
2784 if (buffer_info->skb) {
2785 dev_kfree_skb(buffer_info->skb);
2786 buffer_info->skb = NULL;
2788 if (buffer_info->page_dma) {
2789 pci_unmap_page(rx_ring->pdev,
2790 buffer_info->page_dma,
2792 PCI_DMA_FROMDEVICE);
2793 buffer_info->page_dma = 0;
2795 if (buffer_info->page) {
2796 put_page(buffer_info->page);
2797 buffer_info->page = NULL;
2798 buffer_info->page_offset = 0;
2802 size = sizeof(struct igb_buffer) * rx_ring->count;
2803 memset(rx_ring->buffer_info, 0, size);
2805 /* Zero out the descriptor ring */
2806 memset(rx_ring->desc, 0, rx_ring->size);
2808 rx_ring->next_to_clean = 0;
2809 rx_ring->next_to_use = 0;
2813 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2814 * @adapter: board private structure
2816 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2820 for (i = 0; i < adapter->num_rx_queues; i++)
2821 igb_clean_rx_ring(adapter->rx_ring[i]);
2825 * igb_set_mac - Change the Ethernet Address of the NIC
2826 * @netdev: network interface device structure
2827 * @p: pointer to an address structure
2829 * Returns 0 on success, negative on failure
2831 static int igb_set_mac(struct net_device *netdev, void *p)
2833 struct igb_adapter *adapter = netdev_priv(netdev);
2834 struct e1000_hw *hw = &adapter->hw;
2835 struct sockaddr *addr = p;
2837 if (!is_valid_ether_addr(addr->sa_data))
2838 return -EADDRNOTAVAIL;
2840 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2841 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2843 /* set the correct pool for the new PF MAC address in entry 0 */
2844 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2845 adapter->vfs_allocated_count);
2851 * igb_write_mc_addr_list - write multicast addresses to MTA
2852 * @netdev: network interface device structure
2854 * Writes multicast address list to the MTA hash table.
2855 * Returns: -ENOMEM on failure
2856 * 0 on no addresses written
2857 * X on writing X addresses to MTA
2859 static int igb_write_mc_addr_list(struct net_device *netdev)
2861 struct igb_adapter *adapter = netdev_priv(netdev);
2862 struct e1000_hw *hw = &adapter->hw;
2863 struct netdev_hw_addr *ha;
2867 if (netdev_mc_empty(netdev)) {
2868 /* nothing to program, so clear mc list */
2869 igb_update_mc_addr_list(hw, NULL, 0);
2870 igb_restore_vf_multicasts(adapter);
2874 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
2878 /* The shared function expects a packed array of only addresses. */
2880 netdev_for_each_mc_addr(ha, netdev)
2881 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
2883 igb_update_mc_addr_list(hw, mta_list, i);
2886 return netdev_mc_count(netdev);
2890 * igb_write_uc_addr_list - write unicast addresses to RAR table
2891 * @netdev: network interface device structure
2893 * Writes unicast address list to the RAR table.
2894 * Returns: -ENOMEM on failure/insufficient address space
2895 * 0 on no addresses written
2896 * X on writing X addresses to the RAR table
2898 static int igb_write_uc_addr_list(struct net_device *netdev)
2900 struct igb_adapter *adapter = netdev_priv(netdev);
2901 struct e1000_hw *hw = &adapter->hw;
2902 unsigned int vfn = adapter->vfs_allocated_count;
2903 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2906 /* return ENOMEM indicating insufficient memory for addresses */
2907 if (netdev_uc_count(netdev) > rar_entries)
2910 if (!netdev_uc_empty(netdev) && rar_entries) {
2911 struct netdev_hw_addr *ha;
2913 netdev_for_each_uc_addr(ha, netdev) {
2916 igb_rar_set_qsel(adapter, ha->addr,
2922 /* write the addresses in reverse order to avoid write combining */
2923 for (; rar_entries > 0 ; rar_entries--) {
2924 wr32(E1000_RAH(rar_entries), 0);
2925 wr32(E1000_RAL(rar_entries), 0);
2933 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2934 * @netdev: network interface device structure
2936 * The set_rx_mode entry point is called whenever the unicast or multicast
2937 * address lists or the network interface flags are updated. This routine is
2938 * responsible for configuring the hardware for proper unicast, multicast,
2939 * promiscuous mode, and all-multi behavior.
2941 static void igb_set_rx_mode(struct net_device *netdev)
2943 struct igb_adapter *adapter = netdev_priv(netdev);
2944 struct e1000_hw *hw = &adapter->hw;
2945 unsigned int vfn = adapter->vfs_allocated_count;
2946 u32 rctl, vmolr = 0;
2949 /* Check for Promiscuous and All Multicast modes */
2950 rctl = rd32(E1000_RCTL);
2952 /* clear the effected bits */
2953 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2955 if (netdev->flags & IFF_PROMISC) {
2956 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2957 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2959 if (netdev->flags & IFF_ALLMULTI) {
2960 rctl |= E1000_RCTL_MPE;
2961 vmolr |= E1000_VMOLR_MPME;
2964 * Write addresses to the MTA, if the attempt fails
2965 * then we should just turn on promiscous mode so
2966 * that we can at least receive multicast traffic
2968 count = igb_write_mc_addr_list(netdev);
2970 rctl |= E1000_RCTL_MPE;
2971 vmolr |= E1000_VMOLR_MPME;
2973 vmolr |= E1000_VMOLR_ROMPE;
2977 * Write addresses to available RAR registers, if there is not
2978 * sufficient space to store all the addresses then enable
2979 * unicast promiscous mode
2981 count = igb_write_uc_addr_list(netdev);
2983 rctl |= E1000_RCTL_UPE;
2984 vmolr |= E1000_VMOLR_ROPE;
2986 rctl |= E1000_RCTL_VFE;
2988 wr32(E1000_RCTL, rctl);
2991 * In order to support SR-IOV and eventually VMDq it is necessary to set
2992 * the VMOLR to enable the appropriate modes. Without this workaround
2993 * we will have issues with VLAN tag stripping not being done for frames
2994 * that are only arriving because we are the default pool
2996 if (hw->mac.type < e1000_82576)
2999 vmolr |= rd32(E1000_VMOLR(vfn)) &
3000 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3001 wr32(E1000_VMOLR(vfn), vmolr);
3002 igb_restore_vf_multicasts(adapter);
3005 /* Need to wait a few seconds after link up to get diagnostic information from
3007 static void igb_update_phy_info(unsigned long data)
3009 struct igb_adapter *adapter = (struct igb_adapter *) data;
3010 igb_get_phy_info(&adapter->hw);
3014 * igb_has_link - check shared code for link and determine up/down
3015 * @adapter: pointer to driver private info
3017 bool igb_has_link(struct igb_adapter *adapter)
3019 struct e1000_hw *hw = &adapter->hw;
3020 bool link_active = false;
3023 /* get_link_status is set on LSC (link status) interrupt or
3024 * rx sequence error interrupt. get_link_status will stay
3025 * false until the e1000_check_for_link establishes link
3026 * for copper adapters ONLY
3028 switch (hw->phy.media_type) {
3029 case e1000_media_type_copper:
3030 if (hw->mac.get_link_status) {
3031 ret_val = hw->mac.ops.check_for_link(hw);
3032 link_active = !hw->mac.get_link_status;
3037 case e1000_media_type_internal_serdes:
3038 ret_val = hw->mac.ops.check_for_link(hw);
3039 link_active = hw->mac.serdes_has_link;
3042 case e1000_media_type_unknown:
3050 * igb_watchdog - Timer Call-back
3051 * @data: pointer to adapter cast into an unsigned long
3053 static void igb_watchdog(unsigned long data)
3055 struct igb_adapter *adapter = (struct igb_adapter *)data;
3056 /* Do the rest outside of interrupt context */
3057 schedule_work(&adapter->watchdog_task);
3060 static void igb_watchdog_task(struct work_struct *work)
3062 struct igb_adapter *adapter = container_of(work,
3065 struct e1000_hw *hw = &adapter->hw;
3066 struct net_device *netdev = adapter->netdev;
3070 link = igb_has_link(adapter);
3072 if (!netif_carrier_ok(netdev)) {
3074 hw->mac.ops.get_speed_and_duplex(hw,
3075 &adapter->link_speed,
3076 &adapter->link_duplex);
3078 ctrl = rd32(E1000_CTRL);
3079 /* Links status message must follow this format */
3080 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3081 "Flow Control: %s\n",
3083 adapter->link_speed,
3084 adapter->link_duplex == FULL_DUPLEX ?
3085 "Full Duplex" : "Half Duplex",
3086 ((ctrl & E1000_CTRL_TFCE) &&
3087 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3088 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3089 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3091 /* adjust timeout factor according to speed/duplex */
3092 adapter->tx_timeout_factor = 1;
3093 switch (adapter->link_speed) {
3095 adapter->tx_timeout_factor = 14;
3098 /* maybe add some timeout factor ? */
3102 netif_carrier_on(netdev);
3104 igb_ping_all_vfs(adapter);
3106 /* link state has changed, schedule phy info update */
3107 if (!test_bit(__IGB_DOWN, &adapter->state))
3108 mod_timer(&adapter->phy_info_timer,
3109 round_jiffies(jiffies + 2 * HZ));
3112 if (netif_carrier_ok(netdev)) {
3113 adapter->link_speed = 0;
3114 adapter->link_duplex = 0;
3115 /* Links status message must follow this format */
3116 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3118 netif_carrier_off(netdev);
3120 igb_ping_all_vfs(adapter);
3122 /* link state has changed, schedule phy info update */
3123 if (!test_bit(__IGB_DOWN, &adapter->state))
3124 mod_timer(&adapter->phy_info_timer,
3125 round_jiffies(jiffies + 2 * HZ));
3129 igb_update_stats(adapter);
3131 for (i = 0; i < adapter->num_tx_queues; i++) {
3132 struct igb_ring *tx_ring = adapter->tx_ring[i];
3133 if (!netif_carrier_ok(netdev)) {
3134 /* We've lost link, so the controller stops DMA,
3135 * but we've got queued Tx work that's never going
3136 * to get done, so reset controller to flush Tx.
3137 * (Do the reset outside of interrupt context). */
3138 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3139 adapter->tx_timeout_count++;
3140 schedule_work(&adapter->reset_task);
3141 /* return immediately since reset is imminent */
3146 /* Force detection of hung controller every watchdog period */
3147 tx_ring->detect_tx_hung = true;
3150 /* Cause software interrupt to ensure rx ring is cleaned */
3151 if (adapter->msix_entries) {
3153 for (i = 0; i < adapter->num_q_vectors; i++) {
3154 struct igb_q_vector *q_vector = adapter->q_vector[i];
3155 eics |= q_vector->eims_value;
3157 wr32(E1000_EICS, eics);
3159 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3162 /* Reset the timer */
3163 if (!test_bit(__IGB_DOWN, &adapter->state))
3164 mod_timer(&adapter->watchdog_timer,
3165 round_jiffies(jiffies + 2 * HZ));
3168 enum latency_range {
3172 latency_invalid = 255
3176 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3178 * Stores a new ITR value based on strictly on packet size. This
3179 * algorithm is less sophisticated than that used in igb_update_itr,
3180 * due to the difficulty of synchronizing statistics across multiple
3181 * receive rings. The divisors and thresholds used by this fuction
3182 * were determined based on theoretical maximum wire speed and testing
3183 * data, in order to minimize response time while increasing bulk
3185 * This functionality is controlled by the InterruptThrottleRate module
3186 * parameter (see igb_param.c)
3187 * NOTE: This function is called only when operating in a multiqueue
3188 * receive environment.
3189 * @q_vector: pointer to q_vector
3191 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3193 int new_val = q_vector->itr_val;
3194 int avg_wire_size = 0;
3195 struct igb_adapter *adapter = q_vector->adapter;
3197 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3198 * ints/sec - ITR timer value of 120 ticks.
3200 if (adapter->link_speed != SPEED_1000) {
3205 if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3206 struct igb_ring *ring = q_vector->rx_ring;
3207 avg_wire_size = ring->total_bytes / ring->total_packets;
3210 if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3211 struct igb_ring *ring = q_vector->tx_ring;
3212 avg_wire_size = max_t(u32, avg_wire_size,
3213 (ring->total_bytes /
3214 ring->total_packets));
3217 /* if avg_wire_size isn't set no work was done */
3221 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3222 avg_wire_size += 24;
3224 /* Don't starve jumbo frames */
3225 avg_wire_size = min(avg_wire_size, 3000);
3227 /* Give a little boost to mid-size frames */
3228 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3229 new_val = avg_wire_size / 3;
3231 new_val = avg_wire_size / 2;
3233 /* when in itr mode 3 do not exceed 20K ints/sec */
3234 if (adapter->rx_itr_setting == 3 && new_val < 196)
3238 if (new_val != q_vector->itr_val) {
3239 q_vector->itr_val = new_val;
3240 q_vector->set_itr = 1;
3243 if (q_vector->rx_ring) {
3244 q_vector->rx_ring->total_bytes = 0;
3245 q_vector->rx_ring->total_packets = 0;
3247 if (q_vector->tx_ring) {
3248 q_vector->tx_ring->total_bytes = 0;
3249 q_vector->tx_ring->total_packets = 0;
3254 * igb_update_itr - update the dynamic ITR value based on statistics
3255 * Stores a new ITR value based on packets and byte
3256 * counts during the last interrupt. The advantage of per interrupt
3257 * computation is faster updates and more accurate ITR for the current
3258 * traffic pattern. Constants in this function were computed
3259 * based on theoretical maximum wire speed and thresholds were set based
3260 * on testing data as well as attempting to minimize response time
3261 * while increasing bulk throughput.
3262 * this functionality is controlled by the InterruptThrottleRate module
3263 * parameter (see igb_param.c)
3264 * NOTE: These calculations are only valid when operating in a single-
3265 * queue environment.
3266 * @adapter: pointer to adapter
3267 * @itr_setting: current q_vector->itr_val
3268 * @packets: the number of packets during this measurement interval
3269 * @bytes: the number of bytes during this measurement interval
3271 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3272 int packets, int bytes)
3274 unsigned int retval = itr_setting;
3277 goto update_itr_done;
3279 switch (itr_setting) {
3280 case lowest_latency:
3281 /* handle TSO and jumbo frames */
3282 if (bytes/packets > 8000)
3283 retval = bulk_latency;
3284 else if ((packets < 5) && (bytes > 512))
3285 retval = low_latency;
3287 case low_latency: /* 50 usec aka 20000 ints/s */
3288 if (bytes > 10000) {
3289 /* this if handles the TSO accounting */
3290 if (bytes/packets > 8000) {
3291 retval = bulk_latency;
3292 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3293 retval = bulk_latency;
3294 } else if ((packets > 35)) {
3295 retval = lowest_latency;
3297 } else if (bytes/packets > 2000) {
3298 retval = bulk_latency;
3299 } else if (packets <= 2 && bytes < 512) {
3300 retval = lowest_latency;
3303 case bulk_latency: /* 250 usec aka 4000 ints/s */
3304 if (bytes > 25000) {
3306 retval = low_latency;
3307 } else if (bytes < 1500) {
3308 retval = low_latency;
3317 static void igb_set_itr(struct igb_adapter *adapter)
3319 struct igb_q_vector *q_vector = adapter->q_vector[0];
3321 u32 new_itr = q_vector->itr_val;
3323 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3324 if (adapter->link_speed != SPEED_1000) {
3330 adapter->rx_itr = igb_update_itr(adapter,
3332 q_vector->rx_ring->total_packets,
3333 q_vector->rx_ring->total_bytes);
3335 adapter->tx_itr = igb_update_itr(adapter,
3337 q_vector->tx_ring->total_packets,
3338 q_vector->tx_ring->total_bytes);
3339 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3341 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3342 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3343 current_itr = low_latency;
3345 switch (current_itr) {
3346 /* counts and packets in update_itr are dependent on these numbers */
3347 case lowest_latency:
3348 new_itr = 56; /* aka 70,000 ints/sec */
3351 new_itr = 196; /* aka 20,000 ints/sec */
3354 new_itr = 980; /* aka 4,000 ints/sec */
3361 q_vector->rx_ring->total_bytes = 0;
3362 q_vector->rx_ring->total_packets = 0;
3363 q_vector->tx_ring->total_bytes = 0;
3364 q_vector->tx_ring->total_packets = 0;
3366 if (new_itr != q_vector->itr_val) {
3367 /* this attempts to bias the interrupt rate towards Bulk
3368 * by adding intermediate steps when interrupt rate is
3370 new_itr = new_itr > q_vector->itr_val ?
3371 max((new_itr * q_vector->itr_val) /
3372 (new_itr + (q_vector->itr_val >> 2)),
3375 /* Don't write the value here; it resets the adapter's
3376 * internal timer, and causes us to delay far longer than
3377 * we should between interrupts. Instead, we write the ITR
3378 * value at the beginning of the next interrupt so the timing
3379 * ends up being correct.
3381 q_vector->itr_val = new_itr;
3382 q_vector->set_itr = 1;
3388 #define IGB_TX_FLAGS_CSUM 0x00000001
3389 #define IGB_TX_FLAGS_VLAN 0x00000002
3390 #define IGB_TX_FLAGS_TSO 0x00000004
3391 #define IGB_TX_FLAGS_IPV4 0x00000008
3392 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3393 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3394 #define IGB_TX_FLAGS_VLAN_SHIFT 16
3396 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3397 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3399 struct e1000_adv_tx_context_desc *context_desc;
3402 struct igb_buffer *buffer_info;
3403 u32 info = 0, tu_cmd = 0;
3407 if (skb_header_cloned(skb)) {
3408 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3413 l4len = tcp_hdrlen(skb);
3416 if (skb->protocol == htons(ETH_P_IP)) {
3417 struct iphdr *iph = ip_hdr(skb);
3420 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3424 } else if (skb_is_gso_v6(skb)) {
3425 ipv6_hdr(skb)->payload_len = 0;
3426 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3427 &ipv6_hdr(skb)->daddr,
3431 i = tx_ring->next_to_use;
3433 buffer_info = &tx_ring->buffer_info[i];
3434 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3435 /* VLAN MACLEN IPLEN */
3436 if (tx_flags & IGB_TX_FLAGS_VLAN)
3437 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3438 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3439 *hdr_len += skb_network_offset(skb);
3440 info |= skb_network_header_len(skb);
3441 *hdr_len += skb_network_header_len(skb);
3442 context_desc->vlan_macip_lens = cpu_to_le32(info);
3444 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3445 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3447 if (skb->protocol == htons(ETH_P_IP))
3448 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3449 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3451 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3454 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3455 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3457 /* For 82575, context index must be unique per ring. */
3458 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3459 mss_l4len_idx |= tx_ring->reg_idx << 4;
3461 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3462 context_desc->seqnum_seed = 0;
3464 buffer_info->time_stamp = jiffies;
3465 buffer_info->next_to_watch = i;
3466 buffer_info->dma = 0;
3468 if (i == tx_ring->count)
3471 tx_ring->next_to_use = i;
3476 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3477 struct sk_buff *skb, u32 tx_flags)
3479 struct e1000_adv_tx_context_desc *context_desc;
3480 struct pci_dev *pdev = tx_ring->pdev;
3481 struct igb_buffer *buffer_info;
3482 u32 info = 0, tu_cmd = 0;
3485 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3486 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3487 i = tx_ring->next_to_use;
3488 buffer_info = &tx_ring->buffer_info[i];
3489 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3491 if (tx_flags & IGB_TX_FLAGS_VLAN)
3492 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3494 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3495 if (skb->ip_summed == CHECKSUM_PARTIAL)
3496 info |= skb_network_header_len(skb);
3498 context_desc->vlan_macip_lens = cpu_to_le32(info);
3500 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3502 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3505 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3506 const struct vlan_ethhdr *vhdr =
3507 (const struct vlan_ethhdr*)skb->data;
3509 protocol = vhdr->h_vlan_encapsulated_proto;
3511 protocol = skb->protocol;
3515 case cpu_to_be16(ETH_P_IP):
3516 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3517 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3518 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3519 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3520 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3522 case cpu_to_be16(ETH_P_IPV6):
3523 /* XXX what about other V6 headers?? */
3524 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3525 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3526 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3527 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3530 if (unlikely(net_ratelimit()))
3531 dev_warn(&pdev->dev,
3532 "partial checksum but proto=%x!\n",
3538 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3539 context_desc->seqnum_seed = 0;
3540 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3541 context_desc->mss_l4len_idx =
3542 cpu_to_le32(tx_ring->reg_idx << 4);
3544 buffer_info->time_stamp = jiffies;
3545 buffer_info->next_to_watch = i;
3546 buffer_info->dma = 0;
3549 if (i == tx_ring->count)
3551 tx_ring->next_to_use = i;
3558 #define IGB_MAX_TXD_PWR 16
3559 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
3561 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3564 struct igb_buffer *buffer_info;
3565 struct pci_dev *pdev = tx_ring->pdev;
3566 unsigned int len = skb_headlen(skb);
3567 unsigned int count = 0, i;
3570 i = tx_ring->next_to_use;
3572 buffer_info = &tx_ring->buffer_info[i];
3573 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3574 buffer_info->length = len;
3575 /* set time_stamp *before* dma to help avoid a possible race */
3576 buffer_info->time_stamp = jiffies;
3577 buffer_info->next_to_watch = i;
3578 buffer_info->dma = pci_map_single(pdev, skb->data, len,
3580 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3583 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3584 struct skb_frag_struct *frag;
3588 if (i == tx_ring->count)
3591 frag = &skb_shinfo(skb)->frags[f];
3594 buffer_info = &tx_ring->buffer_info[i];
3595 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3596 buffer_info->length = len;
3597 buffer_info->time_stamp = jiffies;
3598 buffer_info->next_to_watch = i;
3599 buffer_info->mapped_as_page = true;
3600 buffer_info->dma = pci_map_page(pdev,
3605 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3610 tx_ring->buffer_info[i].skb = skb;
3611 tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3612 tx_ring->buffer_info[first].next_to_watch = i;
3617 dev_err(&pdev->dev, "TX DMA map failed\n");
3619 /* clear timestamp and dma mappings for failed buffer_info mapping */
3620 buffer_info->dma = 0;
3621 buffer_info->time_stamp = 0;
3622 buffer_info->length = 0;
3623 buffer_info->next_to_watch = 0;
3624 buffer_info->mapped_as_page = false;
3626 /* clear timestamp and dma mappings for remaining portion of packet */
3631 buffer_info = &tx_ring->buffer_info[i];
3632 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3638 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3639 u32 tx_flags, int count, u32 paylen,
3642 union e1000_adv_tx_desc *tx_desc;
3643 struct igb_buffer *buffer_info;
3644 u32 olinfo_status = 0, cmd_type_len;
3645 unsigned int i = tx_ring->next_to_use;
3647 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3648 E1000_ADVTXD_DCMD_DEXT);
3650 if (tx_flags & IGB_TX_FLAGS_VLAN)
3651 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3653 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3654 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3656 if (tx_flags & IGB_TX_FLAGS_TSO) {
3657 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3659 /* insert tcp checksum */
3660 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3662 /* insert ip checksum */
3663 if (tx_flags & IGB_TX_FLAGS_IPV4)
3664 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3666 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3667 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3670 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3671 (tx_flags & (IGB_TX_FLAGS_CSUM |
3673 IGB_TX_FLAGS_VLAN)))
3674 olinfo_status |= tx_ring->reg_idx << 4;
3676 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3679 buffer_info = &tx_ring->buffer_info[i];
3680 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3681 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3682 tx_desc->read.cmd_type_len =
3683 cpu_to_le32(cmd_type_len | buffer_info->length);
3684 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3687 if (i == tx_ring->count)
3689 } while (count > 0);
3691 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3692 /* Force memory writes to complete before letting h/w
3693 * know there are new descriptors to fetch. (Only
3694 * applicable for weak-ordered memory model archs,
3695 * such as IA-64). */
3698 tx_ring->next_to_use = i;
3699 writel(i, tx_ring->tail);
3700 /* we need this if more than one processor can write to our tail
3701 * at a time, it syncronizes IO on IA64/Altix systems */
3705 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3707 struct net_device *netdev = tx_ring->netdev;
3709 netif_stop_subqueue(netdev, tx_ring->queue_index);
3711 /* Herbert's original patch had:
3712 * smp_mb__after_netif_stop_queue();
3713 * but since that doesn't exist yet, just open code it. */
3716 /* We need to check again in a case another CPU has just
3717 * made room available. */
3718 if (igb_desc_unused(tx_ring) < size)
3722 netif_wake_subqueue(netdev, tx_ring->queue_index);
3723 tx_ring->tx_stats.restart_queue++;
3727 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3729 if (igb_desc_unused(tx_ring) >= size)
3731 return __igb_maybe_stop_tx(tx_ring, size);
3734 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3735 struct igb_ring *tx_ring)
3737 struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3742 union skb_shared_tx *shtx = skb_tx(skb);
3744 /* need: 1 descriptor per page,
3745 * + 2 desc gap to keep tail from touching head,
3746 * + 1 desc for skb->data,
3747 * + 1 desc for context descriptor,
3748 * otherwise try next time */
3749 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3750 /* this is a hard error */
3751 return NETDEV_TX_BUSY;
3754 if (unlikely(shtx->hardware)) {
3755 shtx->in_progress = 1;
3756 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3759 if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3760 tx_flags |= IGB_TX_FLAGS_VLAN;
3761 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3764 if (skb->protocol == htons(ETH_P_IP))
3765 tx_flags |= IGB_TX_FLAGS_IPV4;
3767 first = tx_ring->next_to_use;
3768 if (skb_is_gso(skb)) {
3769 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3772 dev_kfree_skb_any(skb);
3773 return NETDEV_TX_OK;
3778 tx_flags |= IGB_TX_FLAGS_TSO;
3779 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3780 (skb->ip_summed == CHECKSUM_PARTIAL))
3781 tx_flags |= IGB_TX_FLAGS_CSUM;
3784 * count reflects descriptors mapped, if 0 or less then mapping error
3785 * has occured and we need to rewind the descriptor queue
3787 count = igb_tx_map_adv(tx_ring, skb, first);
3789 dev_kfree_skb_any(skb);
3790 tx_ring->buffer_info[first].time_stamp = 0;
3791 tx_ring->next_to_use = first;
3792 return NETDEV_TX_OK;
3795 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3797 /* Make sure there is space in the ring for the next send. */
3798 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3800 return NETDEV_TX_OK;
3803 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3804 struct net_device *netdev)
3806 struct igb_adapter *adapter = netdev_priv(netdev);
3807 struct igb_ring *tx_ring;
3810 if (test_bit(__IGB_DOWN, &adapter->state)) {
3811 dev_kfree_skb_any(skb);
3812 return NETDEV_TX_OK;
3815 if (skb->len <= 0) {
3816 dev_kfree_skb_any(skb);
3817 return NETDEV_TX_OK;
3820 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3821 tx_ring = adapter->multi_tx_table[r_idx];
3823 /* This goes back to the question of how to logically map a tx queue
3824 * to a flow. Right now, performance is impacted slightly negatively
3825 * if using multiple tx queues. If the stack breaks away from a
3826 * single qdisc implementation, we can look at this again. */
3827 return igb_xmit_frame_ring_adv(skb, tx_ring);
3831 * igb_tx_timeout - Respond to a Tx Hang
3832 * @netdev: network interface device structure
3834 static void igb_tx_timeout(struct net_device *netdev)
3836 struct igb_adapter *adapter = netdev_priv(netdev);
3837 struct e1000_hw *hw = &adapter->hw;
3839 /* Do the reset outside of interrupt context */
3840 adapter->tx_timeout_count++;
3842 if (hw->mac.type == e1000_82580)
3843 hw->dev_spec._82575.global_device_reset = true;
3845 schedule_work(&adapter->reset_task);
3847 (adapter->eims_enable_mask & ~adapter->eims_other));
3850 static void igb_reset_task(struct work_struct *work)
3852 struct igb_adapter *adapter;
3853 adapter = container_of(work, struct igb_adapter, reset_task);
3855 igb_reinit_locked(adapter);
3859 * igb_get_stats - Get System Network Statistics
3860 * @netdev: network interface device structure
3862 * Returns the address of the device statistics structure.
3863 * The statistics are actually updated from the timer callback.
3865 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3867 /* only return the current stats */
3868 return &netdev->stats;
3872 * igb_change_mtu - Change the Maximum Transfer Unit
3873 * @netdev: network interface device structure
3874 * @new_mtu: new value for maximum frame size
3876 * Returns 0 on success, negative on failure
3878 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3880 struct igb_adapter *adapter = netdev_priv(netdev);
3881 struct pci_dev *pdev = adapter->pdev;
3882 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3883 u32 rx_buffer_len, i;
3885 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3886 dev_err(&pdev->dev, "Invalid MTU setting\n");
3890 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3891 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3895 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3898 /* igb_down has a dependency on max_frame_size */
3899 adapter->max_frame_size = max_frame;
3901 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3902 * means we reserve 2 more, this pushes us to allocate from the next
3904 * i.e. RXBUFFER_2048 --> size-4096 slab
3907 if (adapter->hw.mac.type == e1000_82580)
3908 max_frame += IGB_TS_HDR_LEN;
3910 if (max_frame <= IGB_RXBUFFER_1024)
3911 rx_buffer_len = IGB_RXBUFFER_1024;
3912 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3913 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3915 rx_buffer_len = IGB_RXBUFFER_128;
3917 if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
3918 (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
3919 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
3921 if ((adapter->hw.mac.type == e1000_82580) &&
3922 (rx_buffer_len == IGB_RXBUFFER_128))
3923 rx_buffer_len += IGB_RXBUFFER_64;
3925 if (netif_running(netdev))
3928 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3929 netdev->mtu, new_mtu);
3930 netdev->mtu = new_mtu;
3932 for (i = 0; i < adapter->num_rx_queues; i++)
3933 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3935 if (netif_running(netdev))
3940 clear_bit(__IGB_RESETTING, &adapter->state);
3946 * igb_update_stats - Update the board statistics counters
3947 * @adapter: board private structure
3950 void igb_update_stats(struct igb_adapter *adapter)
3952 struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3953 struct e1000_hw *hw = &adapter->hw;
3954 struct pci_dev *pdev = adapter->pdev;
3960 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3963 * Prevent stats update while adapter is being reset, or if the pci
3964 * connection is down.
3966 if (adapter->link_speed == 0)
3968 if (pci_channel_offline(pdev))
3973 for (i = 0; i < adapter->num_rx_queues; i++) {
3974 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3975 struct igb_ring *ring = adapter->rx_ring[i];
3976 ring->rx_stats.drops += rqdpc_tmp;
3977 net_stats->rx_fifo_errors += rqdpc_tmp;
3978 bytes += ring->rx_stats.bytes;
3979 packets += ring->rx_stats.packets;
3982 net_stats->rx_bytes = bytes;
3983 net_stats->rx_packets = packets;
3987 for (i = 0; i < adapter->num_tx_queues; i++) {
3988 struct igb_ring *ring = adapter->tx_ring[i];
3989 bytes += ring->tx_stats.bytes;
3990 packets += ring->tx_stats.packets;
3992 net_stats->tx_bytes = bytes;
3993 net_stats->tx_packets = packets;
3995 /* read stats registers */
3996 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3997 adapter->stats.gprc += rd32(E1000_GPRC);
3998 adapter->stats.gorc += rd32(E1000_GORCL);
3999 rd32(E1000_GORCH); /* clear GORCL */
4000 adapter->stats.bprc += rd32(E1000_BPRC);
4001 adapter->stats.mprc += rd32(E1000_MPRC);
4002 adapter->stats.roc += rd32(E1000_ROC);
4004 adapter->stats.prc64 += rd32(E1000_PRC64);
4005 adapter->stats.prc127 += rd32(E1000_PRC127);
4006 adapter->stats.prc255 += rd32(E1000_PRC255);
4007 adapter->stats.prc511 += rd32(E1000_PRC511);
4008 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4009 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4010 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4011 adapter->stats.sec += rd32(E1000_SEC);
4013 mpc = rd32(E1000_MPC);
4014 adapter->stats.mpc += mpc;
4015 net_stats->rx_fifo_errors += mpc;
4016 adapter->stats.scc += rd32(E1000_SCC);
4017 adapter->stats.ecol += rd32(E1000_ECOL);
4018 adapter->stats.mcc += rd32(E1000_MCC);
4019 adapter->stats.latecol += rd32(E1000_LATECOL);
4020 adapter->stats.dc += rd32(E1000_DC);
4021 adapter->stats.rlec += rd32(E1000_RLEC);
4022 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4023 adapter->stats.xontxc += rd32(E1000_XONTXC);
4024 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4025 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4026 adapter->stats.fcruc += rd32(E1000_FCRUC);
4027 adapter->stats.gptc += rd32(E1000_GPTC);
4028 adapter->stats.gotc += rd32(E1000_GOTCL);
4029 rd32(E1000_GOTCH); /* clear GOTCL */
4030 adapter->stats.rnbc += rd32(E1000_RNBC);
4031 adapter->stats.ruc += rd32(E1000_RUC);
4032 adapter->stats.rfc += rd32(E1000_RFC);
4033 adapter->stats.rjc += rd32(E1000_RJC);
4034 adapter->stats.tor += rd32(E1000_TORH);
4035 adapter->stats.tot += rd32(E1000_TOTH);
4036 adapter->stats.tpr += rd32(E1000_TPR);
4038 adapter->stats.ptc64 += rd32(E1000_PTC64);
4039 adapter->stats.ptc127 += rd32(E1000_PTC127);
4040 adapter->stats.ptc255 += rd32(E1000_PTC255);
4041 adapter->stats.ptc511 += rd32(E1000_PTC511);
4042 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4043 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4045 adapter->stats.mptc += rd32(E1000_MPTC);
4046 adapter->stats.bptc += rd32(E1000_BPTC);
4048 adapter->stats.tpt += rd32(E1000_TPT);
4049 adapter->stats.colc += rd32(E1000_COLC);
4051 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4052 /* read internal phy specific stats */
4053 reg = rd32(E1000_CTRL_EXT);
4054 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4055 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4056 adapter->stats.tncrs += rd32(E1000_TNCRS);
4059 adapter->stats.tsctc += rd32(E1000_TSCTC);
4060 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4062 adapter->stats.iac += rd32(E1000_IAC);
4063 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4064 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4065 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4066 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4067 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4068 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4069 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4070 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4072 /* Fill out the OS statistics structure */
4073 net_stats->multicast = adapter->stats.mprc;
4074 net_stats->collisions = adapter->stats.colc;
4078 /* RLEC on some newer hardware can be incorrect so build
4079 * our own version based on RUC and ROC */
4080 net_stats->rx_errors = adapter->stats.rxerrc +
4081 adapter->stats.crcerrs + adapter->stats.algnerrc +
4082 adapter->stats.ruc + adapter->stats.roc +
4083 adapter->stats.cexterr;
4084 net_stats->rx_length_errors = adapter->stats.ruc +
4086 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4087 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4088 net_stats->rx_missed_errors = adapter->stats.mpc;
4091 net_stats->tx_errors = adapter->stats.ecol +
4092 adapter->stats.latecol;
4093 net_stats->tx_aborted_errors = adapter->stats.ecol;
4094 net_stats->tx_window_errors = adapter->stats.latecol;
4095 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4097 /* Tx Dropped needs to be maintained elsewhere */
4100 if (hw->phy.media_type == e1000_media_type_copper) {
4101 if ((adapter->link_speed == SPEED_1000) &&
4102 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4103 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4104 adapter->phy_stats.idle_errors += phy_tmp;
4108 /* Management Stats */
4109 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4110 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4111 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4114 static irqreturn_t igb_msix_other(int irq, void *data)
4116 struct igb_adapter *adapter = data;
4117 struct e1000_hw *hw = &adapter->hw;
4118 u32 icr = rd32(E1000_ICR);
4119 /* reading ICR causes bit 31 of EICR to be cleared */
4121 if (icr & E1000_ICR_DRSTA)
4122 schedule_work(&adapter->reset_task);
4124 if (icr & E1000_ICR_DOUTSYNC) {
4125 /* HW is reporting DMA is out of sync */
4126 adapter->stats.doosync++;
4129 /* Check for a mailbox event */
4130 if (icr & E1000_ICR_VMMB)
4131 igb_msg_task(adapter);
4133 if (icr & E1000_ICR_LSC) {
4134 hw->mac.get_link_status = 1;
4135 /* guard against interrupt when we're going down */
4136 if (!test_bit(__IGB_DOWN, &adapter->state))
4137 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4140 if (adapter->vfs_allocated_count)
4141 wr32(E1000_IMS, E1000_IMS_LSC |
4143 E1000_IMS_DOUTSYNC);
4145 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4146 wr32(E1000_EIMS, adapter->eims_other);
4151 static void igb_write_itr(struct igb_q_vector *q_vector)
4153 struct igb_adapter *adapter = q_vector->adapter;
4154 u32 itr_val = q_vector->itr_val & 0x7FFC;
4156 if (!q_vector->set_itr)
4162 if (adapter->hw.mac.type == e1000_82575)
4163 itr_val |= itr_val << 16;
4165 itr_val |= 0x8000000;
4167 writel(itr_val, q_vector->itr_register);
4168 q_vector->set_itr = 0;
4171 static irqreturn_t igb_msix_ring(int irq, void *data)
4173 struct igb_q_vector *q_vector = data;
4175 /* Write the ITR value calculated from the previous interrupt. */
4176 igb_write_itr(q_vector);
4178 napi_schedule(&q_vector->napi);
4183 #ifdef CONFIG_IGB_DCA
4184 static void igb_update_dca(struct igb_q_vector *q_vector)
4186 struct igb_adapter *adapter = q_vector->adapter;
4187 struct e1000_hw *hw = &adapter->hw;
4188 int cpu = get_cpu();
4190 if (q_vector->cpu == cpu)
4193 if (q_vector->tx_ring) {
4194 int q = q_vector->tx_ring->reg_idx;
4195 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4196 if (hw->mac.type == e1000_82575) {
4197 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4198 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4200 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4201 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4202 E1000_DCA_TXCTRL_CPUID_SHIFT;
4204 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4205 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4207 if (q_vector->rx_ring) {
4208 int q = q_vector->rx_ring->reg_idx;
4209 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4210 if (hw->mac.type == e1000_82575) {
4211 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4212 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4214 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4215 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4216 E1000_DCA_RXCTRL_CPUID_SHIFT;
4218 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4219 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4220 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4221 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4223 q_vector->cpu = cpu;
4228 static void igb_setup_dca(struct igb_adapter *adapter)
4230 struct e1000_hw *hw = &adapter->hw;
4233 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4236 /* Always use CB2 mode, difference is masked in the CB driver. */
4237 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4239 for (i = 0; i < adapter->num_q_vectors; i++) {
4240 adapter->q_vector[i]->cpu = -1;
4241 igb_update_dca(adapter->q_vector[i]);
4245 static int __igb_notify_dca(struct device *dev, void *data)
4247 struct net_device *netdev = dev_get_drvdata(dev);
4248 struct igb_adapter *adapter = netdev_priv(netdev);
4249 struct pci_dev *pdev = adapter->pdev;
4250 struct e1000_hw *hw = &adapter->hw;
4251 unsigned long event = *(unsigned long *)data;
4254 case DCA_PROVIDER_ADD:
4255 /* if already enabled, don't do it again */
4256 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4258 if (dca_add_requester(dev) == 0) {
4259 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4260 dev_info(&pdev->dev, "DCA enabled\n");
4261 igb_setup_dca(adapter);
4264 /* Fall Through since DCA is disabled. */
4265 case DCA_PROVIDER_REMOVE:
4266 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4267 /* without this a class_device is left
4268 * hanging around in the sysfs model */
4269 dca_remove_requester(dev);
4270 dev_info(&pdev->dev, "DCA disabled\n");
4271 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4272 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4280 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4285 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4288 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4290 #endif /* CONFIG_IGB_DCA */
4292 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4294 struct e1000_hw *hw = &adapter->hw;
4298 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4299 ping = E1000_PF_CONTROL_MSG;
4300 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4301 ping |= E1000_VT_MSGTYPE_CTS;
4302 igb_write_mbx(hw, &ping, 1, i);
4306 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4308 struct e1000_hw *hw = &adapter->hw;
4309 u32 vmolr = rd32(E1000_VMOLR(vf));
4310 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4312 vf_data->flags |= ~(IGB_VF_FLAG_UNI_PROMISC |
4313 IGB_VF_FLAG_MULTI_PROMISC);
4314 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4316 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4317 vmolr |= E1000_VMOLR_MPME;
4318 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4321 * if we have hashes and we are clearing a multicast promisc
4322 * flag we need to write the hashes to the MTA as this step
4323 * was previously skipped
4325 if (vf_data->num_vf_mc_hashes > 30) {
4326 vmolr |= E1000_VMOLR_MPME;
4327 } else if (vf_data->num_vf_mc_hashes) {
4329 vmolr |= E1000_VMOLR_ROMPE;
4330 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4331 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4335 wr32(E1000_VMOLR(vf), vmolr);
4337 /* there are flags left unprocessed, likely not supported */
4338 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4345 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4346 u32 *msgbuf, u32 vf)
4348 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4349 u16 *hash_list = (u16 *)&msgbuf[1];
4350 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4353 /* salt away the number of multicast addresses assigned
4354 * to this VF for later use to restore when the PF multi cast
4357 vf_data->num_vf_mc_hashes = n;
4359 /* only up to 30 hash values supported */
4363 /* store the hashes for later use */
4364 for (i = 0; i < n; i++)
4365 vf_data->vf_mc_hashes[i] = hash_list[i];
4367 /* Flush and reset the mta with the new values */
4368 igb_set_rx_mode(adapter->netdev);
4373 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4375 struct e1000_hw *hw = &adapter->hw;
4376 struct vf_data_storage *vf_data;
4379 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4380 u32 vmolr = rd32(E1000_VMOLR(i));
4381 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4383 vf_data = &adapter->vf_data[i];
4385 if ((vf_data->num_vf_mc_hashes > 30) ||
4386 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4387 vmolr |= E1000_VMOLR_MPME;
4388 } else if (vf_data->num_vf_mc_hashes) {
4389 vmolr |= E1000_VMOLR_ROMPE;
4390 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4391 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4393 wr32(E1000_VMOLR(i), vmolr);
4397 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4399 struct e1000_hw *hw = &adapter->hw;
4400 u32 pool_mask, reg, vid;
4403 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4405 /* Find the vlan filter for this id */
4406 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4407 reg = rd32(E1000_VLVF(i));
4409 /* remove the vf from the pool */
4412 /* if pool is empty then remove entry from vfta */
4413 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4414 (reg & E1000_VLVF_VLANID_ENABLE)) {
4416 vid = reg & E1000_VLVF_VLANID_MASK;
4417 igb_vfta_set(hw, vid, false);
4420 wr32(E1000_VLVF(i), reg);
4423 adapter->vf_data[vf].vlans_enabled = 0;
4426 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4428 struct e1000_hw *hw = &adapter->hw;
4431 /* The vlvf table only exists on 82576 hardware and newer */
4432 if (hw->mac.type < e1000_82576)
4435 /* we only need to do this if VMDq is enabled */
4436 if (!adapter->vfs_allocated_count)
4439 /* Find the vlan filter for this id */
4440 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4441 reg = rd32(E1000_VLVF(i));
4442 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4443 vid == (reg & E1000_VLVF_VLANID_MASK))
4448 if (i == E1000_VLVF_ARRAY_SIZE) {
4449 /* Did not find a matching VLAN ID entry that was
4450 * enabled. Search for a free filter entry, i.e.
4451 * one without the enable bit set
4453 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4454 reg = rd32(E1000_VLVF(i));
4455 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4459 if (i < E1000_VLVF_ARRAY_SIZE) {
4460 /* Found an enabled/available entry */
4461 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4463 /* if !enabled we need to set this up in vfta */
4464 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4465 /* add VID to filter table */
4466 igb_vfta_set(hw, vid, true);
4467 reg |= E1000_VLVF_VLANID_ENABLE;
4469 reg &= ~E1000_VLVF_VLANID_MASK;
4471 wr32(E1000_VLVF(i), reg);
4473 /* do not modify RLPML for PF devices */
4474 if (vf >= adapter->vfs_allocated_count)
4477 if (!adapter->vf_data[vf].vlans_enabled) {
4479 reg = rd32(E1000_VMOLR(vf));
4480 size = reg & E1000_VMOLR_RLPML_MASK;
4482 reg &= ~E1000_VMOLR_RLPML_MASK;
4484 wr32(E1000_VMOLR(vf), reg);
4487 adapter->vf_data[vf].vlans_enabled++;
4491 if (i < E1000_VLVF_ARRAY_SIZE) {
4492 /* remove vf from the pool */
4493 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4494 /* if pool is empty then remove entry from vfta */
4495 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4497 igb_vfta_set(hw, vid, false);
4499 wr32(E1000_VLVF(i), reg);
4501 /* do not modify RLPML for PF devices */
4502 if (vf >= adapter->vfs_allocated_count)
4505 adapter->vf_data[vf].vlans_enabled--;
4506 if (!adapter->vf_data[vf].vlans_enabled) {
4508 reg = rd32(E1000_VMOLR(vf));
4509 size = reg & E1000_VMOLR_RLPML_MASK;
4511 reg &= ~E1000_VMOLR_RLPML_MASK;
4513 wr32(E1000_VMOLR(vf), reg);
4520 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4522 struct e1000_hw *hw = &adapter->hw;
4525 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4527 wr32(E1000_VMVIR(vf), 0);
4530 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4531 int vf, u16 vlan, u8 qos)
4534 struct igb_adapter *adapter = netdev_priv(netdev);
4536 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4539 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4542 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4543 igb_set_vmolr(adapter, vf, !vlan);
4544 adapter->vf_data[vf].pf_vlan = vlan;
4545 adapter->vf_data[vf].pf_qos = qos;
4546 dev_info(&adapter->pdev->dev,
4547 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4548 if (test_bit(__IGB_DOWN, &adapter->state)) {
4549 dev_warn(&adapter->pdev->dev,
4550 "The VF VLAN has been set,"
4551 " but the PF device is not up.\n");
4552 dev_warn(&adapter->pdev->dev,
4553 "Bring the PF device up before"
4554 " attempting to use the VF device.\n");
4557 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4559 igb_set_vmvir(adapter, vlan, vf);
4560 igb_set_vmolr(adapter, vf, true);
4561 adapter->vf_data[vf].pf_vlan = 0;
4562 adapter->vf_data[vf].pf_qos = 0;
4568 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4570 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4571 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4573 return igb_vlvf_set(adapter, vid, add, vf);
4576 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4579 adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4580 adapter->vf_data[vf].last_nack = jiffies;
4582 /* reset offloads to defaults */
4583 igb_set_vmolr(adapter, vf, true);
4585 /* reset vlans for device */
4586 igb_clear_vf_vfta(adapter, vf);
4587 if (adapter->vf_data[vf].pf_vlan)
4588 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4589 adapter->vf_data[vf].pf_vlan,
4590 adapter->vf_data[vf].pf_qos);
4592 igb_clear_vf_vfta(adapter, vf);
4594 /* reset multicast table array for vf */
4595 adapter->vf_data[vf].num_vf_mc_hashes = 0;
4597 /* Flush and reset the mta with the new values */
4598 igb_set_rx_mode(adapter->netdev);
4601 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4603 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4605 /* generate a new mac address as we were hotplug removed/added */
4606 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4607 random_ether_addr(vf_mac);
4609 /* process remaining reset events */
4610 igb_vf_reset(adapter, vf);
4613 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4615 struct e1000_hw *hw = &adapter->hw;
4616 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4617 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4619 u8 *addr = (u8 *)(&msgbuf[1]);
4621 /* process all the same items cleared in a function level reset */
4622 igb_vf_reset(adapter, vf);
4624 /* set vf mac address */
4625 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4627 /* enable transmit and receive for vf */
4628 reg = rd32(E1000_VFTE);
4629 wr32(E1000_VFTE, reg | (1 << vf));
4630 reg = rd32(E1000_VFRE);
4631 wr32(E1000_VFRE, reg | (1 << vf));
4633 adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4635 /* reply to reset with ack and vf mac address */
4636 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4637 memcpy(addr, vf_mac, 6);
4638 igb_write_mbx(hw, msgbuf, 3, vf);
4641 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4643 unsigned char *addr = (char *)&msg[1];
4646 if (is_valid_ether_addr(addr))
4647 err = igb_set_vf_mac(adapter, vf, addr);
4652 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4654 struct e1000_hw *hw = &adapter->hw;
4655 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4656 u32 msg = E1000_VT_MSGTYPE_NACK;
4658 /* if device isn't clear to send it shouldn't be reading either */
4659 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4660 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4661 igb_write_mbx(hw, &msg, 1, vf);
4662 vf_data->last_nack = jiffies;
4666 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4668 struct pci_dev *pdev = adapter->pdev;
4669 u32 msgbuf[E1000_VFMAILBOX_SIZE];
4670 struct e1000_hw *hw = &adapter->hw;
4671 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4674 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4677 /* if receive failed revoke VF CTS stats and restart init */
4678 dev_err(&pdev->dev, "Error receiving message from VF\n");
4679 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4680 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4685 /* this is a message we already processed, do nothing */
4686 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4690 * until the vf completes a reset it should not be
4691 * allowed to start any configuration.
4694 if (msgbuf[0] == E1000_VF_RESET) {
4695 igb_vf_reset_msg(adapter, vf);
4699 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4700 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4706 switch ((msgbuf[0] & 0xFFFF)) {
4707 case E1000_VF_SET_MAC_ADDR:
4708 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4710 case E1000_VF_SET_PROMISC:
4711 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4713 case E1000_VF_SET_MULTICAST:
4714 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4716 case E1000_VF_SET_LPE:
4717 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4719 case E1000_VF_SET_VLAN:
4720 if (adapter->vf_data[vf].pf_vlan)
4723 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4726 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4731 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4733 /* notify the VF of the results of what it sent us */
4735 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4737 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4739 igb_write_mbx(hw, msgbuf, 1, vf);
4742 static void igb_msg_task(struct igb_adapter *adapter)
4744 struct e1000_hw *hw = &adapter->hw;
4747 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4748 /* process any reset requests */
4749 if (!igb_check_for_rst(hw, vf))
4750 igb_vf_reset_event(adapter, vf);
4752 /* process any messages pending */
4753 if (!igb_check_for_msg(hw, vf))
4754 igb_rcv_msg_from_vf(adapter, vf);
4756 /* process any acks */
4757 if (!igb_check_for_ack(hw, vf))
4758 igb_rcv_ack_from_vf(adapter, vf);
4763 * igb_set_uta - Set unicast filter table address
4764 * @adapter: board private structure
4766 * The unicast table address is a register array of 32-bit registers.
4767 * The table is meant to be used in a way similar to how the MTA is used
4768 * however due to certain limitations in the hardware it is necessary to
4769 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscous
4770 * enable bit to allow vlan tag stripping when promiscous mode is enabled
4772 static void igb_set_uta(struct igb_adapter *adapter)
4774 struct e1000_hw *hw = &adapter->hw;
4777 /* The UTA table only exists on 82576 hardware and newer */
4778 if (hw->mac.type < e1000_82576)
4781 /* we only need to do this if VMDq is enabled */
4782 if (!adapter->vfs_allocated_count)
4785 for (i = 0; i < hw->mac.uta_reg_count; i++)
4786 array_wr32(E1000_UTA, i, ~0);
4790 * igb_intr_msi - Interrupt Handler
4791 * @irq: interrupt number
4792 * @data: pointer to a network interface device structure
4794 static irqreturn_t igb_intr_msi(int irq, void *data)
4796 struct igb_adapter *adapter = data;
4797 struct igb_q_vector *q_vector = adapter->q_vector[0];
4798 struct e1000_hw *hw = &adapter->hw;
4799 /* read ICR disables interrupts using IAM */
4800 u32 icr = rd32(E1000_ICR);
4802 igb_write_itr(q_vector);
4804 if (icr & E1000_ICR_DRSTA)
4805 schedule_work(&adapter->reset_task);
4807 if (icr & E1000_ICR_DOUTSYNC) {
4808 /* HW is reporting DMA is out of sync */
4809 adapter->stats.doosync++;
4812 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4813 hw->mac.get_link_status = 1;
4814 if (!test_bit(__IGB_DOWN, &adapter->state))
4815 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4818 napi_schedule(&q_vector->napi);
4824 * igb_intr - Legacy Interrupt Handler
4825 * @irq: interrupt number
4826 * @data: pointer to a network interface device structure
4828 static irqreturn_t igb_intr(int irq, void *data)
4830 struct igb_adapter *adapter = data;
4831 struct igb_q_vector *q_vector = adapter->q_vector[0];
4832 struct e1000_hw *hw = &adapter->hw;
4833 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
4834 * need for the IMC write */
4835 u32 icr = rd32(E1000_ICR);
4837 return IRQ_NONE; /* Not our interrupt */
4839 igb_write_itr(q_vector);
4841 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4842 * not set, then the adapter didn't send an interrupt */
4843 if (!(icr & E1000_ICR_INT_ASSERTED))
4846 if (icr & E1000_ICR_DRSTA)
4847 schedule_work(&adapter->reset_task);
4849 if (icr & E1000_ICR_DOUTSYNC) {
4850 /* HW is reporting DMA is out of sync */
4851 adapter->stats.doosync++;
4854 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4855 hw->mac.get_link_status = 1;
4856 /* guard against interrupt when we're going down */
4857 if (!test_bit(__IGB_DOWN, &adapter->state))
4858 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4861 napi_schedule(&q_vector->napi);
4866 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4868 struct igb_adapter *adapter = q_vector->adapter;
4869 struct e1000_hw *hw = &adapter->hw;
4871 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4872 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4873 if (!adapter->msix_entries)
4874 igb_set_itr(adapter);
4876 igb_update_ring_itr(q_vector);
4879 if (!test_bit(__IGB_DOWN, &adapter->state)) {
4880 if (adapter->msix_entries)
4881 wr32(E1000_EIMS, q_vector->eims_value);
4883 igb_irq_enable(adapter);
4888 * igb_poll - NAPI Rx polling callback
4889 * @napi: napi polling structure
4890 * @budget: count of how many packets we should handle
4892 static int igb_poll(struct napi_struct *napi, int budget)
4894 struct igb_q_vector *q_vector = container_of(napi,
4895 struct igb_q_vector,
4897 int tx_clean_complete = 1, work_done = 0;
4899 #ifdef CONFIG_IGB_DCA
4900 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4901 igb_update_dca(q_vector);
4903 if (q_vector->tx_ring)
4904 tx_clean_complete = igb_clean_tx_irq(q_vector);
4906 if (q_vector->rx_ring)
4907 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4909 if (!tx_clean_complete)
4912 /* If not enough Rx work done, exit the polling mode */
4913 if (work_done < budget) {
4914 napi_complete(napi);
4915 igb_ring_irq_enable(q_vector);
4922 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4923 * @adapter: board private structure
4924 * @shhwtstamps: timestamp structure to update
4925 * @regval: unsigned 64bit system time value.
4927 * We need to convert the system time value stored in the RX/TXSTMP registers
4928 * into a hwtstamp which can be used by the upper level timestamping functions
4930 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4931 struct skb_shared_hwtstamps *shhwtstamps,
4937 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
4938 * 24 to match clock shift we setup earlier.
4940 if (adapter->hw.mac.type == e1000_82580)
4941 regval <<= IGB_82580_TSYNC_SHIFT;
4943 ns = timecounter_cyc2time(&adapter->clock, regval);
4944 timecompare_update(&adapter->compare, ns);
4945 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4946 shhwtstamps->hwtstamp = ns_to_ktime(ns);
4947 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4951 * igb_tx_hwtstamp - utility function which checks for TX time stamp
4952 * @q_vector: pointer to q_vector containing needed info
4953 * @skb: packet that was just sent
4955 * If we were asked to do hardware stamping and such a time stamp is
4956 * available, then it must have been for this skb here because we only
4957 * allow only one such packet into the queue.
4959 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4961 struct igb_adapter *adapter = q_vector->adapter;
4962 union skb_shared_tx *shtx = skb_tx(skb);
4963 struct e1000_hw *hw = &adapter->hw;
4964 struct skb_shared_hwtstamps shhwtstamps;
4967 /* if skb does not support hw timestamp or TX stamp not valid exit */
4968 if (likely(!shtx->hardware) ||
4969 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4972 regval = rd32(E1000_TXSTMPL);
4973 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4975 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4976 skb_tstamp_tx(skb, &shhwtstamps);
4980 * igb_clean_tx_irq - Reclaim resources after transmit completes
4981 * @q_vector: pointer to q_vector containing needed info
4982 * returns true if ring is completely cleaned
4984 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4986 struct igb_adapter *adapter = q_vector->adapter;
4987 struct igb_ring *tx_ring = q_vector->tx_ring;
4988 struct net_device *netdev = tx_ring->netdev;
4989 struct e1000_hw *hw = &adapter->hw;
4990 struct igb_buffer *buffer_info;
4991 struct sk_buff *skb;
4992 union e1000_adv_tx_desc *tx_desc, *eop_desc;
4993 unsigned int total_bytes = 0, total_packets = 0;
4994 unsigned int i, eop, count = 0;
4995 bool cleaned = false;
4997 i = tx_ring->next_to_clean;
4998 eop = tx_ring->buffer_info[i].next_to_watch;
4999 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5001 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5002 (count < tx_ring->count)) {
5003 for (cleaned = false; !cleaned; count++) {
5004 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5005 buffer_info = &tx_ring->buffer_info[i];
5006 cleaned = (i == eop);
5007 skb = buffer_info->skb;
5010 unsigned int segs, bytecount;
5011 /* gso_segs is currently only valid for tcp */
5012 segs = buffer_info->gso_segs;
5013 /* multiply data chunks by size of headers */
5014 bytecount = ((segs - 1) * skb_headlen(skb)) +
5016 total_packets += segs;
5017 total_bytes += bytecount;
5019 igb_tx_hwtstamp(q_vector, skb);
5022 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5023 tx_desc->wb.status = 0;
5026 if (i == tx_ring->count)
5029 eop = tx_ring->buffer_info[i].next_to_watch;
5030 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5033 tx_ring->next_to_clean = i;
5035 if (unlikely(count &&
5036 netif_carrier_ok(netdev) &&
5037 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5038 /* Make sure that anybody stopping the queue after this
5039 * sees the new next_to_clean.
5042 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5043 !(test_bit(__IGB_DOWN, &adapter->state))) {
5044 netif_wake_subqueue(netdev, tx_ring->queue_index);
5045 tx_ring->tx_stats.restart_queue++;
5049 if (tx_ring->detect_tx_hung) {
5050 /* Detect a transmit hang in hardware, this serializes the
5051 * check with the clearing of time_stamp and movement of i */
5052 tx_ring->detect_tx_hung = false;
5053 if (tx_ring->buffer_info[i].time_stamp &&
5054 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5055 (adapter->tx_timeout_factor * HZ)) &&
5056 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5058 /* detected Tx unit hang */
5059 dev_err(&tx_ring->pdev->dev,
5060 "Detected Tx Unit Hang\n"
5064 " next_to_use <%x>\n"
5065 " next_to_clean <%x>\n"
5066 "buffer_info[next_to_clean]\n"
5067 " time_stamp <%lx>\n"
5068 " next_to_watch <%x>\n"
5070 " desc.status <%x>\n",
5071 tx_ring->queue_index,
5072 readl(tx_ring->head),
5073 readl(tx_ring->tail),
5074 tx_ring->next_to_use,
5075 tx_ring->next_to_clean,
5076 tx_ring->buffer_info[eop].time_stamp,
5079 eop_desc->wb.status);
5080 netif_stop_subqueue(netdev, tx_ring->queue_index);
5083 tx_ring->total_bytes += total_bytes;
5084 tx_ring->total_packets += total_packets;
5085 tx_ring->tx_stats.bytes += total_bytes;
5086 tx_ring->tx_stats.packets += total_packets;
5087 return (count < tx_ring->count);
5091 * igb_receive_skb - helper function to handle rx indications
5092 * @q_vector: structure containing interrupt and ring information
5093 * @skb: packet to send up
5094 * @vlan_tag: vlan tag for packet
5096 static void igb_receive_skb(struct igb_q_vector *q_vector,
5097 struct sk_buff *skb,
5100 struct igb_adapter *adapter = q_vector->adapter;
5102 if (vlan_tag && adapter->vlgrp)
5103 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5106 napi_gro_receive(&q_vector->napi, skb);
5109 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5110 u32 status_err, struct sk_buff *skb)
5112 skb->ip_summed = CHECKSUM_NONE;
5114 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5115 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5116 (status_err & E1000_RXD_STAT_IXSM))
5119 /* TCP/UDP checksum error bit is set */
5121 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5123 * work around errata with sctp packets where the TCPE aka
5124 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5125 * packets, (aka let the stack check the crc32c)
5127 if ((skb->len == 60) &&
5128 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5129 ring->rx_stats.csum_err++;
5131 /* let the stack verify checksum errors */
5134 /* It must be a TCP or UDP packet with a valid checksum */
5135 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5136 skb->ip_summed = CHECKSUM_UNNECESSARY;
5138 dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5141 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5142 struct sk_buff *skb)
5144 struct igb_adapter *adapter = q_vector->adapter;
5145 struct e1000_hw *hw = &adapter->hw;
5149 * If this bit is set, then the RX registers contain the time stamp. No
5150 * other packet will be time stamped until we read these registers, so
5151 * read the registers to make them available again. Because only one
5152 * packet can be time stamped at a time, we know that the register
5153 * values must belong to this one here and therefore we don't need to
5154 * compare any of the additional attributes stored for it.
5156 * If nothing went wrong, then it should have a skb_shared_tx that we
5157 * can turn into a skb_shared_hwtstamps.
5159 if (staterr & E1000_RXDADV_STAT_TSIP) {
5160 u32 *stamp = (u32 *)skb->data;
5161 regval = le32_to_cpu(*(stamp + 2));
5162 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5163 skb_pull(skb, IGB_TS_HDR_LEN);
5165 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5168 regval = rd32(E1000_RXSTMPL);
5169 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5172 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5174 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5175 union e1000_adv_rx_desc *rx_desc)
5177 /* HW will not DMA in data larger than the given buffer, even if it
5178 * parses the (NFS, of course) header to be larger. In that case, it
5179 * fills the header buffer and spills the rest into the page.
5181 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5182 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5183 if (hlen > rx_ring->rx_buffer_len)
5184 hlen = rx_ring->rx_buffer_len;
5188 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5189 int *work_done, int budget)
5191 struct igb_ring *rx_ring = q_vector->rx_ring;
5192 struct net_device *netdev = rx_ring->netdev;
5193 struct pci_dev *pdev = rx_ring->pdev;
5194 union e1000_adv_rx_desc *rx_desc , *next_rxd;
5195 struct igb_buffer *buffer_info , *next_buffer;
5196 struct sk_buff *skb;
5197 bool cleaned = false;
5198 int cleaned_count = 0;
5199 int current_node = numa_node_id();
5200 unsigned int total_bytes = 0, total_packets = 0;
5206 i = rx_ring->next_to_clean;
5207 buffer_info = &rx_ring->buffer_info[i];
5208 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5209 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5211 while (staterr & E1000_RXD_STAT_DD) {
5212 if (*work_done >= budget)
5216 skb = buffer_info->skb;
5217 prefetch(skb->data - NET_IP_ALIGN);
5218 buffer_info->skb = NULL;
5221 if (i == rx_ring->count)
5224 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5226 next_buffer = &rx_ring->buffer_info[i];
5228 length = le16_to_cpu(rx_desc->wb.upper.length);
5232 if (buffer_info->dma) {
5233 pci_unmap_single(pdev, buffer_info->dma,
5234 rx_ring->rx_buffer_len,
5235 PCI_DMA_FROMDEVICE);
5236 buffer_info->dma = 0;
5237 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5238 skb_put(skb, length);
5241 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5245 pci_unmap_page(pdev, buffer_info->page_dma,
5246 PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5247 buffer_info->page_dma = 0;
5249 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5251 buffer_info->page_offset,
5254 if ((page_count(buffer_info->page) != 1) ||
5255 (page_to_nid(buffer_info->page) != current_node))
5256 buffer_info->page = NULL;
5258 get_page(buffer_info->page);
5261 skb->data_len += length;
5262 skb->truesize += length;
5265 if (!(staterr & E1000_RXD_STAT_EOP)) {
5266 buffer_info->skb = next_buffer->skb;
5267 buffer_info->dma = next_buffer->dma;
5268 next_buffer->skb = skb;
5269 next_buffer->dma = 0;
5273 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5274 dev_kfree_skb_irq(skb);
5278 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5279 igb_rx_hwtstamp(q_vector, staterr, skb);
5280 total_bytes += skb->len;
5283 igb_rx_checksum_adv(rx_ring, staterr, skb);
5285 skb->protocol = eth_type_trans(skb, netdev);
5286 skb_record_rx_queue(skb, rx_ring->queue_index);
5288 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5289 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5291 igb_receive_skb(q_vector, skb, vlan_tag);
5294 rx_desc->wb.upper.status_error = 0;
5296 /* return some buffers to hardware, one at a time is too slow */
5297 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5298 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5302 /* use prefetched values */
5304 buffer_info = next_buffer;
5305 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5308 rx_ring->next_to_clean = i;
5309 cleaned_count = igb_desc_unused(rx_ring);
5312 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5314 rx_ring->total_packets += total_packets;
5315 rx_ring->total_bytes += total_bytes;
5316 rx_ring->rx_stats.packets += total_packets;
5317 rx_ring->rx_stats.bytes += total_bytes;
5322 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5323 * @adapter: address of board private structure
5325 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5327 struct net_device *netdev = rx_ring->netdev;
5328 union e1000_adv_rx_desc *rx_desc;
5329 struct igb_buffer *buffer_info;
5330 struct sk_buff *skb;
5334 i = rx_ring->next_to_use;
5335 buffer_info = &rx_ring->buffer_info[i];
5337 bufsz = rx_ring->rx_buffer_len;
5339 while (cleaned_count--) {
5340 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5342 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5343 if (!buffer_info->page) {
5344 buffer_info->page = netdev_alloc_page(netdev);
5345 if (!buffer_info->page) {
5346 rx_ring->rx_stats.alloc_failed++;
5349 buffer_info->page_offset = 0;
5351 buffer_info->page_offset ^= PAGE_SIZE / 2;
5353 buffer_info->page_dma =
5354 pci_map_page(rx_ring->pdev, buffer_info->page,
5355 buffer_info->page_offset,
5357 PCI_DMA_FROMDEVICE);
5358 if (pci_dma_mapping_error(rx_ring->pdev,
5359 buffer_info->page_dma)) {
5360 buffer_info->page_dma = 0;
5361 rx_ring->rx_stats.alloc_failed++;
5366 skb = buffer_info->skb;
5368 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5370 rx_ring->rx_stats.alloc_failed++;
5374 buffer_info->skb = skb;
5376 if (!buffer_info->dma) {
5377 buffer_info->dma = pci_map_single(rx_ring->pdev,
5380 PCI_DMA_FROMDEVICE);
5381 if (pci_dma_mapping_error(rx_ring->pdev,
5382 buffer_info->dma)) {
5383 buffer_info->dma = 0;
5384 rx_ring->rx_stats.alloc_failed++;
5388 /* Refresh the desc even if buffer_addrs didn't change because
5389 * each write-back erases this info. */
5390 if (bufsz < IGB_RXBUFFER_1024) {
5391 rx_desc->read.pkt_addr =
5392 cpu_to_le64(buffer_info->page_dma);
5393 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5395 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5396 rx_desc->read.hdr_addr = 0;
5400 if (i == rx_ring->count)
5402 buffer_info = &rx_ring->buffer_info[i];
5406 if (rx_ring->next_to_use != i) {
5407 rx_ring->next_to_use = i;
5409 i = (rx_ring->count - 1);
5413 /* Force memory writes to complete before letting h/w
5414 * know there are new descriptors to fetch. (Only
5415 * applicable for weak-ordered memory model archs,
5416 * such as IA-64). */
5418 writel(i, rx_ring->tail);
5428 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5430 struct igb_adapter *adapter = netdev_priv(netdev);
5431 struct mii_ioctl_data *data = if_mii(ifr);
5433 if (adapter->hw.phy.media_type != e1000_media_type_copper)
5438 data->phy_id = adapter->hw.phy.addr;
5441 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5453 * igb_hwtstamp_ioctl - control hardware time stamping
5458 * Outgoing time stamping can be enabled and disabled. Play nice and
5459 * disable it when requested, although it shouldn't case any overhead
5460 * when no packet needs it. At most one packet in the queue may be
5461 * marked for time stamping, otherwise it would be impossible to tell
5462 * for sure to which packet the hardware time stamp belongs.
5464 * Incoming time stamping has to be configured via the hardware
5465 * filters. Not all combinations are supported, in particular event
5466 * type has to be specified. Matching the kind of event packet is
5467 * not supported, with the exception of "all V2 events regardless of
5471 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5472 struct ifreq *ifr, int cmd)
5474 struct igb_adapter *adapter = netdev_priv(netdev);
5475 struct e1000_hw *hw = &adapter->hw;
5476 struct hwtstamp_config config;
5477 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5478 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5479 u32 tsync_rx_cfg = 0;
5484 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5487 /* reserved for future extensions */
5491 switch (config.tx_type) {
5492 case HWTSTAMP_TX_OFF:
5494 case HWTSTAMP_TX_ON:
5500 switch (config.rx_filter) {
5501 case HWTSTAMP_FILTER_NONE:
5504 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5505 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5506 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5507 case HWTSTAMP_FILTER_ALL:
5509 * register TSYNCRXCFG must be set, therefore it is not
5510 * possible to time stamp both Sync and Delay_Req messages
5511 * => fall back to time stamping all packets
5513 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5514 config.rx_filter = HWTSTAMP_FILTER_ALL;
5516 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5517 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5518 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5521 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5522 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5523 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5526 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5527 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5528 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5529 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5532 config.rx_filter = HWTSTAMP_FILTER_SOME;
5534 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5535 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5536 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5537 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5540 config.rx_filter = HWTSTAMP_FILTER_SOME;
5542 case HWTSTAMP_FILTER_PTP_V2_EVENT:
5543 case HWTSTAMP_FILTER_PTP_V2_SYNC:
5544 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5545 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5546 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5553 if (hw->mac.type == e1000_82575) {
5554 if (tsync_rx_ctl | tsync_tx_ctl)
5560 * Per-packet timestamping only works if all packets are
5561 * timestamped, so enable timestamping in all packets as
5562 * long as one rx filter was configured.
5564 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5565 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5566 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5569 /* enable/disable TX */
5570 regval = rd32(E1000_TSYNCTXCTL);
5571 regval &= ~E1000_TSYNCTXCTL_ENABLED;
5572 regval |= tsync_tx_ctl;
5573 wr32(E1000_TSYNCTXCTL, regval);
5575 /* enable/disable RX */
5576 regval = rd32(E1000_TSYNCRXCTL);
5577 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5578 regval |= tsync_rx_ctl;
5579 wr32(E1000_TSYNCRXCTL, regval);
5581 /* define which PTP packets are time stamped */
5582 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5584 /* define ethertype filter for timestamped packets */
5587 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5588 E1000_ETQF_1588 | /* enable timestamping */
5589 ETH_P_1588)); /* 1588 eth protocol type */
5591 wr32(E1000_ETQF(3), 0);
5593 #define PTP_PORT 319
5594 /* L4 Queue Filter[3]: filter by destination port and protocol */
5596 u32 ftqf = (IPPROTO_UDP /* UDP */
5597 | E1000_FTQF_VF_BP /* VF not compared */
5598 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5599 | E1000_FTQF_MASK); /* mask all inputs */
5600 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5602 wr32(E1000_IMIR(3), htons(PTP_PORT));
5603 wr32(E1000_IMIREXT(3),
5604 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5605 if (hw->mac.type == e1000_82576) {
5606 /* enable source port check */
5607 wr32(E1000_SPQF(3), htons(PTP_PORT));
5608 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5610 wr32(E1000_FTQF(3), ftqf);
5612 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5616 adapter->hwtstamp_config = config;
5618 /* clear TX/RX time stamp registers, just to be sure */
5619 regval = rd32(E1000_TXSTMPH);
5620 regval = rd32(E1000_RXSTMPH);
5622 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5632 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5638 return igb_mii_ioctl(netdev, ifr, cmd);
5640 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5646 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5648 struct igb_adapter *adapter = hw->back;
5651 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5653 return -E1000_ERR_CONFIG;
5655 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5660 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5662 struct igb_adapter *adapter = hw->back;
5665 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5667 return -E1000_ERR_CONFIG;
5669 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5674 static void igb_vlan_rx_register(struct net_device *netdev,
5675 struct vlan_group *grp)
5677 struct igb_adapter *adapter = netdev_priv(netdev);
5678 struct e1000_hw *hw = &adapter->hw;
5681 igb_irq_disable(adapter);
5682 adapter->vlgrp = grp;
5685 /* enable VLAN tag insert/strip */
5686 ctrl = rd32(E1000_CTRL);
5687 ctrl |= E1000_CTRL_VME;
5688 wr32(E1000_CTRL, ctrl);
5690 /* Disable CFI check */
5691 rctl = rd32(E1000_RCTL);
5692 rctl &= ~E1000_RCTL_CFIEN;
5693 wr32(E1000_RCTL, rctl);
5695 /* disable VLAN tag insert/strip */
5696 ctrl = rd32(E1000_CTRL);
5697 ctrl &= ~E1000_CTRL_VME;
5698 wr32(E1000_CTRL, ctrl);
5701 igb_rlpml_set(adapter);
5703 if (!test_bit(__IGB_DOWN, &adapter->state))
5704 igb_irq_enable(adapter);
5707 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5709 struct igb_adapter *adapter = netdev_priv(netdev);
5710 struct e1000_hw *hw = &adapter->hw;
5711 int pf_id = adapter->vfs_allocated_count;
5713 /* attempt to add filter to vlvf array */
5714 igb_vlvf_set(adapter, vid, true, pf_id);
5716 /* add the filter since PF can receive vlans w/o entry in vlvf */
5717 igb_vfta_set(hw, vid, true);
5720 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5722 struct igb_adapter *adapter = netdev_priv(netdev);
5723 struct e1000_hw *hw = &adapter->hw;
5724 int pf_id = adapter->vfs_allocated_count;
5727 igb_irq_disable(adapter);
5728 vlan_group_set_device(adapter->vlgrp, vid, NULL);
5730 if (!test_bit(__IGB_DOWN, &adapter->state))
5731 igb_irq_enable(adapter);
5733 /* remove vlan from VLVF table array */
5734 err = igb_vlvf_set(adapter, vid, false, pf_id);
5736 /* if vid was not present in VLVF just remove it from table */
5738 igb_vfta_set(hw, vid, false);
5741 static void igb_restore_vlan(struct igb_adapter *adapter)
5743 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5745 if (adapter->vlgrp) {
5747 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5748 if (!vlan_group_get_device(adapter->vlgrp, vid))
5750 igb_vlan_rx_add_vid(adapter->netdev, vid);
5755 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5757 struct pci_dev *pdev = adapter->pdev;
5758 struct e1000_mac_info *mac = &adapter->hw.mac;
5763 case SPEED_10 + DUPLEX_HALF:
5764 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5766 case SPEED_10 + DUPLEX_FULL:
5767 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5769 case SPEED_100 + DUPLEX_HALF:
5770 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5772 case SPEED_100 + DUPLEX_FULL:
5773 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5775 case SPEED_1000 + DUPLEX_FULL:
5777 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5779 case SPEED_1000 + DUPLEX_HALF: /* not supported */
5781 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5787 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5789 struct net_device *netdev = pci_get_drvdata(pdev);
5790 struct igb_adapter *adapter = netdev_priv(netdev);
5791 struct e1000_hw *hw = &adapter->hw;
5792 u32 ctrl, rctl, status;
5793 u32 wufc = adapter->wol;
5798 netif_device_detach(netdev);
5800 if (netif_running(netdev))
5803 igb_clear_interrupt_scheme(adapter);
5806 retval = pci_save_state(pdev);
5811 status = rd32(E1000_STATUS);
5812 if (status & E1000_STATUS_LU)
5813 wufc &= ~E1000_WUFC_LNKC;
5816 igb_setup_rctl(adapter);
5817 igb_set_rx_mode(netdev);
5819 /* turn on all-multi mode if wake on multicast is enabled */
5820 if (wufc & E1000_WUFC_MC) {
5821 rctl = rd32(E1000_RCTL);
5822 rctl |= E1000_RCTL_MPE;
5823 wr32(E1000_RCTL, rctl);
5826 ctrl = rd32(E1000_CTRL);
5827 /* advertise wake from D3Cold */
5828 #define E1000_CTRL_ADVD3WUC 0x00100000
5829 /* phy power management enable */
5830 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5831 ctrl |= E1000_CTRL_ADVD3WUC;
5832 wr32(E1000_CTRL, ctrl);
5834 /* Allow time for pending master requests to run */
5835 igb_disable_pcie_master(hw);
5837 wr32(E1000_WUC, E1000_WUC_PME_EN);
5838 wr32(E1000_WUFC, wufc);
5841 wr32(E1000_WUFC, 0);
5844 *enable_wake = wufc || adapter->en_mng_pt;
5846 igb_power_down_link(adapter);
5848 igb_power_up_link(adapter);
5850 /* Release control of h/w to f/w. If f/w is AMT enabled, this
5851 * would have already happened in close and is redundant. */
5852 igb_release_hw_control(adapter);
5854 pci_disable_device(pdev);
5860 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5865 retval = __igb_shutdown(pdev, &wake);
5870 pci_prepare_to_sleep(pdev);
5872 pci_wake_from_d3(pdev, false);
5873 pci_set_power_state(pdev, PCI_D3hot);
5879 static int igb_resume(struct pci_dev *pdev)
5881 struct net_device *netdev = pci_get_drvdata(pdev);
5882 struct igb_adapter *adapter = netdev_priv(netdev);
5883 struct e1000_hw *hw = &adapter->hw;
5886 pci_set_power_state(pdev, PCI_D0);
5887 pci_restore_state(pdev);
5888 pci_save_state(pdev);
5890 err = pci_enable_device_mem(pdev);
5893 "igb: Cannot enable PCI device from suspend\n");
5896 pci_set_master(pdev);
5898 pci_enable_wake(pdev, PCI_D3hot, 0);
5899 pci_enable_wake(pdev, PCI_D3cold, 0);
5901 if (igb_init_interrupt_scheme(adapter)) {
5902 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5908 /* let the f/w know that the h/w is now under the control of the
5910 igb_get_hw_control(adapter);
5912 wr32(E1000_WUS, ~0);
5914 if (netif_running(netdev)) {
5915 err = igb_open(netdev);
5920 netif_device_attach(netdev);
5926 static void igb_shutdown(struct pci_dev *pdev)
5930 __igb_shutdown(pdev, &wake);
5932 if (system_state == SYSTEM_POWER_OFF) {
5933 pci_wake_from_d3(pdev, wake);
5934 pci_set_power_state(pdev, PCI_D3hot);
5938 #ifdef CONFIG_NET_POLL_CONTROLLER
5940 * Polling 'interrupt' - used by things like netconsole to send skbs
5941 * without having to re-enable interrupts. It's not called while
5942 * the interrupt routine is executing.
5944 static void igb_netpoll(struct net_device *netdev)
5946 struct igb_adapter *adapter = netdev_priv(netdev);
5947 struct e1000_hw *hw = &adapter->hw;
5950 if (!adapter->msix_entries) {
5951 struct igb_q_vector *q_vector = adapter->q_vector[0];
5952 igb_irq_disable(adapter);
5953 napi_schedule(&q_vector->napi);
5957 for (i = 0; i < adapter->num_q_vectors; i++) {
5958 struct igb_q_vector *q_vector = adapter->q_vector[i];
5959 wr32(E1000_EIMC, q_vector->eims_value);
5960 napi_schedule(&q_vector->napi);
5963 #endif /* CONFIG_NET_POLL_CONTROLLER */
5966 * igb_io_error_detected - called when PCI error is detected
5967 * @pdev: Pointer to PCI device
5968 * @state: The current pci connection state
5970 * This function is called after a PCI bus error affecting
5971 * this device has been detected.
5973 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5974 pci_channel_state_t state)
5976 struct net_device *netdev = pci_get_drvdata(pdev);
5977 struct igb_adapter *adapter = netdev_priv(netdev);
5979 netif_device_detach(netdev);
5981 if (state == pci_channel_io_perm_failure)
5982 return PCI_ERS_RESULT_DISCONNECT;
5984 if (netif_running(netdev))
5986 pci_disable_device(pdev);
5988 /* Request a slot slot reset. */
5989 return PCI_ERS_RESULT_NEED_RESET;
5993 * igb_io_slot_reset - called after the pci bus has been reset.
5994 * @pdev: Pointer to PCI device
5996 * Restart the card from scratch, as if from a cold-boot. Implementation
5997 * resembles the first-half of the igb_resume routine.
5999 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6001 struct net_device *netdev = pci_get_drvdata(pdev);
6002 struct igb_adapter *adapter = netdev_priv(netdev);
6003 struct e1000_hw *hw = &adapter->hw;
6004 pci_ers_result_t result;
6007 if (pci_enable_device_mem(pdev)) {
6009 "Cannot re-enable PCI device after reset.\n");
6010 result = PCI_ERS_RESULT_DISCONNECT;
6012 pci_set_master(pdev);
6013 pci_restore_state(pdev);
6014 pci_save_state(pdev);
6016 pci_enable_wake(pdev, PCI_D3hot, 0);
6017 pci_enable_wake(pdev, PCI_D3cold, 0);
6020 wr32(E1000_WUS, ~0);
6021 result = PCI_ERS_RESULT_RECOVERED;
6024 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6026 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6027 "failed 0x%0x\n", err);
6028 /* non-fatal, continue */
6035 * igb_io_resume - called when traffic can start flowing again.
6036 * @pdev: Pointer to PCI device
6038 * This callback is called when the error recovery driver tells us that
6039 * its OK to resume normal operation. Implementation resembles the
6040 * second-half of the igb_resume routine.
6042 static void igb_io_resume(struct pci_dev *pdev)
6044 struct net_device *netdev = pci_get_drvdata(pdev);
6045 struct igb_adapter *adapter = netdev_priv(netdev);
6047 if (netif_running(netdev)) {
6048 if (igb_up(adapter)) {
6049 dev_err(&pdev->dev, "igb_up failed after reset\n");
6054 netif_device_attach(netdev);
6056 /* let the f/w know that the h/w is now under the control of the
6058 igb_get_hw_control(adapter);
6061 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6064 u32 rar_low, rar_high;
6065 struct e1000_hw *hw = &adapter->hw;
6067 /* HW expects these in little endian so we reverse the byte order
6068 * from network order (big endian) to little endian
6070 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6071 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6072 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6074 /* Indicate to hardware the Address is Valid. */
6075 rar_high |= E1000_RAH_AV;
6077 if (hw->mac.type == e1000_82575)
6078 rar_high |= E1000_RAH_POOL_1 * qsel;
6080 rar_high |= E1000_RAH_POOL_1 << qsel;
6082 wr32(E1000_RAL(index), rar_low);
6084 wr32(E1000_RAH(index), rar_high);
6088 static int igb_set_vf_mac(struct igb_adapter *adapter,
6089 int vf, unsigned char *mac_addr)
6091 struct e1000_hw *hw = &adapter->hw;
6092 /* VF MAC addresses start at end of receive addresses and moves
6093 * torwards the first, as a result a collision should not be possible */
6094 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6096 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6098 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6103 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6105 struct igb_adapter *adapter = netdev_priv(netdev);
6106 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6108 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6109 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6110 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6111 " change effective.");
6112 if (test_bit(__IGB_DOWN, &adapter->state)) {
6113 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6114 " but the PF device is not up.\n");
6115 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6116 " attempting to use the VF device.\n");
6118 return igb_set_vf_mac(adapter, vf, mac);
6121 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6126 static int igb_ndo_get_vf_config(struct net_device *netdev,
6127 int vf, struct ifla_vf_info *ivi)
6129 struct igb_adapter *adapter = netdev_priv(netdev);
6130 if (vf >= adapter->vfs_allocated_count)
6133 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6135 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6136 ivi->qos = adapter->vf_data[vf].pf_qos;
6140 static void igb_vmm_control(struct igb_adapter *adapter)
6142 struct e1000_hw *hw = &adapter->hw;
6145 switch (hw->mac.type) {
6148 /* replication is not supported for 82575 */
6151 /* notify HW that the MAC is adding vlan tags */
6152 reg = rd32(E1000_DTXCTL);
6153 reg |= E1000_DTXCTL_VLAN_ADDED;
6154 wr32(E1000_DTXCTL, reg);
6156 /* enable replication vlan tag stripping */
6157 reg = rd32(E1000_RPLOLR);
6158 reg |= E1000_RPLOLR_STRVLAN;
6159 wr32(E1000_RPLOLR, reg);
6161 /* none of the above registers are supported by i350 */
6165 if (adapter->vfs_allocated_count) {
6166 igb_vmdq_set_loopback_pf(hw, true);
6167 igb_vmdq_set_replication_pf(hw, true);
6169 igb_vmdq_set_loopback_pf(hw, false);
6170 igb_vmdq_set_replication_pf(hw, false);