/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2011 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"
#define MAJ 3
#define MIN 0
#define BUILD 6
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
	__stringify(BUILD) "-k"
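/* Assuming the MAJ/MIN/BUILD values restored above, __stringify() expands
 * DRV_VERSION to the literal "3.0.6-k" that ethtool -i reports. */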
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
				"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};
static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, u32 features);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};
static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend  = igb_suspend,
	.resume   = igb_resume,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};
MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
struct igb_reg_info {
	u32 ofs;
	char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {
	/* General Registers */
	{E1000_CTRL, "CTRL"},
	{E1000_STATUS, "STATUS"},
	{E1000_CTRL_EXT, "CTRL_EXT"},

	/* Interrupt Registers */
	{E1000_ICR, "ICR"},

	/* RX Registers */
	{E1000_RCTL, "RCTL"},
	{E1000_RDLEN(0), "RDLEN"},
	{E1000_RDH(0), "RDH"},
	{E1000_RDT(0), "RDT"},
	{E1000_RXDCTL(0), "RXDCTL"},
	{E1000_RDBAL(0), "RDBAL"},
	{E1000_RDBAH(0), "RDBAH"},

	/* TX Registers */
	{E1000_TCTL, "TCTL"},
	{E1000_TDBAL(0), "TDBAL"},
	{E1000_TDBAH(0), "TDBAH"},
	{E1000_TDLEN(0), "TDLEN"},
	{E1000_TDH(0), "TDH"},
	{E1000_TDT(0), "TDT"},
	{E1000_TXDCTL(0), "TXDCTL"},
	{E1000_TDFH, "TDFH"},
	{E1000_TDFT, "TDFT"},
	{E1000_TDFHS, "TDFHS"},
	{E1000_TDFPC, "TDFPC"},

	/* List Terminator */
	{}
};
/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
	int n = 0;
	char rname[16];
	u32 regs[8];

	switch (reginfo->ofs) {
	case E1000_RDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDLEN(n));
		break;
	case E1000_RDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDH(n));
		break;
	case E1000_RDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDT(n));
		break;
	case E1000_RXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RXDCTL(n));
		break;
	case E1000_RDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_RDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAH(n));
		break;
	case E1000_TDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAL(n));
		break;
	case E1000_TDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAH(n));
		break;
	case E1000_TDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDLEN(n));
		break;
	case E1000_TDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDH(n));
		break;
	case E1000_TDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDT(n));
		break;
	case E1000_TXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TXDCTL(n));
		break;
	default:
		printk(KERN_INFO "%-15s %08x\n",
			reginfo->name, rd32(reginfo->ofs));
		return;
	}

	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
	printk(KERN_INFO "%-15s ", rname);
	for (n = 0; n < 4; n++)
		printk(KERN_CONT "%08x ", regs[n]);
	printk(KERN_CONT "\n");
}
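/* For the ring registers handled in the switch above, igb_regdump() prints
 * all four queue instances on one line; e.g. (values illustrative only):
 *
 *   RDLEN[0-3]      00001000 00001000 00001000 00001000
 */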
/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_reg_info *reginfo;
	int n = 0;
	struct igb_ring *tx_ring;
	union e1000_adv_tx_desc *tx_desc;
	struct my_u0 { u64 a; u64 b; } *u0;
	struct igb_ring *rx_ring;
	union e1000_adv_rx_desc *rx_desc;
	u32 staterr;
	int i = 0;

	if (!netif_msg_hw(adapter))
		return;

	/* Print netdevice Info */
	if (netdev) {
		dev_info(&adapter->pdev->dev, "Net device Info\n");
		printk(KERN_INFO "Device Name     state            "
			"trans_start      last_rx\n");
		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
			netdev->name,
			netdev->state,
			netdev->trans_start,
			netdev->last_rx);
	}

	/* Print Registers */
	dev_info(&adapter->pdev->dev, "Register Dump\n");
	printk(KERN_INFO " Register Name   Value\n");
	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
	     reginfo->name; reginfo++) {
		igb_regdump(hw, reginfo);
	}

	/* Print TX Ring Summary */
	if (!netdev || !netif_running(netdev))
		goto exit;

	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
		" leng ntw timestamp\n");
	for (n = 0; n < adapter->num_tx_queues; n++) {
		struct igb_tx_buffer *buffer_info;
		tx_ring = adapter->tx_ring[n];
		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
		printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
			n, tx_ring->next_to_use, tx_ring->next_to_clean,
			(u64)buffer_info->dma,
			buffer_info->length,
			buffer_info->next_to_watch,
			(u64)buffer_info->time_stamp);
	}

	/* Print TX Rings */
	if (!netif_msg_tx_done(adapter))
		goto rx_ring_summary;

	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

	/* Transmit Descriptor Formats
	 *
	 * Advanced Transmit Descriptor
	 *   +--------------------------------------------------------------+
	 * 0 |         Buffer Address [63:0]                                |
	 *   +--------------------------------------------------------------+
	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
	 *   +--------------------------------------------------------------+
	 *   63      46 45    40 39 38 36 35 32 31   24             15    0
	 */

	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "T [desc]     [address 63:0  ] "
			"[PlPOCIStDDM Ln] [bi->dma       ] "
			"leng ntw timestamp bi->skb\n");

		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
			struct igb_tx_buffer *buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, i);
			buffer_info = &tx_ring->tx_buffer_info[i];
			u0 = (struct my_u0 *)tx_desc;
			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
				" %04X %p %016llX %p", i,
				le64_to_cpu(u0->a),
				le64_to_cpu(u0->b),
				(u64)buffer_info->dma,
				buffer_info->length,
				buffer_info->next_to_watch,
				(u64)buffer_info->time_stamp,
				buffer_info->skb);
			if (i == tx_ring->next_to_use &&
				i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC/U\n");
			else if (i == tx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");

			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
				print_hex_dump(KERN_INFO, "",
					DUMP_PREFIX_ADDRESS,
					16, 1, phys_to_virt(buffer_info->dma),
					buffer_info->length, true);
		}
	}

	/* Print RX Rings Summary */
rx_ring_summary:
	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC]\n");
	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO " %5d %5X %5X\n", n,
			rx_ring->next_to_use, rx_ring->next_to_clean);
	}

	/* Print RX Rings */
	if (!netif_msg_rx_status(adapter))
		goto exit;

	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

	/* Advanced Receive Descriptor (Read) Format
	 *    63                                           1        0
	 *    +-----------------------------------------------------+
	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
	 *    +----------------------------------------------+------+
	 *  8 |       Header Buffer Address [63:1]           |  DD  |
	 *    +-----------------------------------------------------+
	 *
	 *
	 * Advanced Receive Descriptor (Write-Back) Format
	 *
	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
	 *   +------------------------------------------------------+
	 * 0 | Packet   IP     |SPH| HDR_LEN   | RSV|Packet|  RSS   |
	 *   | Checksum Ident  |   |           |    | Type | Type   |
	 *   +------------------------------------------------------+
	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
	 *   +------------------------------------------------------+
	 *   63       48 47    32 31            20 19               0
	 */

	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
			"<-- Adv Rx Read format\n");
		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
			"[vl er S cks ln] ---------------- [bi->skb] "
			"<-- Adv Rx Write-Back format\n");

		for (i = 0; i < rx_ring->count; i++) {
			struct igb_rx_buffer *buffer_info;
			buffer_info = &rx_ring->rx_buffer_info[i];
			rx_desc = IGB_RX_DESC(rx_ring, i);
			u0 = (struct my_u0 *)rx_desc;
			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
			if (staterr & E1000_RXD_STAT_DD) {
				/* Descriptor Done */
				printk(KERN_INFO "RWB[0x%03X]     %016llX "
					"%016llX ---------------- %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					buffer_info->skb);
			} else {
				printk(KERN_INFO "R  [0x%03X]     %016llX "
					"%016llX %016llX %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					(u64)buffer_info->dma,
					buffer_info->skb);

				if (netif_msg_pktdata(adapter)) {
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(buffer_info->dma),
						IGB_RX_HDR_LEN, true);
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(
						  buffer_info->page_dma +
						  buffer_info->page_offset),
						PAGE_SIZE/2, true);
				}
			}

			if (i == rx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == rx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");
		}
	}

exit:
	return;
}
/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 **/
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on lowest register read. For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
	 */
	if (hw->mac.type == e1000_82580) {
		stamp = rd32(E1000_SYSTIMR) >> 8;
		shift = IGB_82580_TSYNC_SHIFT;
	}

	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
	return stamp;
}
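/* Sketch of the assembly above: on non-82580 parts shift stays 0, so the
 * returned cycle count is simply (SYSTIMH << 32) | SYSTIML; on the 82580
 * both halves are shifted up by IGB_82580_TSYNC_SHIFT to leave room for the
 * sub-tick bits that SYSTIMR would otherwise provide. */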
/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev;
}
/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;
	printk(KERN_INFO "%s - version %s\n",
	       igb_driver_string, igb_driver_version);

	printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);
/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
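/* Q_IDX_82576(i) interleaves queue indices so each VF owns a low/high queue
 * pair: i = 0, 1, 2, 3, ... maps to 0, 8, 1, 9, ... (e.g. for i = 1,
 * ((1 & 0x1) << 3) + (1 >> 1) = 8), matching the "VF 0 gets queues 0 and 8"
 * layout described in igb_cache_ring_register() below. */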
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
				                               Q_IDX_82576(i);
		}
	case e1000_82575:
	case e1000_82580:
	case e1000_i350:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}
static void igb_free_queues(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		kfree(adapter->tx_ring[i]);
		adapter->tx_ring[i] = NULL;
	}
	for (i = 0; i < adapter->num_rx_queues; i++) {
		kfree(adapter->rx_ring[i]);
		adapter->rx_ring[i] = NULL;
	}
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}
/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	struct igb_ring *ring;
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
		adapter->tx_ring[i] = ring;
	}

	for (i = 0; i < adapter->num_rx_queues; i++) {
		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
		adapter->rx_ring[i] = ring;
	}

	igb_cache_ring_register(adapter);

	return 0;

err:
	igb_free_queues(adapter);

	return -ENOMEM;
}
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	u32 msixbm = 0;
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	u32 ivar, index;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;

	if (q_vector->rx_ring)
		rx_queue = q_vector->rx_ring->reg_idx;
	if (q_vector->tx_ring)
		tx_queue = q_vector->tx_ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		 * bitmask for the EICR/EIMS/EIMC registers.  To assign one
		 * or more queues to a vector, we write the appropriate bits
		 * into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/* 82576 uses a table-based method for assigning vectors.
		 * Each queue has a single entry in the table to which we write
		 * a vector number along with a "valid" bit.  Sadly, the layout
		 * of the table is somewhat counterintuitive. */
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue < 8) {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			} else {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue < 8) {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			} else {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
	case e1000_i350:
		/* The 82580 and newer use the same table-based approach as the
		 * 82576, but the table has fewer entries: two queues share
		 * each entry, with bit 0 of the queue index selecting which
		 * half of the entry is used. */
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue & 0x1) {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			} else {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue & 0x1) {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			} else {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}
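/* Illustration, derived from the 82576 case above: each 32-bit IVAR0 entry n
 * packs four 8-bit vector fields, one per byte:
 *   byte 0 = Rx queue n,   byte 1 = Tx queue n,
 *   byte 2 = Rx queue n+8, byte 3 = Tx queue n+8,
 * each field carrying E1000_IVAR_VALID in its top bit while in use. */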
/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support*/
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
		           E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
		     E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	wrfl();
}
/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
	                  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx_ring && q_vector->tx_ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
			        q_vector->rx_ring->queue_index);
		else if (q_vector->tx_ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
			        q_vector->tx_ring->queue_index);
		else if (q_vector->rx_ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
			        q_vector->rx_ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
		                  igb_msix_ring, 0, q_vector->name,
		                  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}
/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced queue counts. */
	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
	return netif_set_real_num_rx_queues(adapter->netdev,
					    adapter->num_rx_queues);
}
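/* Vector budget example for the code above (illustrative): with rss_queues
 * = 4, no VFs and queue pairing disabled, numvecs = 4 Rx + 4 Tx = 8
 * q_vectors, plus one extra entry for link-status interrupts, so nine
 * MSI-X vectors are requested from pci_enable_msix(). */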
/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}
	return 0;

err_out:
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}
static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx_ring = adapter->rx_ring[ring_idx];
	q_vector->rx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx_ring = adapter->tx_ring[ring_idx];
	q_vector->tx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->tx_itr_setting;
	q_vector->tx_work_limit = adapter->tx_work_limit;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}
/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	err = igb_set_interrupt_capability(adapter);
	if (err)
		return err;

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}
/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(adapter->pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	} else {
		igb_assign_vector(adapter->q_vector[0], 0);
	}

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}
static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++) {
			struct igb_q_vector *q_vector = adapter->q_vector[i];
			free_irq(adapter->msix_entries[vector++].vector,
			         q_vector);
		}
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}
/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	if (adapter->msix_entries) {
		int i;
		for (i = 0; i < adapter->num_q_vectors; i++)
			synchronize_irq(adapter->msix_entries[i].vector);
	} else {
		synchronize_irq(adapter->pdev->irq);
	}
}
/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		if (adapter->hw.mac.type == e1000_82580)
			ims |= E1000_IMS_DRSTA;

		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}
static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !test_bit(old_vid, adapter->active_vlans)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}
/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
	}
}
/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
}
/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}
	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}
void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_disable(&q_vector->napi);
	}

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset*/
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA

	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}
void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}
void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_i350:
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));
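	/* Worked example (illustrative): with a 34KB Rx PBA and a 1522-byte
	 * max frame, pba << 10 = 34816 bytes; 90% of that is 31334, while
	 * 34816 - 2 * 1522 = 31772, so hwm = 31334 and the assignment below
	 * rounds high_water down to 31328 (16-byte granularity). */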
	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;

	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wrfl();

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");
	if (hw->mac.type > e1000_82580) {
		if (adapter->flags & IGB_FLAG_DMAC) {
			u32 reg;

			/*
			 * The DMA Coalescing high water mark needs to be
			 * higher than the Rx threshold.  The Rx threshold is
			 * currently pba - 6, so use a high water mark of
			 * pba - 4.
			 */
			hwm = (pba - 4) << 10;

			reg = (((pba - 6) << E1000_DMACR_DMACTHR_SHIFT)
			       & E1000_DMACR_DMACTHR_MASK);

			/* transition to L0x or L1 if available..*/
			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

			/* watchdog timer = +-1000 usec in 32 usec intervals */
			reg |= (1000 >> 5);
			wr32(E1000_DMACR, reg);
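			/* 1000 >> 5 == 31: the watchdog field is in 32-usec
			 * units, so this programs roughly a 1000-usec
			 * (31 * 32 = 992 usec) coalescing watchdog. */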
			/* no lower threshold to disable coalescing (smart fifo)
			 * -UTRESH=0 */
			wr32(E1000_DMCRTRH, 0);

			/* set hwm to PBA - 2 * max frame size */
			wr32(E1000_FCRTC, hwm);

			/*
			 * This sets the time to wait before requesting
			 * transition to low power state to number of usecs
			 * needed to receive 1 512 byte frame at gigabit
			 * line rate
			 */
			reg = rd32(E1000_DMCTLX);
			reg |= IGB_DMCTLX_DCFLUSH_DIS;

			/* Delay 255 usec before entering Lx state. */
			reg |= 0xFF;
			wr32(E1000_DMCTLX, reg);

			/* free space in Tx packet buffer to wake from DMAC */
			wr32(E1000_DMCTXTH,
			     (IGB_MIN_TXPBSIZE -
			     (IGB_TX_BUF_4096 + adapter->max_frame_size))
			     >> 6);

			/* make low power state decision controlled by DMAC */
			reg = rd32(E1000_PCIEMISC);
			reg |= E1000_PCIEMISC_LX_DECISION;
			wr32(E1000_PCIEMISC, reg);
		} /* end if IGB_FLAG_DMAC set */
	}
	if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);
		wr32(E1000_PCIEMISC,
		     reg & ~E1000_PCIEMISC_LX_DECISION);
	}
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_get_phy_info(hw);
}
static u32 igb_fix_features(struct net_device *netdev, u32 features)
{
	/*
	 * Since there is no support for separate rx/tx vlan accel
	 * enable/disable make sure tx flag is always in same state as rx.
	 */
	if (features & NETIF_F_HW_VLAN_RX)
		features |= NETIF_F_HW_VLAN_TX;
	else
		features &= ~NETIF_F_HW_VLAN_TX;

	return features;
}
static int igb_set_features(struct net_device *netdev, u32 features)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	int i;
	u32 changed = netdev->features ^ features;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		if (features & NETIF_F_RXCSUM)
			adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
		else
			adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
	}

	if (changed & NETIF_F_HW_VLAN_RX)
		igb_vlan_mode(netdev, features);

	return 0;
}
static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame,
	.ndo_get_stats64	= igb_get_stats64,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
	.ndo_fix_features	= igb_fix_features,
	.ndo_set_features	= igb_set_features,
};
/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
			       const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	s32 ret_val;
	static int global_quad_port_a; /* global quad port a indication */
	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
	unsigned long mmio_start, mmio_len;
	int err, pci_using_dac;
	u16 eeprom_apme_mask = IGB_EEPROM_APME;
	u8 part_str[E1000_PBANUM_LENGTH];
	/* Catch broken hardware that put the wrong VF device ID in
	 * the PCIe SR-IOV capability.
	 */
	if (pdev->is_virtfn) {
		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
		     pci_name(pdev), pdev->vendor, pdev->device);
		return -EINVAL;
	}

	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	pci_using_dac = 0;
	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
	if (!err) {
		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
		if (!err)
			pci_using_dac = 1;
	} else {
		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			err = dma_set_coherent_mask(&pdev->dev,
						    DMA_BIT_MASK(32));
			if (err) {
				dev_err(&pdev->dev, "No usable DMA "
					"configuration, aborting\n");
				goto err_dma;
			}
		}
	}

	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
	                                   IORESOURCE_MEM),
	                                   igb_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	pci_set_master(pdev);
	pci_save_state(pdev);
	err = -ENOMEM;
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
				   IGB_MAX_TX_QUEUES);
	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
	mmio_start = pci_resource_start(pdev, 0);
	mmio_len = pci_resource_len(pdev, 0);
	err = -EIO;
	hw->hw_addr = ioremap(mmio_start, mmio_len);
	if (!hw->hw_addr)
		goto err_ioremap;

	netdev->netdev_ops = &igb_netdev_ops;
	igb_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	netdev->mem_start = mmio_start;
	netdev->mem_end = mmio_start + mmio_len;
	/* PCI config space info */
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->revision_id = pdev->revision;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;

	/* Copy the default MAC, PHY and NVM function pointers */
	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
	/* Initialize skew-specific constants */
	err = ei->get_invariants(hw);
	if (err)
		goto err_sw_init;

	/* setup the private structure */
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	igb_get_bus_info_pcie(hw);

	hw->phy.autoneg_wait_to_complete = false;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = false;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (igb_check_reset_block(hw))
		dev_info(&pdev->dev,
			"PHY reset is blocked due to SOL/IDER session.\n");
	netdev->hw_features = NETIF_F_SG |
			      NETIF_F_IP_CSUM |
			      NETIF_F_IPV6_CSUM |
			      NETIF_F_TSO |
			      NETIF_F_TSO6 |
			      NETIF_F_RXCSUM |
			      NETIF_F_HW_VLAN_RX;

	netdev->features = netdev->hw_features |
			   NETIF_F_HW_VLAN_TX |
			   NETIF_F_HW_VLAN_FILTER;

	netdev->vlan_features |= NETIF_F_TSO;
	netdev->vlan_features |= NETIF_F_TSO6;
	netdev->vlan_features |= NETIF_F_IP_CSUM;
	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
	netdev->vlan_features |= NETIF_F_SG;

	if (pci_using_dac) {
		netdev->features |= NETIF_F_HIGHDMA;
		netdev->vlan_features |= NETIF_F_HIGHDMA;
	}

	if (hw->mac.type >= e1000_82576) {
		netdev->hw_features |= NETIF_F_SCTP_CSUM;
		netdev->features |= NETIF_F_SCTP_CSUM;
	}

	netdev->priv_flags |= IFF_UNICAST_FLT;
	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);

	/* before reading the NVM, reset the controller to put the device in a
	 * known good starting state */
	hw->mac.ops.reset_hw(hw);

	/* make sure the NVM is good */
	if (hw->nvm.ops.validate(hw) < 0) {
		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
		err = -EIO;
		goto err_eeprom;
	}

	/* copy the MAC address out of the NVM */
	if (hw->mac.ops.read_mac_addr(hw))
		dev_err(&pdev->dev, "NVM Read Error\n");

	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);

	if (!is_valid_ether_addr(netdev->perm_addr)) {
		dev_err(&pdev->dev, "Invalid MAC Address\n");
		err = -EIO;
		goto err_eeprom;
	}
	setup_timer(&adapter->watchdog_timer, igb_watchdog,
	            (unsigned long) adapter);
	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
	            (unsigned long) adapter);

	INIT_WORK(&adapter->reset_task, igb_reset_task);
	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);

	/* Initialize link properties that are user-changeable */
	adapter->fc_autoneg = true;
	hw->mac.autoneg = true;
	hw->phy.autoneg_advertised = 0x2f;

	hw->fc.requested_mode = e1000_fc_default;
	hw->fc.current_mode = e1000_fc_default;

	igb_validate_mdi_setting(hw);

	/* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
	 * enable the ACPI Magic Packet filter
	 */

	if (hw->bus.func == 0)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
	else if (hw->mac.type >= e1000_82580)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
		                 &eeprom_data);
	else if (hw->bus.func == 1)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
	if (eeprom_data & eeprom_apme_mask)
		adapter->eeprom_wol |= E1000_WUFC_MAG;

	/* now that we have the eeprom settings, apply the special cases where
	 * the eeprom may be wrong or the board simply won't support wake on
	 * lan on a particular port */
	switch (pdev->device) {
	case E1000_DEV_ID_82575GB_QUAD_COPPER:
		adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82575EB_FIBER_SERDES:
	case E1000_DEV_ID_82576_FIBER:
	case E1000_DEV_ID_82576_SERDES:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
			adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82576_QUAD_COPPER:
	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->eeprom_wol = 0;
		else
			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}
2069 /* initialize the wol settings based on the eeprom settings */
2070 adapter->wol = adapter->eeprom_wol;
2071 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2073 /* reset the hardware with the new settings */
2076 /* let the f/w know that the h/w is now under the control of the driver */
2078 igb_get_hw_control(adapter);
2080 strcpy(netdev->name, "eth%d");
2081 err = register_netdev(netdev);
2085 igb_vlan_mode(netdev, netdev->features);
2087 /* carrier off reporting is important to ethtool even BEFORE open */
2088 netif_carrier_off(netdev);
2090 #ifdef CONFIG_IGB_DCA
2091 if (dca_add_requester(&pdev->dev) == 0) {
2092 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2093 dev_info(&pdev->dev, "DCA enabled\n");
2094 igb_setup_dca(adapter);
2098 /* do hw tstamp init after resetting */
2099 igb_init_hw_timer(adapter);
2101 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2102 /* print bus type/speed/width info */
2103 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2105 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2106 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2108 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2109 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2110 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2114 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2116 strcpy(part_str, "Unknown");
2117 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2118 dev_info(&pdev->dev,
2119 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2120 adapter->msix_entries ? "MSI-X" :
2121 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2122 adapter->num_rx_queues, adapter->num_tx_queues);
2123 switch (hw->mac.type) {
2125 igb_set_eee_i350(hw);
2133 igb_release_hw_control(adapter);
2135 if (!igb_check_reset_block(hw))
2138 if (hw->flash_address)
2139 iounmap(hw->flash_address);
2141 igb_clear_interrupt_scheme(adapter);
2142 iounmap(hw->hw_addr);
2144 free_netdev(netdev);
2146 pci_release_selected_regions(pdev,
2147 pci_select_bars(pdev, IORESOURCE_MEM));
2150 pci_disable_device(pdev);
2155 * igb_remove - Device Removal Routine
2156 * @pdev: PCI device information struct
2158 * igb_remove is called by the PCI subsystem to alert the driver
2159 * that it should release a PCI device. This could be caused by a
2160 * Hot-Plug event, or because the driver is going to be removed from memory.
2163 static void __devexit igb_remove(struct pci_dev *pdev)
2165 struct net_device *netdev = pci_get_drvdata(pdev);
2166 struct igb_adapter *adapter = netdev_priv(netdev);
2167 struct e1000_hw *hw = &adapter->hw;
2170 /* The watchdog timer may be rescheduled, so explicitly
2171 * prevent it from being rescheduled. */
2173 set_bit(__IGB_DOWN, &adapter->state);
2174 del_timer_sync(&adapter->watchdog_timer);
2175 del_timer_sync(&adapter->phy_info_timer);
2177 cancel_work_sync(&adapter->reset_task);
2178 cancel_work_sync(&adapter->watchdog_task);
2180 #ifdef CONFIG_IGB_DCA
2181 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2182 dev_info(&pdev->dev, "DCA disabled\n");
2183 dca_remove_requester(&pdev->dev);
2184 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2185 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2189 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2190 * would have already happened in close and is redundant. */
2191 igb_release_hw_control(adapter);
2193 unregister_netdev(netdev);
2195 igb_clear_interrupt_scheme(adapter);
2197 #ifdef CONFIG_PCI_IOV
2198 /* reclaim resources allocated to VFs */
2199 if (adapter->vf_data) {
2200 /* disable iov and allow time for transactions to clear */
2201 pci_disable_sriov(pdev);
2204 kfree(adapter->vf_data);
2205 adapter->vf_data = NULL;
2206 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2209 dev_info(&pdev->dev, "IOV Disabled\n");
2213 iounmap(hw->hw_addr);
2214 if (hw->flash_address)
2215 iounmap(hw->flash_address);
2216 pci_release_selected_regions(pdev,
2217 pci_select_bars(pdev, IORESOURCE_MEM));
2219 free_netdev(netdev);
2221 pci_disable_pcie_error_reporting(pdev);
2223 pci_disable_device(pdev);
2227 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2228 * @adapter: board private structure to initialize
2230 * This function initializes the vf specific data storage and then attempts to
2231 * allocate the VFs. The reason for ordering it this way is that it is much
2232 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2233 * the memory for the VFs.
2235 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2237 #ifdef CONFIG_PCI_IOV
2238 struct pci_dev *pdev = adapter->pdev;
2240 if (adapter->vfs_allocated_count) {
2241 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2242 sizeof(struct vf_data_storage),
2244 /* if allocation failed then we do not support SR-IOV */
2245 if (!adapter->vf_data) {
2246 adapter->vfs_allocated_count = 0;
2247 dev_err(&pdev->dev, "Unable to allocate memory for VF Data Storage\n");
2252 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2253 kfree(adapter->vf_data);
2254 adapter->vf_data = NULL;
2255 #endif /* CONFIG_PCI_IOV */
2256 adapter->vfs_allocated_count = 0;
2257 #ifdef CONFIG_PCI_IOV
2259 unsigned char mac_addr[ETH_ALEN];
2261 dev_info(&pdev->dev, "%d vfs allocated\n",
2262 adapter->vfs_allocated_count);
2263 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2264 random_ether_addr(mac_addr);
2265 igb_set_vf_mac(adapter, i, mac_addr);
2267 /* DMA Coalescing is not supported in IOV mode. */
2268 if (adapter->flags & IGB_FLAG_DMAC)
2269 adapter->flags &= ~IGB_FLAG_DMAC;
2271 #endif /* CONFIG_PCI_IOV */
2276 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2277 * @adapter: board private structure to initialize
2279 * igb_init_hw_timer initializes the function pointer and values for the hw
2280 * timer found in hardware.
2282 static void igb_init_hw_timer(struct igb_adapter *adapter)
2284 struct e1000_hw *hw = &adapter->hw;
2286 switch (hw->mac.type) {
2289 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2290 adapter->cycles.read = igb_read_clock;
2291 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2292 adapter->cycles.mult = 1;
2294 /* The 82580 timesync advances the system timer by 8 ns every 8 ns,
2295 * and the value cannot be shifted. Instead we need to shift
2296 * the registers to generate a 64bit timer value. As a result
2297 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2298 * 24 in order to generate a larger value for synchronization. */
2300 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2301 /* disable system timer temporarily by setting bit 31 */
2302 wr32(E1000_TSAUXC, 0x80000000);
2305 /* Set registers so that rollover occurs soon to test this. */
2306 wr32(E1000_SYSTIMR, 0x00000000);
2307 wr32(E1000_SYSTIML, 0x80000000);
2308 wr32(E1000_SYSTIMH, 0x000000FF);
2311 /* enable system timer by clearing bit 31 */
2312 wr32(E1000_TSAUXC, 0x0);
2315 timecounter_init(&adapter->clock,
2317 ktime_to_ns(ktime_get_real()));
2319 /* Synchronize our NIC clock against the system wall clock. Reading a
2320 * NIC time stamp takes ~3us per sample, and samples proved stable
2321 * even under load, so 10 samples per offset comparison are
2322 * sufficient. */
2324 memset(&adapter->compare, 0, sizeof(adapter->compare));
2325 adapter->compare.source = &adapter->clock;
2326 adapter->compare.target = ktime_get_real;
2327 adapter->compare.num_samples = 10;
2328 timecompare_update(&adapter->compare, 0);
2332 /* Initialize the hardware timer: we keep it running just in case
2333 * some program needs it later on. */
2335 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2336 adapter->cycles.read = igb_read_clock;
2337 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2338 adapter->cycles.mult = 1;
2340 /* Scale the NIC clock cycle by a large factor so that
2341 * relatively small clock corrections can be added or
2342 * subtracted at each clock tick. The drawbacks of a large
2343 * factor are a) that the clock register overflows more quickly
2344 * (not such a big deal) and b) that the increment per tick has
2345 * to fit into 24 bits. As a result we need to use a shift of
2346 * 19 so we can fit a value of 16 into the TIMINCA register. */
2348 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2350 (1 << E1000_TIMINCA_16NS_SHIFT) |
2351 (16 << IGB_82576_TSYNC_SHIFT));
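/* Worked example (editor's sketch, assuming E1000_TIMINCA_16NS_SHIFT
 * is 24, selecting a 16 ns increment period): the write above sets
 * TIMINCA = (1 << 24) | (16 << 19), so every 16 ns tick adds 16 << 19
 * to SYSTIM. SYSTIM therefore counts nanoseconds scaled by 2^19, and
 * the timecounter recovers time as
 *
 *   ns = (cycles * mult) >> shift = cycles >> 19
 *
 * given mult = 1 and shift = IGB_82576_TSYNC_SHIFT = 19.
 */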
2353 /* Set registers so that rollover occurs soon to test this. */
2354 wr32(E1000_SYSTIML, 0x00000000);
2355 wr32(E1000_SYSTIMH, 0xFF800000);
2358 timecounter_init(&adapter->clock,
2360 ktime_to_ns(ktime_get_real()));
2362 /* Synchronize our NIC clock against the system wall clock. Reading a
2363 * NIC time stamp takes ~3us per sample, and samples proved stable
2364 * even under load, so 10 samples per offset comparison are
2365 * sufficient. */
2367 memset(&adapter->compare, 0, sizeof(adapter->compare));
2368 adapter->compare.source = &adapter->clock;
2369 adapter->compare.target = ktime_get_real;
2370 adapter->compare.num_samples = 10;
2371 timecompare_update(&adapter->compare, 0);
2374 /* 82575 does not support timesync */
2382 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2383 * @adapter: board private structure to initialize
2385 * igb_sw_init initializes the Adapter private data structure.
2386 * Fields are initialized based on PCI device information and
2387 * OS network device settings (MTU size).
2389 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2391 struct e1000_hw *hw = &adapter->hw;
2392 struct net_device *netdev = adapter->netdev;
2393 struct pci_dev *pdev = adapter->pdev;
2395 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2397 /* set default ring sizes */
2398 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2399 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2401 /* set default ITR values */
2402 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2403 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2405 /* set default work limits */
2406 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2408 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2410 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2412 spin_lock_init(&adapter->stats64_lock);
2413 #ifdef CONFIG_PCI_IOV
2414 switch (hw->mac.type) {
2418 dev_warn(&pdev->dev,
2419 "Maximum of 7 VFs per PF, using max\n");
2420 adapter->vfs_allocated_count = 7;
2422 adapter->vfs_allocated_count = max_vfs;
2427 #endif /* CONFIG_PCI_IOV */
2428 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2429 /* i350 cannot do RSS and SR-IOV at the same time */
2430 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2431 adapter->rss_queues = 1;
2434 /* if rss_queues > 4, or VFs are going to be allocated while rss_queues
2435 * is greater than 1, then combine the Rx/Tx queues into queue pairs in
2436 * order to conserve the limited supply of interrupt vectors */
2438 if ((adapter->rss_queues > 4) ||
2439 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2440 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2442 /* This call may decrease the number of queues */
2443 if (igb_init_interrupt_scheme(adapter)) {
2444 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2448 igb_probe_vfs(adapter);
2450 /* Explicitly disable IRQ since the NIC can be in any state. */
2451 igb_irq_disable(adapter);
2453 if (hw->mac.type == e1000_i350)
2454 adapter->flags &= ~IGB_FLAG_DMAC;
2456 set_bit(__IGB_DOWN, &adapter->state);
2461 * igb_open - Called when a network interface is made active
2462 * @netdev: network interface device structure
2464 * Returns 0 on success, negative value on failure
2466 * The open entry point is called when a network interface is made
2467 * active by the system (IFF_UP). At this point all resources needed
2468 * for transmit and receive operations are allocated, the interrupt
2469 * handler is registered with the OS, the watchdog timer is started,
2470 * and the stack is notified that the interface is ready.
2472 static int igb_open(struct net_device *netdev)
2474 struct igb_adapter *adapter = netdev_priv(netdev);
2475 struct e1000_hw *hw = &adapter->hw;
2479 /* disallow open during test */
2480 if (test_bit(__IGB_TESTING, &adapter->state))
2483 netif_carrier_off(netdev);
2485 /* allocate transmit descriptors */
2486 err = igb_setup_all_tx_resources(adapter);
2490 /* allocate receive descriptors */
2491 err = igb_setup_all_rx_resources(adapter);
2495 igb_power_up_link(adapter);
2497 /* before we allocate an interrupt, we must be ready to handle it.
2498 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2499 * as soon as we call pci_request_irq, so we have to set up our
2500 * clean_rx handler before we do so. */
2501 igb_configure(adapter);
2503 err = igb_request_irq(adapter);
2507 /* From here on the code is the same as igb_up() */
2508 clear_bit(__IGB_DOWN, &adapter->state);
2510 for (i = 0; i < adapter->num_q_vectors; i++) {
2511 struct igb_q_vector *q_vector = adapter->q_vector[i];
2512 napi_enable(&q_vector->napi);
2515 /* Clear any pending interrupts. */
2518 igb_irq_enable(adapter);
2520 /* notify VFs that reset has been completed */
2521 if (adapter->vfs_allocated_count) {
2522 u32 reg_data = rd32(E1000_CTRL_EXT);
2523 reg_data |= E1000_CTRL_EXT_PFRSTD;
2524 wr32(E1000_CTRL_EXT, reg_data);
2527 netif_tx_start_all_queues(netdev);
2529 /* start the watchdog. */
2530 hw->mac.get_link_status = 1;
2531 schedule_work(&adapter->watchdog_task);
2536 igb_release_hw_control(adapter);
2537 igb_power_down_link(adapter);
2538 igb_free_all_rx_resources(adapter);
2540 igb_free_all_tx_resources(adapter);
2548 * igb_close - Disables a network interface
2549 * @netdev: network interface device structure
2551 * Returns 0; this is not allowed to fail
2553 * The close entry point is called when an interface is de-activated
2554 * by the OS. The hardware is still under the driver's control, but
2555 * needs to be disabled. A global MAC reset is issued to stop the
2556 * hardware, and all transmit and receive resources are freed.
2558 static int igb_close(struct net_device *netdev)
2560 struct igb_adapter *adapter = netdev_priv(netdev);
2562 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2565 igb_free_irq(adapter);
2567 igb_free_all_tx_resources(adapter);
2568 igb_free_all_rx_resources(adapter);
2574 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2575 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2577 * Return 0 on success, negative on failure
2579 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2581 struct device *dev = tx_ring->dev;
2584 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2585 tx_ring->tx_buffer_info = vzalloc(size);
2586 if (!tx_ring->tx_buffer_info)
2589 /* round up to nearest 4K */
2590 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2591 tx_ring->size = ALIGN(tx_ring->size, 4096);
2593 tx_ring->desc = dma_alloc_coherent(dev,
2601 tx_ring->next_to_use = 0;
2602 tx_ring->next_to_clean = 0;
2606 vfree(tx_ring->tx_buffer_info);
2608 "Unable to allocate memory for the transmit descriptor ring\n");
2613 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2614 * (Descriptors) for all queues
2615 * @adapter: board private structure
2617 * Return 0 on success, negative on failure
2619 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2621 struct pci_dev *pdev = adapter->pdev;
2624 for (i = 0; i < adapter->num_tx_queues; i++) {
2625 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2628 "Allocation for Tx Queue %u failed\n", i);
2629 for (i--; i >= 0; i--)
2630 igb_free_tx_resources(adapter->tx_ring[i]);
2639 * igb_setup_tctl - configure the transmit control registers
2640 * @adapter: Board private structure
2642 void igb_setup_tctl(struct igb_adapter *adapter)
2644 struct e1000_hw *hw = &adapter->hw;
2647 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2648 wr32(E1000_TXDCTL(0), 0);
2650 /* Program the Transmit Control Register */
2651 tctl = rd32(E1000_TCTL);
2652 tctl &= ~E1000_TCTL_CT;
2653 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2654 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2656 igb_config_collision_dist(hw);
2658 /* Enable transmits */
2659 tctl |= E1000_TCTL_EN;
2661 wr32(E1000_TCTL, tctl);
2665 * igb_configure_tx_ring - Configure transmit ring after Reset
2666 * @adapter: board private structure
2667 * @ring: tx ring to configure
2669 * Configure a transmit ring after a reset.
2671 void igb_configure_tx_ring(struct igb_adapter *adapter,
2672 struct igb_ring *ring)
2674 struct e1000_hw *hw = &adapter->hw;
2676 u64 tdba = ring->dma;
2677 int reg_idx = ring->reg_idx;
2679 /* disable the queue */
2680 wr32(E1000_TXDCTL(reg_idx), 0);
2684 wr32(E1000_TDLEN(reg_idx),
2685 ring->count * sizeof(union e1000_adv_tx_desc));
2686 wr32(E1000_TDBAL(reg_idx),
2687 tdba & 0x00000000ffffffffULL);
2688 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2690 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2691 wr32(E1000_TDH(reg_idx), 0);
2692 writel(0, ring->tail);
2694 txdctl |= IGB_TX_PTHRESH;
2695 txdctl |= IGB_TX_HTHRESH << 8;
2696 txdctl |= IGB_TX_WTHRESH << 16;
2698 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2699 wr32(E1000_TXDCTL(reg_idx), txdctl);
2703 * igb_configure_tx - Configure transmit Unit after Reset
2704 * @adapter: board private structure
2706 * Configure the Tx unit of the MAC after a reset.
2708 static void igb_configure_tx(struct igb_adapter *adapter)
2712 for (i = 0; i < adapter->num_tx_queues; i++)
2713 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2717 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2718 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2720 * Returns 0 on success, negative on failure
2722 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2724 struct device *dev = rx_ring->dev;
2727 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2728 rx_ring->rx_buffer_info = vzalloc(size);
2729 if (!rx_ring->rx_buffer_info)
2732 desc_len = sizeof(union e1000_adv_rx_desc);
2734 /* Round up to nearest 4K */
2735 rx_ring->size = rx_ring->count * desc_len;
2736 rx_ring->size = ALIGN(rx_ring->size, 4096);
2738 rx_ring->desc = dma_alloc_coherent(dev,
2746 rx_ring->next_to_clean = 0;
2747 rx_ring->next_to_use = 0;
2752 vfree(rx_ring->rx_buffer_info);
2753 rx_ring->rx_buffer_info = NULL;
2754 dev_err(dev, "Unable to allocate memory for the receive descriptor ring\n");
2760 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2761 * (Descriptors) for all queues
2762 * @adapter: board private structure
2764 * Return 0 on success, negative on failure
2766 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2768 struct pci_dev *pdev = adapter->pdev;
2771 for (i = 0; i < adapter->num_rx_queues; i++) {
2772 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2775 "Allocation for Rx Queue %u failed\n", i);
2776 for (i--; i >= 0; i--)
2777 igb_free_rx_resources(adapter->rx_ring[i]);
2786 * igb_setup_mrqc - configure the multiple receive queue control registers
2787 * @adapter: Board private structure
2789 static void igb_setup_mrqc(struct igb_adapter *adapter)
2791 struct e1000_hw *hw = &adapter->hw;
2793 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2798 static const u8 rsshash[40] = {
2799 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2800 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2801 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2802 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2804 /* Fill out hash function seeds */
2805 for (j = 0; j < 10; j++) {
2806 u32 rsskey = rsshash[(j * 4)];
2807 rsskey |= rsshash[(j * 4) + 1] << 8;
2808 rsskey |= rsshash[(j * 4) + 2] << 16;
2809 rsskey |= rsshash[(j * 4) + 3] << 24;
2810 array_wr32(E1000_RSSRK(0), j, rsskey);
2813 num_rx_queues = adapter->rss_queues;
2815 if (adapter->vfs_allocated_count) {
2816 /* 82575 and 82576 support 2 RSS queues for VMDq */
2817 switch (hw->mac.type) {
2834 if (hw->mac.type == e1000_82575)
2838 for (j = 0; j < (32 * 4); j++) {
2839 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2841 reta.bytes[j & 3] |= num_rx_queues << shift2;
2843 wr32(E1000_RETA(j >> 2), reta.dword);
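/* Editor's example (illustrative): the redirection table packs four
 * one-byte entries per 32-bit register. With num_rx_queues = 4 and
 * shift = 0 (the non-82575 case), iterations j = 0..3 build
 * reta.dword = 0x03020100 and write it to RETA(0), spreading hash
 * values across queues 0..3 round-robin.
 */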
2847 * Disable raw packet checksumming so that RSS hash is placed in
2848 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2849 * offloads as they are enabled by default
2851 rxcsum = rd32(E1000_RXCSUM);
2852 rxcsum |= E1000_RXCSUM_PCSD;
2854 if (adapter->hw.mac.type >= e1000_82576)
2855 /* Enable Receive Checksum Offload for SCTP */
2856 rxcsum |= E1000_RXCSUM_CRCOFL;
2858 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2859 wr32(E1000_RXCSUM, rxcsum);
2861 /* If VMDq is enabled then we set the appropriate mode for that, else
2862 * we default to RSS so that an RSS hash is calculated per packet even
2863 * if we are only using one queue */
2864 if (adapter->vfs_allocated_count) {
2865 if (hw->mac.type > e1000_82575) {
2866 /* Set the default pool for the PF's first queue */
2867 u32 vtctl = rd32(E1000_VT_CTL);
2868 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2869 E1000_VT_CTL_DISABLE_DEF_POOL);
2870 vtctl |= adapter->vfs_allocated_count <<
2871 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2872 wr32(E1000_VT_CTL, vtctl);
2874 if (adapter->rss_queues > 1)
2875 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2877 mrqc = E1000_MRQC_ENABLE_VMDQ;
2879 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2881 igb_vmm_control(adapter);
2884 /* Generate RSS hash based on TCP port numbers and/or
2885 * IPv4/v6 src and dst addresses since UDP cannot be
2886 * hashed reliably due to IP fragmentation */
2888 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2889 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2890 E1000_MRQC_RSS_FIELD_IPV6 |
2891 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2892 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2894 wr32(E1000_MRQC, mrqc);
2898 * igb_setup_rctl - configure the receive control registers
2899 * @adapter: Board private structure
2901 void igb_setup_rctl(struct igb_adapter *adapter)
2903 struct e1000_hw *hw = &adapter->hw;
2906 rctl = rd32(E1000_RCTL);
2908 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2909 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2911 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2912 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2915 * enable stripping of CRC. It's unlikely this will break BMC
2916 * redirection as it did with e1000. Newer features require
2917 * that the HW strips the CRC.
2919 rctl |= E1000_RCTL_SECRC;
2921 /* disable store bad packets and clear size bits. */
2922 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2924 /* enable LPE to prevent packets larger than max_frame_size */
2925 rctl |= E1000_RCTL_LPE;
2927 /* disable queue 0 to prevent tail write w/o re-config */
2928 wr32(E1000_RXDCTL(0), 0);
2930 /* Attention!!! For SR-IOV PF driver operations you must enable
2931 * queue drop for all VF and PF queues to prevent head of line blocking
2932 * if an un-trusted VF does not provide descriptors to hardware.
2934 if (adapter->vfs_allocated_count) {
2935 /* set all queue drop enable bits */
2936 wr32(E1000_QDE, ALL_QUEUES);
2939 wr32(E1000_RCTL, rctl);
2942 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2945 struct e1000_hw *hw = &adapter->hw;
2948 /* if it isn't the PF, check to see if VFs are enabled and
2949 * increase the size to support VLAN tags */
2950 if (vfn < adapter->vfs_allocated_count &&
2951 adapter->vf_data[vfn].vlans_enabled)
2952 size += VLAN_TAG_SIZE;
2954 vmolr = rd32(E1000_VMOLR(vfn));
2955 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2956 vmolr |= size | E1000_VMOLR_LPE;
2957 wr32(E1000_VMOLR(vfn), vmolr);
2963 * igb_rlpml_set - set maximum receive packet size
2964 * @adapter: board private structure
2966 * Configure maximum receivable packet size.
2968 static void igb_rlpml_set(struct igb_adapter *adapter)
2970 u32 max_frame_size = adapter->max_frame_size;
2971 struct e1000_hw *hw = &adapter->hw;
2972 u16 pf_id = adapter->vfs_allocated_count;
2975 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2977 * If we're in VMDQ or SR-IOV mode, then set global RLPML
2978 * to our max jumbo frame size, in case we need to enable
2979 * jumbo frames on one of the rings later.
2980 * This will not pass over-length frames into the default
2981 * queue because it's gated by the VMOLR.RLPML.
2983 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2986 wr32(E1000_RLPML, max_frame_size);
2989 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2992 struct e1000_hw *hw = &adapter->hw;
2996 /* This register exists only on 82576 and newer, so on older hardware
2997 * we should exit and do nothing */
2999 if (hw->mac.type < e1000_82576)
3002 vmolr = rd32(E1000_VMOLR(vfn));
3003 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3005 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3007 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3009 /* clear all bits that might not be set */
3010 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3012 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3013 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3015 /* for VMDq only allow the VFs and pool 0 to accept broadcast and multicast */
3018 if (vfn <= adapter->vfs_allocated_count)
3019 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3021 wr32(E1000_VMOLR(vfn), vmolr);
3025 * igb_configure_rx_ring - Configure a receive ring after Reset
3026 * @adapter: board private structure
3027 * @ring: receive ring to be configured
3029 * Configure the Rx unit of the MAC after a reset.
3031 void igb_configure_rx_ring(struct igb_adapter *adapter,
3032 struct igb_ring *ring)
3034 struct e1000_hw *hw = &adapter->hw;
3035 u64 rdba = ring->dma;
3036 int reg_idx = ring->reg_idx;
3037 u32 srrctl = 0, rxdctl = 0;
3039 /* disable the queue */
3040 wr32(E1000_RXDCTL(reg_idx), 0);
3042 /* Set DMA base address registers */
3043 wr32(E1000_RDBAL(reg_idx),
3044 rdba & 0x00000000ffffffffULL);
3045 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3046 wr32(E1000_RDLEN(reg_idx),
3047 ring->count * sizeof(union e1000_adv_rx_desc));
3049 /* initialize head and tail */
3050 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3051 wr32(E1000_RDH(reg_idx), 0);
3052 writel(0, ring->tail);
3054 /* set descriptor configuration */
3055 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3056 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3057 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3059 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3061 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3062 if (hw->mac.type == e1000_82580)
3063 srrctl |= E1000_SRRCTL_TIMESTAMP;
3064 /* Only set Drop Enable if we are supporting multiple queues */
3065 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3066 srrctl |= E1000_SRRCTL_DROP_EN;
3068 wr32(E1000_SRRCTL(reg_idx), srrctl);
3070 /* set filtering for VMDQ pools */
3071 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3073 rxdctl |= IGB_RX_PTHRESH;
3074 rxdctl |= IGB_RX_HTHRESH << 8;
3075 rxdctl |= IGB_RX_WTHRESH << 16;
3077 /* enable receive descriptor fetching */
3078 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3079 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3083 * igb_configure_rx - Configure receive Unit after Reset
3084 * @adapter: board private structure
3086 * Configure the Rx unit of the MAC after a reset.
3088 static void igb_configure_rx(struct igb_adapter *adapter)
3092 /* set UTA to appropriate mode */
3093 igb_set_uta(adapter);
3095 /* set the correct pool for the PF default MAC address in entry 0 */
3096 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3097 adapter->vfs_allocated_count);
3099 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3100 * the Base and Length of the Rx Descriptor Ring */
3101 for (i = 0; i < adapter->num_rx_queues; i++)
3102 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3106 * igb_free_tx_resources - Free Tx Resources per Queue
3107 * @tx_ring: Tx descriptor ring for a specific queue
3109 * Free all transmit software resources
3111 void igb_free_tx_resources(struct igb_ring *tx_ring)
3113 igb_clean_tx_ring(tx_ring);
3115 vfree(tx_ring->tx_buffer_info);
3116 tx_ring->tx_buffer_info = NULL;
3118 /* if not set, then don't free */
3122 dma_free_coherent(tx_ring->dev, tx_ring->size,
3123 tx_ring->desc, tx_ring->dma);
3125 tx_ring->desc = NULL;
3129 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3130 * @adapter: board private structure
3132 * Free all transmit software resources
3134 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3138 for (i = 0; i < adapter->num_tx_queues; i++)
3139 igb_free_tx_resources(adapter->tx_ring[i]);
3142 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3143 struct igb_tx_buffer *tx_buffer)
3145 if (tx_buffer->skb) {
3146 dev_kfree_skb_any(tx_buffer->skb);
3148 dma_unmap_single(ring->dev,
3152 } else if (tx_buffer->dma) {
3153 dma_unmap_page(ring->dev,
3158 tx_buffer->next_to_watch = NULL;
3159 tx_buffer->skb = NULL;
3161 /* buffer_info must be completely set up in the transmit path */
3165 * igb_clean_tx_ring - Free Tx Buffers
3166 * @tx_ring: ring to be cleaned
3168 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3170 struct igb_tx_buffer *buffer_info;
3174 if (!tx_ring->tx_buffer_info)
3176 /* Free all the Tx ring sk_buffs */
3178 for (i = 0; i < tx_ring->count; i++) {
3179 buffer_info = &tx_ring->tx_buffer_info[i];
3180 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3183 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3184 memset(tx_ring->tx_buffer_info, 0, size);
3186 /* Zero out the descriptor ring */
3187 memset(tx_ring->desc, 0, tx_ring->size);
3189 tx_ring->next_to_use = 0;
3190 tx_ring->next_to_clean = 0;
3194 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3195 * @adapter: board private structure
3197 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3201 for (i = 0; i < adapter->num_tx_queues; i++)
3202 igb_clean_tx_ring(adapter->tx_ring[i]);
3206 * igb_free_rx_resources - Free Rx Resources
3207 * @rx_ring: ring to clean the resources from
3209 * Free all receive software resources
3211 void igb_free_rx_resources(struct igb_ring *rx_ring)
3213 igb_clean_rx_ring(rx_ring);
3215 vfree(rx_ring->rx_buffer_info);
3216 rx_ring->rx_buffer_info = NULL;
3218 /* if not set, then don't free */
3222 dma_free_coherent(rx_ring->dev, rx_ring->size,
3223 rx_ring->desc, rx_ring->dma);
3225 rx_ring->desc = NULL;
3229 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3230 * @adapter: board private structure
3232 * Free all receive software resources
3234 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3238 for (i = 0; i < adapter->num_rx_queues; i++)
3239 igb_free_rx_resources(adapter->rx_ring[i]);
3243 * igb_clean_rx_ring - Free Rx Buffers per Queue
3244 * @rx_ring: ring to free buffers from
3246 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3251 if (!rx_ring->rx_buffer_info)
3254 /* Free all the Rx ring sk_buffs */
3255 for (i = 0; i < rx_ring->count; i++) {
3256 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3257 if (buffer_info->dma) {
3258 dma_unmap_single(rx_ring->dev,
3262 buffer_info->dma = 0;
3265 if (buffer_info->skb) {
3266 dev_kfree_skb(buffer_info->skb);
3267 buffer_info->skb = NULL;
3269 if (buffer_info->page_dma) {
3270 dma_unmap_page(rx_ring->dev,
3271 buffer_info->page_dma,
3274 buffer_info->page_dma = 0;
3276 if (buffer_info->page) {
3277 put_page(buffer_info->page);
3278 buffer_info->page = NULL;
3279 buffer_info->page_offset = 0;
3283 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3284 memset(rx_ring->rx_buffer_info, 0, size);
3286 /* Zero out the descriptor ring */
3287 memset(rx_ring->desc, 0, rx_ring->size);
3289 rx_ring->next_to_clean = 0;
3290 rx_ring->next_to_use = 0;
3294 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3295 * @adapter: board private structure
3297 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3301 for (i = 0; i < adapter->num_rx_queues; i++)
3302 igb_clean_rx_ring(adapter->rx_ring[i]);
3306 * igb_set_mac - Change the Ethernet Address of the NIC
3307 * @netdev: network interface device structure
3308 * @p: pointer to an address structure
3310 * Returns 0 on success, negative on failure
3312 static int igb_set_mac(struct net_device *netdev, void *p)
3314 struct igb_adapter *adapter = netdev_priv(netdev);
3315 struct e1000_hw *hw = &adapter->hw;
3316 struct sockaddr *addr = p;
3318 if (!is_valid_ether_addr(addr->sa_data))
3319 return -EADDRNOTAVAIL;
3321 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3322 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3324 /* set the correct pool for the new PF MAC address in entry 0 */
3325 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3326 adapter->vfs_allocated_count);
3332 * igb_write_mc_addr_list - write multicast addresses to MTA
3333 * @netdev: network interface device structure
3335 * Writes multicast address list to the MTA hash table.
3336 * Returns: -ENOMEM on failure
3337 * 0 on no addresses written
3338 * X on writing X addresses to MTA
3340 static int igb_write_mc_addr_list(struct net_device *netdev)
3342 struct igb_adapter *adapter = netdev_priv(netdev);
3343 struct e1000_hw *hw = &adapter->hw;
3344 struct netdev_hw_addr *ha;
3348 if (netdev_mc_empty(netdev)) {
3349 /* nothing to program, so clear mc list */
3350 igb_update_mc_addr_list(hw, NULL, 0);
3351 igb_restore_vf_multicasts(adapter);
3355 mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3359 /* The shared function expects a packed array of only addresses. */
3361 netdev_for_each_mc_addr(ha, netdev)
3362 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3364 igb_update_mc_addr_list(hw, mta_list, i);
3367 return netdev_mc_count(netdev);
3371 * igb_write_uc_addr_list - write unicast addresses to RAR table
3372 * @netdev: network interface device structure
3374 * Writes unicast address list to the RAR table.
3375 * Returns: -ENOMEM on failure/insufficient address space
3376 * 0 on no addresses written
3377 * X on writing X addresses to the RAR table
3379 static int igb_write_uc_addr_list(struct net_device *netdev)
3381 struct igb_adapter *adapter = netdev_priv(netdev);
3382 struct e1000_hw *hw = &adapter->hw;
3383 unsigned int vfn = adapter->vfs_allocated_count;
3384 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3387 /* return ENOMEM indicating insufficient memory for addresses */
3388 if (netdev_uc_count(netdev) > rar_entries)
3391 if (!netdev_uc_empty(netdev) && rar_entries) {
3392 struct netdev_hw_addr *ha;
3394 netdev_for_each_uc_addr(ha, netdev) {
3397 igb_rar_set_qsel(adapter, ha->addr,
3403 /* write the addresses in reverse order to avoid write combining */
3404 for (; rar_entries > 0 ; rar_entries--) {
3405 wr32(E1000_RAH(rar_entries), 0);
3406 wr32(E1000_RAL(rar_entries), 0);
3414 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3415 * @netdev: network interface device structure
3417 * The set_rx_mode entry point is called whenever the unicast or multicast
3418 * address lists or the network interface flags are updated. This routine is
3419 * responsible for configuring the hardware for proper unicast, multicast,
3420 * promiscuous mode, and all-multi behavior.
3422 static void igb_set_rx_mode(struct net_device *netdev)
3424 struct igb_adapter *adapter = netdev_priv(netdev);
3425 struct e1000_hw *hw = &adapter->hw;
3426 unsigned int vfn = adapter->vfs_allocated_count;
3427 u32 rctl, vmolr = 0;
3430 /* Check for Promiscuous and All Multicast modes */
3431 rctl = rd32(E1000_RCTL);
3433 /* clear the affected bits */
3434 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3436 if (netdev->flags & IFF_PROMISC) {
3437 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3438 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3440 if (netdev->flags & IFF_ALLMULTI) {
3441 rctl |= E1000_RCTL_MPE;
3442 vmolr |= E1000_VMOLR_MPME;
3445 /* Write addresses to the MTA; if the attempt fails
3446 * then we should just turn on promiscuous mode so
3447 * that we can at least receive multicast traffic */
3449 count = igb_write_mc_addr_list(netdev);
3451 rctl |= E1000_RCTL_MPE;
3452 vmolr |= E1000_VMOLR_MPME;
3454 vmolr |= E1000_VMOLR_ROMPE;
3458 /* Write addresses to available RAR registers; if there is not
3459 * sufficient space to store all the addresses then enable
3460 * unicast promiscuous mode */
3462 count = igb_write_uc_addr_list(netdev);
3464 rctl |= E1000_RCTL_UPE;
3465 vmolr |= E1000_VMOLR_ROPE;
3467 rctl |= E1000_RCTL_VFE;
3469 wr32(E1000_RCTL, rctl);
3472 /* In order to support SR-IOV and eventually VMDq it is necessary to set
3473 * the VMOLR to enable the appropriate modes. Without this workaround,
3474 * VLAN tags will not be stripped from frames that arrive only
3475 * because we are the default pool */
3477 if (hw->mac.type < e1000_82576)
3480 vmolr |= rd32(E1000_VMOLR(vfn)) &
3481 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3482 wr32(E1000_VMOLR(vfn), vmolr);
3483 igb_restore_vf_multicasts(adapter);
3486 static void igb_check_wvbr(struct igb_adapter *adapter)
3488 struct e1000_hw *hw = &adapter->hw;
3491 switch (hw->mac.type) {
3494 if (!(wvbr = rd32(E1000_WVBR)))
3501 adapter->wvbr |= wvbr;
3504 #define IGB_STAGGERED_QUEUE_OFFSET 8
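/* Editor's note (sketch): each VF owns two bits in WVBR, bit j for its
 * first queue and bit (j + IGB_STAGGERED_QUEUE_OFFSET) for its second.
 * E.g. wvbr = 0x0101 (bits 0 and 8 set) reports spoof events on both
 * queues of VF 0; igb_spoof_check() below tests exactly this pair.
 */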
3506 static void igb_spoof_check(struct igb_adapter *adapter)
3513 for (j = 0; j < adapter->vfs_allocated_count; j++) {
3514 if (adapter->wvbr & (1 << j) ||
3515 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3516 dev_warn(&adapter->pdev->dev,
3517 "Spoof event(s) detected on VF %d\n", j);
3520 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3525 /* Need to wait a few seconds after link up to get diagnostic information from the phy */
3527 static void igb_update_phy_info(unsigned long data)
3529 struct igb_adapter *adapter = (struct igb_adapter *) data;
3530 igb_get_phy_info(&adapter->hw);
3534 * igb_has_link - check shared code for link and determine up/down
3535 * @adapter: pointer to driver private info
3537 bool igb_has_link(struct igb_adapter *adapter)
3539 struct e1000_hw *hw = &adapter->hw;
3540 bool link_active = false;
3543 /* get_link_status is set on LSC (link status) interrupt or
3544 * rx sequence error interrupt. get_link_status will stay
3545 * false until the e1000_check_for_link establishes link
3546 * for copper adapters ONLY
3548 switch (hw->phy.media_type) {
3549 case e1000_media_type_copper:
3550 if (hw->mac.get_link_status) {
3551 ret_val = hw->mac.ops.check_for_link(hw);
3552 link_active = !hw->mac.get_link_status;
3557 case e1000_media_type_internal_serdes:
3558 ret_val = hw->mac.ops.check_for_link(hw);
3559 link_active = hw->mac.serdes_has_link;
3562 case e1000_media_type_unknown:
3569 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3572 u32 ctrl_ext, thstat;
3574 /* check for thermal sensor event on i350, copper only */
3575 if (hw->mac.type == e1000_i350) {
3576 thstat = rd32(E1000_THSTAT);
3577 ctrl_ext = rd32(E1000_CTRL_EXT);
3579 if ((hw->phy.media_type == e1000_media_type_copper) &&
3580 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3581 ret = !!(thstat & event);
3589 * igb_watchdog - Timer Call-back
3590 * @data: pointer to adapter cast into an unsigned long
3592 static void igb_watchdog(unsigned long data)
3594 struct igb_adapter *adapter = (struct igb_adapter *)data;
3595 /* Do the rest outside of interrupt context */
3596 schedule_work(&adapter->watchdog_task);
3599 static void igb_watchdog_task(struct work_struct *work)
3601 struct igb_adapter *adapter = container_of(work,
3604 struct e1000_hw *hw = &adapter->hw;
3605 struct net_device *netdev = adapter->netdev;
3609 link = igb_has_link(adapter);
3611 if (!netif_carrier_ok(netdev)) {
3613 hw->mac.ops.get_speed_and_duplex(hw,
3614 &adapter->link_speed,
3615 &adapter->link_duplex);
3617 ctrl = rd32(E1000_CTRL);
3618 /* Link status message must follow this format */
3619 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3620 "Flow Control: %s\n",
3622 adapter->link_speed,
3623 adapter->link_duplex == FULL_DUPLEX ?
3624 "Full Duplex" : "Half Duplex",
3625 ((ctrl & E1000_CTRL_TFCE) &&
3626 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3627 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3628 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3630 /* check for thermal sensor event */
3631 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3632 printk(KERN_INFO "igb: %s The network adapter "
3633 "link speed was downshifted "
3634 "because it overheated.\n",
3638 /* adjust timeout factor according to speed/duplex */
3639 adapter->tx_timeout_factor = 1;
3640 switch (adapter->link_speed) {
3642 adapter->tx_timeout_factor = 14;
3645 /* maybe add some timeout factor ? */
3649 netif_carrier_on(netdev);
3651 igb_ping_all_vfs(adapter);
3652 igb_check_vf_rate_limit(adapter);
3654 /* link state has changed, schedule phy info update */
3655 if (!test_bit(__IGB_DOWN, &adapter->state))
3656 mod_timer(&adapter->phy_info_timer,
3657 round_jiffies(jiffies + 2 * HZ));
3660 if (netif_carrier_ok(netdev)) {
3661 adapter->link_speed = 0;
3662 adapter->link_duplex = 0;
3664 /* check for thermal sensor event */
3665 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3666 printk(KERN_ERR "igb: %s The network adapter "
3667 "was stopped because it "
3672 /* Link status message must follow this format */
3673 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3675 netif_carrier_off(netdev);
3677 igb_ping_all_vfs(adapter);
3679 /* link state has changed, schedule phy info update */
3680 if (!test_bit(__IGB_DOWN, &adapter->state))
3681 mod_timer(&adapter->phy_info_timer,
3682 round_jiffies(jiffies + 2 * HZ));
3686 spin_lock(&adapter->stats64_lock);
3687 igb_update_stats(adapter, &adapter->stats64);
3688 spin_unlock(&adapter->stats64_lock);
3690 for (i = 0; i < adapter->num_tx_queues; i++) {
3691 struct igb_ring *tx_ring = adapter->tx_ring[i];
3692 if (!netif_carrier_ok(netdev)) {
3693 /* We've lost link, so the controller stops DMA,
3694 * but we've got queued Tx work that's never going
3695 * to get done, so reset controller to flush Tx.
3696 * (Do the reset outside of interrupt context). */
3697 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3698 adapter->tx_timeout_count++;
3699 schedule_work(&adapter->reset_task);
3700 /* return immediately since reset is imminent */
3705 /* Force detection of hung controller every watchdog period */
3706 tx_ring->detect_tx_hung = true;
3709 /* Cause software interrupt to ensure rx ring is cleaned */
3710 if (adapter->msix_entries) {
3712 for (i = 0; i < adapter->num_q_vectors; i++) {
3713 struct igb_q_vector *q_vector = adapter->q_vector[i];
3714 eics |= q_vector->eims_value;
3716 wr32(E1000_EICS, eics);
3718 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3721 igb_spoof_check(adapter);
3723 /* Reset the timer */
3724 if (!test_bit(__IGB_DOWN, &adapter->state))
3725 mod_timer(&adapter->watchdog_timer,
3726 round_jiffies(jiffies + 2 * HZ));
3729 enum latency_range {
3733 latency_invalid = 255
3737 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3739 * Stores a new ITR value based strictly on packet size. This
3740 * algorithm is less sophisticated than that used in igb_update_itr,
3741 * due to the difficulty of synchronizing statistics across multiple
3742 * receive rings. The divisors and thresholds used by this function
3743 * were determined based on theoretical maximum wire speed and testing
3744 * data, in order to minimize response time while increasing bulk throughput.
3746 * This functionality is controlled by the InterruptThrottleRate module
3747 * parameter (see igb_param.c)
3748 * NOTE: This function is called only when operating in a multiqueue
3749 * receive environment.
3750 * @q_vector: pointer to q_vector
3752 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3754 int new_val = q_vector->itr_val;
3755 int avg_wire_size = 0;
3756 struct igb_adapter *adapter = q_vector->adapter;
3757 struct igb_ring *ring;
3758 unsigned int packets;
3760 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3761 * ints/sec - ITR timer value of 120 ticks.
3763 if (adapter->link_speed != SPEED_1000) {
3768 ring = q_vector->rx_ring;
3770 packets = ACCESS_ONCE(ring->total_packets);
3773 avg_wire_size = ring->total_bytes / packets;
3776 ring = q_vector->tx_ring;
3778 packets = ACCESS_ONCE(ring->total_packets);
3781 avg_wire_size = max_t(u32, avg_wire_size,
3782 ring->total_bytes / packets);
3785 /* if avg_wire_size isn't set no work was done */
3789 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3790 avg_wire_size += 24;
3792 /* Don't starve jumbo frames */
3793 avg_wire_size = min(avg_wire_size, 3000);
3795 /* Give a little boost to mid-size frames */
3796 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3797 new_val = avg_wire_size / 3;
3799 new_val = avg_wire_size / 2;
3801 /* when in itr mode 3 do not exceed 20K ints/sec */
3802 if (adapter->rx_itr_setting == 3 && new_val < 196)
3806 if (new_val != q_vector->itr_val) {
3807 q_vector->itr_val = new_val;
3808 q_vector->set_itr = 1;
3811 if (q_vector->rx_ring) {
3812 q_vector->rx_ring->total_bytes = 0;
3813 q_vector->rx_ring->total_packets = 0;
3815 if (q_vector->tx_ring) {
3816 q_vector->tx_ring->total_bytes = 0;
3817 q_vector->tx_ring->total_packets = 0;
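/* Editor's worked example (illustrative, assuming the ~0.25 us EITR
 * granularity implied by the "196 ... 20,000 ints/sec" comments in
 * igb_set_itr() below): a stream of standard 1500-byte frames gives
 *
 *   avg_wire_size = 1500 + 24 = 1524  ->  new_val = 1524 / 2 = 762
 *
 * i.e. roughly 762 * 0.25 us = ~190 us between interrupts, or about
 * 5,200 ints/sec under bulk load.
 */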
3822 * igb_update_itr - update the dynamic ITR value based on statistics
3823 * Stores a new ITR value based on packets and byte
3824 * counts during the last interrupt. The advantage of per interrupt
3825 * computation is faster updates and more accurate ITR for the current
3826 * traffic pattern. Constants in this function were computed
3827 * based on theoretical maximum wire speed and thresholds were set based
3828 * on testing data as well as attempting to minimize response time
3829 * while increasing bulk throughput.
3830 * This functionality is controlled by the InterruptThrottleRate module
3831 * parameter (see igb_param.c)
3832 * NOTE: These calculations are only valid when operating in a single-
3833 * queue environment.
3834 * @adapter: pointer to adapter
3835 * @itr_setting: current q_vector->itr_val
3836 * @packets: the number of packets during this measurement interval
3837 * @bytes: the number of bytes during this measurement interval
3839 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3840 int packets, int bytes)
3842 unsigned int retval = itr_setting;
3845 goto update_itr_done;
3847 switch (itr_setting) {
3848 case lowest_latency:
3849 /* handle TSO and jumbo frames */
3850 if (bytes/packets > 8000)
3851 retval = bulk_latency;
3852 else if ((packets < 5) && (bytes > 512))
3853 retval = low_latency;
3855 case low_latency: /* 50 usec aka 20000 ints/s */
3856 if (bytes > 10000) {
3857 /* this if handles the TSO accounting */
3858 if (bytes/packets > 8000) {
3859 retval = bulk_latency;
3860 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3861 retval = bulk_latency;
3862 } else if ((packets > 35)) {
3863 retval = lowest_latency;
3865 } else if (bytes/packets > 2000) {
3866 retval = bulk_latency;
3867 } else if (packets <= 2 && bytes < 512) {
3868 retval = lowest_latency;
3871 case bulk_latency: /* 250 usec aka 4000 ints/s */
3872 if (bytes > 25000) {
3874 retval = low_latency;
3875 } else if (bytes < 1500) {
3876 retval = low_latency;
3885 static void igb_set_itr(struct igb_adapter *adapter)
3887 struct igb_q_vector *q_vector = adapter->q_vector[0];
3889 u32 new_itr = q_vector->itr_val;
3891 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3892 if (adapter->link_speed != SPEED_1000) {
3898 adapter->rx_itr = igb_update_itr(adapter,
3900 q_vector->rx_ring->total_packets,
3901 q_vector->rx_ring->total_bytes);
3903 adapter->tx_itr = igb_update_itr(adapter,
3905 q_vector->tx_ring->total_packets,
3906 q_vector->tx_ring->total_bytes);
3907 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3909 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3910 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3911 current_itr = low_latency;
3913 switch (current_itr) {
3914 /* counts and packets in update_itr are dependent on these numbers */
3915 case lowest_latency:
3916 new_itr = 56; /* aka 70,000 ints/sec */
3919 new_itr = 196; /* aka 20,000 ints/sec */
3922 new_itr = 980; /* aka 4,000 ints/sec */
3929 q_vector->rx_ring->total_bytes = 0;
3930 q_vector->rx_ring->total_packets = 0;
3931 q_vector->tx_ring->total_bytes = 0;
3932 q_vector->tx_ring->total_packets = 0;
3934 if (new_itr != q_vector->itr_val) {
3935 /* this attempts to bias the interrupt rate towards Bulk
3936 * by adding intermediate steps when the interrupt rate is increasing */
3938 new_itr = new_itr > q_vector->itr_val ?
3939 max((new_itr * q_vector->itr_val) /
3940 (new_itr + (q_vector->itr_val >> 2)),
3943 /* Don't write the value here; it resets the adapter's
3944 * internal timer, and causes us to delay far longer than
3945 * we should between interrupts. Instead, we write the ITR
3946 * value at the beginning of the next interrupt so the timing
3947 * ends up being correct.
3949 q_vector->itr_val = new_itr;
3950 q_vector->set_itr = 1;
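/* Editor's note (illustrative, assuming the EITR register's ~0.25 us
 * granularity): the itr_val figures above translate to interrupt rates
 * of roughly 4,000,000 / itr_val per second:
 *
 *   56  -> ~71,000 ints/sec    ("70,000")
 *   196 -> ~20,400 ints/sec    ("20,000")
 *   980 ->  ~4,100 ints/sec    ("4,000")
 */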
3954 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3955 u32 type_tucmd, u32 mss_l4len_idx)
3957 struct e1000_adv_tx_context_desc *context_desc;
3958 u16 i = tx_ring->next_to_use;
3960 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3963 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
3965 /* set bits to identify this as an advanced context descriptor */
3966 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3968 /* For 82575, context index must be unique per ring. */
3969 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3970 mss_l4len_idx |= tx_ring->reg_idx << 4;
3972 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
3973 context_desc->seqnum_seed = 0;
3974 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
3975 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
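/* Editor's example (illustrative, assuming the usual advanced context
 * descriptor layout with MSS in bits 31:16 and L4LEN in bits 15:8):
 * a TSO packet with gso_size = 1448 and a 20-byte TCP header packs
 *
 *   mss_l4len_idx = (1448 << 16) | (20 << 8)
 *
 * which is exactly what igb_tso() below builds with
 * E1000_ADVTXD_MSS_SHIFT and E1000_ADVTXD_L4LEN_SHIFT.
 */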
3978 static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb,
3979 u32 tx_flags, __be16 protocol, u8 *hdr_len)
3982 u32 vlan_macip_lens, type_tucmd;
3983 u32 mss_l4len_idx, l4len;
3985 if (!skb_is_gso(skb))
3988 if (skb_header_cloned(skb)) {
3989 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3994 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3995 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
3997 if (protocol == __constant_htons(ETH_P_IP)) {
3998 struct iphdr *iph = ip_hdr(skb);
4001 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4005 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4006 } else if (skb_is_gso_v6(skb)) {
4007 ipv6_hdr(skb)->payload_len = 0;
4008 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4009 &ipv6_hdr(skb)->daddr,
4013 l4len = tcp_hdrlen(skb);
4014 *hdr_len = skb_transport_offset(skb) + l4len;
4017 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4018 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4020 /* VLAN MACLEN IPLEN */
4021 vlan_macip_lens = skb_network_header_len(skb);
4022 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4023 vlan_macip_lens |= tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4025 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4030 static inline bool igb_tx_csum(struct igb_ring *tx_ring, struct sk_buff *skb,
4031 u32 tx_flags, __be16 protocol)
4033 u32 vlan_macip_lens = 0;
4034 u32 mss_l4len_idx = 0;
4037 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4038 if (!(tx_flags & IGB_TX_FLAGS_VLAN))
4043 case __constant_htons(ETH_P_IP):
4044 vlan_macip_lens |= skb_network_header_len(skb);
4045 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4046 l4_hdr = ip_hdr(skb)->protocol;
4048 case __constant_htons(ETH_P_IPV6):
4049 vlan_macip_lens |= skb_network_header_len(skb);
4050 l4_hdr = ipv6_hdr(skb)->nexthdr;
4053 if (unlikely(net_ratelimit())) {
4054 dev_warn(tx_ring->dev,
4055 "partial checksum but proto=%x!\n",
4063 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4064 mss_l4len_idx = tcp_hdrlen(skb) <<
4065 E1000_ADVTXD_L4LEN_SHIFT;
4068 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4069 mss_l4len_idx = sizeof(struct sctphdr) <<
4070 E1000_ADVTXD_L4LEN_SHIFT;
4073 mss_l4len_idx = sizeof(struct udphdr) <<
4074 E1000_ADVTXD_L4LEN_SHIFT;
4077 if (unlikely(net_ratelimit())) {
4078 dev_warn(tx_ring->dev,
4079 "partial checksum but l4 proto=%x!\n",
4086 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4087 vlan_macip_lens |= tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4089 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4091 return (skb->ip_summed == CHECKSUM_PARTIAL);
4094 static __le32 igb_tx_cmd_type(u32 tx_flags)
4096 /* set type for advanced descriptor with frame checksum insertion */
4097 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4098 E1000_ADVTXD_DCMD_IFCS |
4099 E1000_ADVTXD_DCMD_DEXT);
4101 /* set HW vlan bit if vlan is present */
4102 if (tx_flags & IGB_TX_FLAGS_VLAN)
4103 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4105 /* set timestamp bit if present */
4106 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4107 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4109 /* set segmentation bits for TSO */
4110 if (tx_flags & IGB_TX_FLAGS_TSO)
4111 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4116 static __le32 igb_tx_olinfo_status(u32 tx_flags, unsigned int paylen,
4117 struct igb_ring *tx_ring)
4119 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4121 /* 82575 requires a unique index per ring if any offload is enabled */
4122 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4123 (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX))
4124 olinfo_status |= tx_ring->reg_idx << 4;
4126 /* insert L4 checksum */
4127 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4128 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4130 /* insert IPv4 checksum */
4131 if (tx_flags & IGB_TX_FLAGS_IPV4)
4132 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4135 return cpu_to_le32(olinfo_status);
4139 /* The largest size we can write to the descriptor is 65535. In order to
4140 * maintain a power of two alignment we have to limit ourselves to 32K.
4142 #define IGB_MAX_TXD_PWR 15
4143 #define IGB_MAX_DATA_PER_TXD (1 << IGB_MAX_TXD_PWR)
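/* Editor's example (illustrative): a 45,000-byte linear skb region is
 * therefore split by igb_tx_map() below into descriptors of 32,768 and
 * 12,232 bytes, keeping each chunk within the 32K power-of-two limit
 * described above.
 */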
4145 static void igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb,
4146 struct igb_tx_buffer *first, u32 tx_flags,
4149 struct igb_tx_buffer *tx_buffer_info;
4150 union e1000_adv_tx_desc *tx_desc;
4152 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4153 unsigned int data_len = skb->data_len;
4154 unsigned int size = skb_headlen(skb);
4155 unsigned int paylen = skb->len - hdr_len;
4157 u16 i = tx_ring->next_to_use;
4160 if (tx_flags & IGB_TX_FLAGS_TSO)
4161 gso_segs = skb_shinfo(skb)->gso_segs;
4165 /* multiply data chunks by size of headers */
4166 first->bytecount = paylen + (gso_segs * hdr_len);
4167 first->gso_segs = gso_segs;
4170 tx_desc = IGB_TX_DESC(tx_ring, i);
4172 tx_desc->read.olinfo_status =
4173 igb_tx_olinfo_status(tx_flags, paylen, tx_ring);
4175 cmd_type = igb_tx_cmd_type(tx_flags);
4177 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4178 if (dma_mapping_error(tx_ring->dev, dma))
4181 /* record length, and DMA address */
4182 first->length = size;
4184 first->tx_flags = tx_flags;
4185 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4188 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4189 tx_desc->read.cmd_type_len =
4190 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4194 if (i == tx_ring->count) {
4195 tx_desc = IGB_TX_DESC(tx_ring, 0);
4199 dma += IGB_MAX_DATA_PER_TXD;
4200 size -= IGB_MAX_DATA_PER_TXD;
4202 tx_desc->read.olinfo_status = 0;
4203 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4206 if (likely(!data_len))
4209 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4213 if (i == tx_ring->count) {
4214 tx_desc = IGB_TX_DESC(tx_ring, 0);
4221 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4222 size, DMA_TO_DEVICE);
4223 if (dma_mapping_error(tx_ring->dev, dma))
4226 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4227 tx_buffer_info->length = size;
4228 tx_buffer_info->dma = dma;
4230 tx_desc->read.olinfo_status = 0;
4231 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4236 /* write last descriptor with RS and EOP bits */
4237 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4238 tx_desc->read.cmd_type_len = cmd_type;
4240 /* set the timestamp */
4241 first->time_stamp = jiffies;
4244 /* Force memory writes to complete before letting h/w know there
4245 * are new descriptors to fetch. (Only applicable for weak-ordered
4246 * memory model archs, such as IA-64).
4248 * We also need this memory barrier to make certain all of the
4249 * status bits have been updated before next_to_watch is written.
*/
wmb();
4253 /* set next_to_watch value indicating a packet is present */
4254 first->next_to_watch = tx_desc;
4257 if (i == tx_ring->count)
4260 tx_ring->next_to_use = i;
4262 writel(i, tx_ring->tail);
4264 /* we need this if more than one processor can write to our tail
4265 * at a time; it synchronizes IO on IA64/Altix systems */
mmiowb();
4271 dev_err(tx_ring->dev, "TX DMA map failed\n");
4273 /* clear dma mappings for failed tx_buffer_info map */
4275 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4276 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4277 if (tx_buffer_info == first)
4284 tx_ring->next_to_use = i;
4287 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4289 struct net_device *netdev = tx_ring->netdev;
4291 netif_stop_subqueue(netdev, tx_ring->queue_index);
4293 /* Herbert's original patch had:
4294 * smp_mb__after_netif_stop_queue();
4295 * but since that doesn't exist yet, just open code it. */
smp_mb();

4298 /* We need to check again in case another CPU has just
4299 * made room available. */
4300 if (igb_desc_unused(tx_ring) < size)
4304 netif_wake_subqueue(netdev, tx_ring->queue_index);
4306 u64_stats_update_begin(&tx_ring->tx_syncp2);
4307 tx_ring->tx_stats.restart_queue2++;
4308 u64_stats_update_end(&tx_ring->tx_syncp2);
4313 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4315 if (igb_desc_unused(tx_ring) >= size)
4317 return __igb_maybe_stop_tx(tx_ring, size);
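/*
 * Queue flow control: the inline check above is the fast path; only
 * when the ring looks full do we take the slow path, which stops the
 * queue, re-checks under a memory barrier (another CPU may have just
 * freed descriptors), and wakes the queue again if room appeared.
 */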
4320 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4321 struct igb_ring *tx_ring)
4323 struct igb_tx_buffer *first;
4326 __be16 protocol = vlan_get_protocol(skb);
4329 /* need: 1 descriptor per page,
4330 * + 2 desc gap to keep tail from touching head,
4331 * + 1 desc for skb->data,
4332 * + 1 desc for context descriptor,
4333 * otherwise try next time */
4334 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4335 /* this is a hard error */
4336 return NETDEV_TX_BUSY;
4339 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4340 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4341 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4344 if (vlan_tx_tag_present(skb)) {
4345 tx_flags |= IGB_TX_FLAGS_VLAN;
4346 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4349 /* record the location of the first descriptor for this packet */
4350 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4352 tso = igb_tso(tx_ring, skb, tx_flags, protocol, &hdr_len);
4356 tx_flags |= IGB_TX_FLAGS_TSO | IGB_TX_FLAGS_CSUM;
4357 if (protocol == htons(ETH_P_IP))
4358 tx_flags |= IGB_TX_FLAGS_IPV4;
4359 } else if (igb_tx_csum(tx_ring, skb, tx_flags, protocol) &&
4360 (skb->ip_summed == CHECKSUM_PARTIAL)) {
4361 tx_flags |= IGB_TX_FLAGS_CSUM;
4364 igb_tx_map(tx_ring, skb, first, tx_flags, hdr_len);
4366 /* Make sure there is space in the ring for the next send. */
4367 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
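/*
 * Stopping here, while there is still room for one more worst-case
 * frame, means the availability check at the top of this function
 * should rarely fail; NETDEV_TX_BUSY is a last resort.
 */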
4369 return NETDEV_TX_OK;
4372 dev_kfree_skb_any(skb);
4373 return NETDEV_TX_OK;
4376 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4377 struct sk_buff *skb)
4379 unsigned int r_idx = skb->queue_mapping;
4381 if (r_idx >= adapter->num_tx_queues)
4382 r_idx = r_idx % adapter->num_tx_queues;
4384 return adapter->tx_ring[r_idx];
4387 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4388 struct net_device *netdev)
4390 struct igb_adapter *adapter = netdev_priv(netdev);
4392 if (test_bit(__IGB_DOWN, &adapter->state)) {
4393 dev_kfree_skb_any(skb);
4394 return NETDEV_TX_OK;
4397 if (skb->len <= 0) {
4398 dev_kfree_skb_any(skb);
4399 return NETDEV_TX_OK;
4403 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4404 * in order to meet this minimum size requirement.
4406 if (skb->len < 17) {
4407 if (skb_padto(skb, 17))
4408 return NETDEV_TX_OK;
skb->len = 17;
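/*
 * skb_padto() frees the skb on failure, which is why NETDEV_TX_OK is
 * returned above: the frame is dropped, not requeued. On success the
 * length is bumped to the 17-byte minimum the hardware requires.
 */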
4412 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4416 * igb_tx_timeout - Respond to a Tx Hang
4417 * @netdev: network interface device structure
4419 static void igb_tx_timeout(struct net_device *netdev)
4421 struct igb_adapter *adapter = netdev_priv(netdev);
4422 struct e1000_hw *hw = &adapter->hw;
4424 /* Do the reset outside of interrupt context */
4425 adapter->tx_timeout_count++;
4427 if (hw->mac.type == e1000_82580)
4428 hw->dev_spec._82575.global_device_reset = true;
4430 schedule_work(&adapter->reset_task);
wr32(E1000_EICS,
4432 (adapter->eims_enable_mask & ~adapter->eims_other));
4435 static void igb_reset_task(struct work_struct *work)
4437 struct igb_adapter *adapter;
4438 adapter = container_of(work, struct igb_adapter, reset_task);
4441 netdev_err(adapter->netdev, "Reset adapter\n");
4442 igb_reinit_locked(adapter);
4446 * igb_get_stats64 - Get System Network Statistics
4447 * @netdev: network interface device structure
4448 * @stats: rtnl_link_stats64 pointer
4451 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4452 struct rtnl_link_stats64 *stats)
4454 struct igb_adapter *adapter = netdev_priv(netdev);
4456 spin_lock(&adapter->stats64_lock);
4457 igb_update_stats(adapter, &adapter->stats64);
4458 memcpy(stats, &adapter->stats64, sizeof(*stats));
4459 spin_unlock(&adapter->stats64_lock);
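/*
 * stats64_lock serializes writers of the aggregate counters; the
 * per-ring byte/packet counts are read separately under their
 * u64_stats seqcounts inside igb_update_stats().
 */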
4465 * igb_change_mtu - Change the Maximum Transfer Unit
4466 * @netdev: network interface device structure
4467 * @new_mtu: new value for maximum frame size
4469 * Returns 0 on success, negative on failure
4471 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4473 struct igb_adapter *adapter = netdev_priv(netdev);
4474 struct pci_dev *pdev = adapter->pdev;
4475 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4477 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4478 dev_err(&pdev->dev, "Invalid MTU setting\n");
4482 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4483 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4484 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4488 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4491 /* igb_down has a dependency on max_frame_size */
4492 adapter->max_frame_size = max_frame;
4494 if (netif_running(netdev))
4497 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4498 netdev->mtu, new_mtu);
4499 netdev->mtu = new_mtu;
4501 if (netif_running(netdev))
4506 clear_bit(__IGB_RESETTING, &adapter->state);
4512 * igb_update_stats - Update the board statistics counters
4513 * @adapter: board private structure
4516 void igb_update_stats(struct igb_adapter *adapter,
4517 struct rtnl_link_stats64 *net_stats)
4519 struct e1000_hw *hw = &adapter->hw;
4520 struct pci_dev *pdev = adapter->pdev;
4526 u64 _bytes, _packets;
4528 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4531 * Prevent stats update while adapter is being reset, or if the pci
4532 * connection is down.
4534 if (adapter->link_speed == 0)
4536 if (pci_channel_offline(pdev))
4541 for (i = 0; i < adapter->num_rx_queues; i++) {
4542 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4543 struct igb_ring *ring = adapter->rx_ring[i];
4545 ring->rx_stats.drops += rqdpc_tmp;
4546 net_stats->rx_fifo_errors += rqdpc_tmp;
4549 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4550 _bytes = ring->rx_stats.bytes;
4551 _packets = ring->rx_stats.packets;
4552 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4554 packets += _packets;
4557 net_stats->rx_bytes = bytes;
4558 net_stats->rx_packets = packets;
4562 for (i = 0; i < adapter->num_tx_queues; i++) {
4563 struct igb_ring *ring = adapter->tx_ring[i];
4565 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4566 _bytes = ring->tx_stats.bytes;
4567 _packets = ring->tx_stats.packets;
4568 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4570 packets += _packets;
4572 net_stats->tx_bytes = bytes;
4573 net_stats->tx_packets = packets;
4575 /* read stats registers */
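/*
 * Most of these registers are clear-on-read, which is why each value
 * is accumulated into the software counters instead of being copied.
 */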
4576 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4577 adapter->stats.gprc += rd32(E1000_GPRC);
4578 adapter->stats.gorc += rd32(E1000_GORCL);
4579 rd32(E1000_GORCH); /* clear GORCL */
4580 adapter->stats.bprc += rd32(E1000_BPRC);
4581 adapter->stats.mprc += rd32(E1000_MPRC);
4582 adapter->stats.roc += rd32(E1000_ROC);
4584 adapter->stats.prc64 += rd32(E1000_PRC64);
4585 adapter->stats.prc127 += rd32(E1000_PRC127);
4586 adapter->stats.prc255 += rd32(E1000_PRC255);
4587 adapter->stats.prc511 += rd32(E1000_PRC511);
4588 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4589 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4590 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4591 adapter->stats.sec += rd32(E1000_SEC);
4593 mpc = rd32(E1000_MPC);
4594 adapter->stats.mpc += mpc;
4595 net_stats->rx_fifo_errors += mpc;
4596 adapter->stats.scc += rd32(E1000_SCC);
4597 adapter->stats.ecol += rd32(E1000_ECOL);
4598 adapter->stats.mcc += rd32(E1000_MCC);
4599 adapter->stats.latecol += rd32(E1000_LATECOL);
4600 adapter->stats.dc += rd32(E1000_DC);
4601 adapter->stats.rlec += rd32(E1000_RLEC);
4602 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4603 adapter->stats.xontxc += rd32(E1000_XONTXC);
4604 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4605 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4606 adapter->stats.fcruc += rd32(E1000_FCRUC);
4607 adapter->stats.gptc += rd32(E1000_GPTC);
4608 adapter->stats.gotc += rd32(E1000_GOTCL);
4609 rd32(E1000_GOTCH); /* clear GOTCL */
4610 adapter->stats.rnbc += rd32(E1000_RNBC);
4611 adapter->stats.ruc += rd32(E1000_RUC);
4612 adapter->stats.rfc += rd32(E1000_RFC);
4613 adapter->stats.rjc += rd32(E1000_RJC);
4614 adapter->stats.tor += rd32(E1000_TORH);
4615 adapter->stats.tot += rd32(E1000_TOTH);
4616 adapter->stats.tpr += rd32(E1000_TPR);
4618 adapter->stats.ptc64 += rd32(E1000_PTC64);
4619 adapter->stats.ptc127 += rd32(E1000_PTC127);
4620 adapter->stats.ptc255 += rd32(E1000_PTC255);
4621 adapter->stats.ptc511 += rd32(E1000_PTC511);
4622 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4623 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4625 adapter->stats.mptc += rd32(E1000_MPTC);
4626 adapter->stats.bptc += rd32(E1000_BPTC);
4628 adapter->stats.tpt += rd32(E1000_TPT);
4629 adapter->stats.colc += rd32(E1000_COLC);
4631 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4632 /* read internal phy specific stats */
4633 reg = rd32(E1000_CTRL_EXT);
4634 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4635 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4636 adapter->stats.tncrs += rd32(E1000_TNCRS);
4639 adapter->stats.tsctc += rd32(E1000_TSCTC);
4640 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4642 adapter->stats.iac += rd32(E1000_IAC);
4643 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4644 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4645 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4646 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4647 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4648 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4649 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4650 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4652 /* Fill out the OS statistics structure */
4653 net_stats->multicast = adapter->stats.mprc;
4654 net_stats->collisions = adapter->stats.colc;
4658 /* RLEC on some newer hardware can be incorrect so build
4659 * our own version based on RUC and ROC */
4660 net_stats->rx_errors = adapter->stats.rxerrc +
4661 adapter->stats.crcerrs + adapter->stats.algnerrc +
4662 adapter->stats.ruc + adapter->stats.roc +
4663 adapter->stats.cexterr;
4664 net_stats->rx_length_errors = adapter->stats.ruc +
4666 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4667 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4668 net_stats->rx_missed_errors = adapter->stats.mpc;
4671 net_stats->tx_errors = adapter->stats.ecol +
4672 adapter->stats.latecol;
4673 net_stats->tx_aborted_errors = adapter->stats.ecol;
4674 net_stats->tx_window_errors = adapter->stats.latecol;
4675 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4677 /* Tx Dropped needs to be maintained elsewhere */
4680 if (hw->phy.media_type == e1000_media_type_copper) {
4681 if ((adapter->link_speed == SPEED_1000) &&
4682 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4683 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4684 adapter->phy_stats.idle_errors += phy_tmp;
4688 /* Management Stats */
4689 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4690 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4691 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4694 reg = rd32(E1000_MANC);
4695 if (reg & E1000_MANC_EN_BMC2OS) {
4696 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4697 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4698 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4699 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4703 static irqreturn_t igb_msix_other(int irq, void *data)
4705 struct igb_adapter *adapter = data;
4706 struct e1000_hw *hw = &adapter->hw;
4707 u32 icr = rd32(E1000_ICR);
4708 /* reading ICR causes bit 31 of EICR to be cleared */
4710 if (icr & E1000_ICR_DRSTA)
4711 schedule_work(&adapter->reset_task);
4713 if (icr & E1000_ICR_DOUTSYNC) {
4714 /* HW is reporting DMA is out of sync */
4715 adapter->stats.doosync++;
4716 /* The DMA Out of Sync is also an indication of a spoof event
4717 * in IOV mode. Check the Wrong VM Behavior register to
4718 * see if it is really a spoof event. */
4719 igb_check_wvbr(adapter);
4722 /* Check for a mailbox event */
4723 if (icr & E1000_ICR_VMMB)
4724 igb_msg_task(adapter);
4726 if (icr & E1000_ICR_LSC) {
4727 hw->mac.get_link_status = 1;
4728 /* guard against interrupt when we're going down */
4729 if (!test_bit(__IGB_DOWN, &adapter->state))
4730 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4733 if (adapter->vfs_allocated_count)
4734 wr32(E1000_IMS, E1000_IMS_LSC |
4736 E1000_IMS_DOUTSYNC);
4738 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4739 wr32(E1000_EIMS, adapter->eims_other);
4744 static void igb_write_itr(struct igb_q_vector *q_vector)
4746 struct igb_adapter *adapter = q_vector->adapter;
4747 u32 itr_val = q_vector->itr_val & 0x7FFC;
4749 if (!q_vector->set_itr)
4755 if (adapter->hw.mac.type == e1000_82575)
4756 itr_val |= itr_val << 16;
4758 itr_val |= 0x8000000;
4760 writel(itr_val, q_vector->itr_register);
4761 q_vector->set_itr = 0;
4764 static irqreturn_t igb_msix_ring(int irq, void *data)
4766 struct igb_q_vector *q_vector = data;
4768 /* Write the ITR value calculated from the previous interrupt. */
4769 igb_write_itr(q_vector);
4771 napi_schedule(&q_vector->napi);
4776 #ifdef CONFIG_IGB_DCA
4777 static void igb_update_dca(struct igb_q_vector *q_vector)
4779 struct igb_adapter *adapter = q_vector->adapter;
4780 struct e1000_hw *hw = &adapter->hw;
4781 int cpu = get_cpu();
4783 if (q_vector->cpu == cpu)
4786 if (q_vector->tx_ring) {
4787 int q = q_vector->tx_ring->reg_idx;
4788 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4789 if (hw->mac.type == e1000_82575) {
4790 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4791 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4793 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4794 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4795 E1000_DCA_TXCTRL_CPUID_SHIFT;
4797 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4798 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4800 if (q_vector->rx_ring) {
4801 int q = q_vector->rx_ring->reg_idx;
4802 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4803 if (hw->mac.type == e1000_82575) {
4804 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4805 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4807 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4808 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4809 E1000_DCA_RXCTRL_CPUID_SHIFT;
4811 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4812 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4813 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4814 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4816 q_vector->cpu = cpu;
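/*
 * With DCA the chipset tags descriptor write-backs with the CPU that
 * will consume them, prefetching the data into that CPU's cache; the
 * early-out above avoids rewriting the tag while we stay on one CPU.
 */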
4821 static void igb_setup_dca(struct igb_adapter *adapter)
4823 struct e1000_hw *hw = &adapter->hw;
4826 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4829 /* Always use CB2 mode, difference is masked in the CB driver. */
4830 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4832 for (i = 0; i < adapter->num_q_vectors; i++) {
4833 adapter->q_vector[i]->cpu = -1;
4834 igb_update_dca(adapter->q_vector[i]);
4838 static int __igb_notify_dca(struct device *dev, void *data)
4840 struct net_device *netdev = dev_get_drvdata(dev);
4841 struct igb_adapter *adapter = netdev_priv(netdev);
4842 struct pci_dev *pdev = adapter->pdev;
4843 struct e1000_hw *hw = &adapter->hw;
4844 unsigned long event = *(unsigned long *)data;
4847 case DCA_PROVIDER_ADD:
4848 /* if already enabled, don't do it again */
4849 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4851 if (dca_add_requester(dev) == 0) {
4852 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4853 dev_info(&pdev->dev, "DCA enabled\n");
4854 igb_setup_dca(adapter);
4857 /* Fall Through since DCA is disabled. */
4858 case DCA_PROVIDER_REMOVE:
4859 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4860 /* without this a class_device is left
4861 * hanging around in the sysfs model */
4862 dca_remove_requester(dev);
4863 dev_info(&pdev->dev, "DCA disabled\n");
4864 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4865 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4873 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4878 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4881 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4883 #endif /* CONFIG_IGB_DCA */
4885 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4887 struct e1000_hw *hw = &adapter->hw;
4891 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4892 ping = E1000_PF_CONTROL_MSG;
4893 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4894 ping |= E1000_VT_MSGTYPE_CTS;
4895 igb_write_mbx(hw, &ping, 1, i);
4899 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4901 struct e1000_hw *hw = &adapter->hw;
4902 u32 vmolr = rd32(E1000_VMOLR(vf));
4903 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4905 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4906 IGB_VF_FLAG_MULTI_PROMISC);
4907 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4909 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4910 vmolr |= E1000_VMOLR_MPME;
4911 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4912 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4915 * if we have hashes and we are clearing a multicast promisc
4916 * flag we need to write the hashes to the MTA as this step
4917 * was previously skipped
4919 if (vf_data->num_vf_mc_hashes > 30) {
4920 vmolr |= E1000_VMOLR_MPME;
4921 } else if (vf_data->num_vf_mc_hashes) {
4923 vmolr |= E1000_VMOLR_ROMPE;
4924 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4925 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4929 wr32(E1000_VMOLR(vf), vmolr);
4931 /* there are flags left unprocessed, likely not supported */
4932 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4939 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4940 u32 *msgbuf, u32 vf)
4942 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4943 u16 *hash_list = (u16 *)&msgbuf[1];
4944 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4947 /* salt away the number of multicast addresses assigned
4948 * to this VF for later use, to restore them when the PF
* multicast list changes */
4951 vf_data->num_vf_mc_hashes = n;
4953 /* only up to 30 hash values supported */
4957 /* store the hashes for later use */
4958 for (i = 0; i < n; i++)
4959 vf_data->vf_mc_hashes[i] = hash_list[i];
4961 /* Flush and reset the mta with the new values */
4962 igb_set_rx_mode(adapter->netdev);
4967 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4969 struct e1000_hw *hw = &adapter->hw;
4970 struct vf_data_storage *vf_data;
4973 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4974 u32 vmolr = rd32(E1000_VMOLR(i));
4975 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4977 vf_data = &adapter->vf_data[i];
4979 if ((vf_data->num_vf_mc_hashes > 30) ||
4980 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4981 vmolr |= E1000_VMOLR_MPME;
4982 } else if (vf_data->num_vf_mc_hashes) {
4983 vmolr |= E1000_VMOLR_ROMPE;
4984 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4985 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4987 wr32(E1000_VMOLR(i), vmolr);
4991 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4993 struct e1000_hw *hw = &adapter->hw;
4994 u32 pool_mask, reg, vid;
4997 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4999 /* Find the vlan filter for this id */
5000 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5001 reg = rd32(E1000_VLVF(i));
5003 /* remove the vf from the pool */
5006 /* if pool is empty then remove entry from vfta */
5007 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5008 (reg & E1000_VLVF_VLANID_ENABLE)) {
5010 vid = reg & E1000_VLVF_VLANID_MASK;
5011 igb_vfta_set(hw, vid, false);
5014 wr32(E1000_VLVF(i), reg);
5017 adapter->vf_data[vf].vlans_enabled = 0;
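/*
 * Each VLVF entry pairs a VLAN ID with a pool-select bitmap of the
 * VFs/PF using it; the loop above clears this VF's pool bit and only
 * drops the VLAN from the VFTA once no pool references it.
 */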
5020 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5022 struct e1000_hw *hw = &adapter->hw;
5025 /* The vlvf table only exists on 82576 hardware and newer */
5026 if (hw->mac.type < e1000_82576)
5029 /* we only need to do this if VMDq is enabled */
5030 if (!adapter->vfs_allocated_count)
5033 /* Find the vlan filter for this id */
5034 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5035 reg = rd32(E1000_VLVF(i));
5036 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5037 vid == (reg & E1000_VLVF_VLANID_MASK))
5042 if (i == E1000_VLVF_ARRAY_SIZE) {
5043 /* Did not find a matching VLAN ID entry that was
5044 * enabled. Search for a free filter entry, i.e.
5045 * one without the enable bit set
5047 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5048 reg = rd32(E1000_VLVF(i));
5049 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5053 if (i < E1000_VLVF_ARRAY_SIZE) {
5054 /* Found an enabled/available entry */
5055 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5057 /* if !enabled we need to set this up in vfta */
5058 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5059 /* add VID to filter table */
5060 igb_vfta_set(hw, vid, true);
5061 reg |= E1000_VLVF_VLANID_ENABLE;
5063 reg &= ~E1000_VLVF_VLANID_MASK;
5065 wr32(E1000_VLVF(i), reg);
5067 /* do not modify RLPML for PF devices */
5068 if (vf >= adapter->vfs_allocated_count)
5071 if (!adapter->vf_data[vf].vlans_enabled) {
5073 reg = rd32(E1000_VMOLR(vf));
5074 size = reg & E1000_VMOLR_RLPML_MASK;
5076 reg &= ~E1000_VMOLR_RLPML_MASK;
5078 wr32(E1000_VMOLR(vf), reg);
5081 adapter->vf_data[vf].vlans_enabled++;
5085 if (i < E1000_VLVF_ARRAY_SIZE) {
5086 /* remove vf from the pool */
5087 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5088 /* if pool is empty then remove entry from vfta */
5089 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5091 igb_vfta_set(hw, vid, false);
5093 wr32(E1000_VLVF(i), reg);
5095 /* do not modify RLPML for PF devices */
5096 if (vf >= adapter->vfs_allocated_count)
5099 adapter->vf_data[vf].vlans_enabled--;
5100 if (!adapter->vf_data[vf].vlans_enabled) {
5102 reg = rd32(E1000_VMOLR(vf));
5103 size = reg & E1000_VMOLR_RLPML_MASK;
5105 reg &= ~E1000_VMOLR_RLPML_MASK;
5107 wr32(E1000_VMOLR(vf), reg);
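/*
 * The RLPML (receive long packet maximum length) adjustments above
 * grow the VF's max frame size by the 4-byte VLAN tag when its first
 * VLAN is enabled and shrink it again when its last VLAN is removed.
 */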
5114 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5116 struct e1000_hw *hw = &adapter->hw;
5119 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5121 wr32(E1000_VMVIR(vf), 0);
5124 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5125 int vf, u16 vlan, u8 qos)
5128 struct igb_adapter *adapter = netdev_priv(netdev);
5130 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5133 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5136 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5137 igb_set_vmolr(adapter, vf, !vlan);
5138 adapter->vf_data[vf].pf_vlan = vlan;
5139 adapter->vf_data[vf].pf_qos = qos;
5140 dev_info(&adapter->pdev->dev,
5141 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5142 if (test_bit(__IGB_DOWN, &adapter->state)) {
5143 dev_warn(&adapter->pdev->dev,
5144 "The VF VLAN has been set,"
5145 " but the PF device is not up.\n");
5146 dev_warn(&adapter->pdev->dev,
5147 "Bring the PF device up before"
5148 " attempting to use the VF device.\n");
5151 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5153 igb_set_vmvir(adapter, vlan, vf);
5154 igb_set_vmolr(adapter, vf, true);
5155 adapter->vf_data[vf].pf_vlan = 0;
5156 adapter->vf_data[vf].pf_qos = 0;
5162 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5164 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5165 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5167 return igb_vlvf_set(adapter, vid, add, vf);
5170 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5172 /* clear flags - except flag that indicates PF has set the MAC */
5173 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5174 adapter->vf_data[vf].last_nack = jiffies;
5176 /* reset offloads to defaults */
5177 igb_set_vmolr(adapter, vf, true);
5179 /* reset vlans for device */
5180 igb_clear_vf_vfta(adapter, vf);
5181 if (adapter->vf_data[vf].pf_vlan)
5182 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5183 adapter->vf_data[vf].pf_vlan,
5184 adapter->vf_data[vf].pf_qos);
5186 igb_clear_vf_vfta(adapter, vf);
5188 /* reset multicast table array for vf */
5189 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5191 /* Flush and reset the mta with the new values */
5192 igb_set_rx_mode(adapter->netdev);
5195 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5197 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5199 /* generate a new mac address as we were hotplug removed/added */
5200 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5201 random_ether_addr(vf_mac);
5203 /* process remaining reset events */
5204 igb_vf_reset(adapter, vf);
5207 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5209 struct e1000_hw *hw = &adapter->hw;
5210 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5211 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5213 u8 *addr = (u8 *)(&msgbuf[1]);
5215 /* process all the same items cleared in a function level reset */
5216 igb_vf_reset(adapter, vf);
5218 /* set vf mac address */
5219 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5221 /* enable transmit and receive for vf */
5222 reg = rd32(E1000_VFTE);
5223 wr32(E1000_VFTE, reg | (1 << vf));
5224 reg = rd32(E1000_VFRE);
5225 wr32(E1000_VFRE, reg | (1 << vf));
5227 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5229 /* reply to reset with ack and vf mac address */
5230 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5231 memcpy(addr, vf_mac, 6);
5232 igb_write_mbx(hw, msgbuf, 3, vf);
5235 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5238 * The VF MAC Address is stored in a packed array of bytes
5239 * starting at the second 32 bit word of the msg array
5241 unsigned char *addr = (unsigned char *)&msg[1];
5244 if (is_valid_ether_addr(addr))
5245 err = igb_set_vf_mac(adapter, vf, addr);
5250 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5252 struct e1000_hw *hw = &adapter->hw;
5253 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5254 u32 msg = E1000_VT_MSGTYPE_NACK;
5256 /* if device isn't clear to send it shouldn't be reading either */
5257 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5258 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5259 igb_write_mbx(hw, &msg, 1, vf);
5260 vf_data->last_nack = jiffies;
5264 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5266 struct pci_dev *pdev = adapter->pdev;
5267 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5268 struct e1000_hw *hw = &adapter->hw;
5269 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5272 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5275 /* if receive failed, revoke the VF's clear-to-send flag and restart init */
5276 dev_err(&pdev->dev, "Error receiving message from VF\n");
5277 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5278 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5283 /* this is a message we already processed, do nothing */
5284 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5288 * until the vf completes a reset it should not be
5289 * allowed to start any configuration.
5292 if (msgbuf[0] == E1000_VF_RESET) {
5293 igb_vf_reset_msg(adapter, vf);
5297 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5298 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5304 switch ((msgbuf[0] & 0xFFFF)) {
5305 case E1000_VF_SET_MAC_ADDR:
5307 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5308 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5310 dev_warn(&pdev->dev,
5311 "VF %d attempted to override administratively "
5312 "set MAC address\nReload the VF driver to "
5313 "resume operations\n", vf);
5315 case E1000_VF_SET_PROMISC:
5316 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5318 case E1000_VF_SET_MULTICAST:
5319 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5321 case E1000_VF_SET_LPE:
5322 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5324 case E1000_VF_SET_VLAN:
5326 if (vf_data->pf_vlan)
5327 dev_warn(&pdev->dev,
5328 "VF %d attempted to override administratively "
5329 "set VLAN tag\nReload the VF driver to "
5330 "resume operations\n", vf);
5332 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5335 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5340 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5342 /* notify the VF of the results of what it sent us */
5344 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5346 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5348 igb_write_mbx(hw, msgbuf, 1, vf);
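/*
 * Mailbox protocol: the first word is echoed back with CTS plus an
 * ACK or NACK bit set, so the VF driver knows whether to retry the
 * request.
 */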
5351 static void igb_msg_task(struct igb_adapter *adapter)
5353 struct e1000_hw *hw = &adapter->hw;
5356 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5357 /* process any reset requests */
5358 if (!igb_check_for_rst(hw, vf))
5359 igb_vf_reset_event(adapter, vf);
5361 /* process any messages pending */
5362 if (!igb_check_for_msg(hw, vf))
5363 igb_rcv_msg_from_vf(adapter, vf);
5365 /* process any acks */
5366 if (!igb_check_for_ack(hw, vf))
5367 igb_rcv_ack_from_vf(adapter, vf);
5372 * igb_set_uta - Set unicast filter table address
5373 * @adapter: board private structure
5375 * The unicast table address is a register array of 32-bit registers.
5376 * The table is meant to be used in a way similar to how the MTA is used;
5377 * however, due to certain limitations in the hardware it is necessary to
5378 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5379 * enable bit to allow vlan tag stripping when promiscuous mode is enabled.
5381 static void igb_set_uta(struct igb_adapter *adapter)
5383 struct e1000_hw *hw = &adapter->hw;
5386 /* The UTA table only exists on 82576 hardware and newer */
5387 if (hw->mac.type < e1000_82576)
5390 /* we only need to do this if VMDq is enabled */
5391 if (!adapter->vfs_allocated_count)
5394 for (i = 0; i < hw->mac.uta_reg_count; i++)
5395 array_wr32(E1000_UTA, i, ~0);
5399 * igb_intr_msi - Interrupt Handler
5400 * @irq: interrupt number
5401 * @data: pointer to a network interface device structure
5403 static irqreturn_t igb_intr_msi(int irq, void *data)
5405 struct igb_adapter *adapter = data;
5406 struct igb_q_vector *q_vector = adapter->q_vector[0];
5407 struct e1000_hw *hw = &adapter->hw;
5408 /* read ICR disables interrupts using IAM */
5409 u32 icr = rd32(E1000_ICR);
5411 igb_write_itr(q_vector);
5413 if (icr & E1000_ICR_DRSTA)
5414 schedule_work(&adapter->reset_task);
5416 if (icr & E1000_ICR_DOUTSYNC) {
5417 /* HW is reporting DMA is out of sync */
5418 adapter->stats.doosync++;
5421 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5422 hw->mac.get_link_status = 1;
5423 if (!test_bit(__IGB_DOWN, &adapter->state))
5424 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5427 napi_schedule(&q_vector->napi);
5433 * igb_intr - Legacy Interrupt Handler
5434 * @irq: interrupt number
5435 * @data: pointer to a network interface device structure
5437 static irqreturn_t igb_intr(int irq, void *data)
5439 struct igb_adapter *adapter = data;
5440 struct igb_q_vector *q_vector = adapter->q_vector[0];
5441 struct e1000_hw *hw = &adapter->hw;
5442 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5443 * need for the IMC write */
5444 u32 icr = rd32(E1000_ICR);
5446 return IRQ_NONE; /* Not our interrupt */
5448 igb_write_itr(q_vector);
5450 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5451 * not set, then the adapter didn't send an interrupt */
5452 if (!(icr & E1000_ICR_INT_ASSERTED))
5455 if (icr & E1000_ICR_DRSTA)
5456 schedule_work(&adapter->reset_task);
5458 if (icr & E1000_ICR_DOUTSYNC) {
5459 /* HW is reporting DMA is out of sync */
5460 adapter->stats.doosync++;
5463 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5464 hw->mac.get_link_status = 1;
5465 /* guard against interrupt when we're going down */
5466 if (!test_bit(__IGB_DOWN, &adapter->state))
5467 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5470 napi_schedule(&q_vector->napi);
5475 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5477 struct igb_adapter *adapter = q_vector->adapter;
5478 struct e1000_hw *hw = &adapter->hw;
5480 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5481 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5482 if (!adapter->msix_entries)
5483 igb_set_itr(adapter);
5485 igb_update_ring_itr(q_vector);
5488 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5489 if (adapter->msix_entries)
5490 wr32(E1000_EIMS, q_vector->eims_value);
5492 igb_irq_enable(adapter);
5497 * igb_poll - NAPI Rx polling callback
5498 * @napi: napi polling structure
5499 * @budget: count of how many packets we should handle
5501 static int igb_poll(struct napi_struct *napi, int budget)
5503 struct igb_q_vector *q_vector = container_of(napi,
5504 struct igb_q_vector,
napi);
5506 bool clean_complete = true;
5508 #ifdef CONFIG_IGB_DCA
5509 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5510 igb_update_dca(q_vector);
5512 if (q_vector->tx_ring)
5513 clean_complete = igb_clean_tx_irq(q_vector);
5515 if (q_vector->rx_ring)
5516 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5518 /* If all work not completed, return budget and keep polling */
5519 if (!clean_complete)
5522 /* If not enough Rx work done, exit the polling mode */
5523 napi_complete(napi);
5524 igb_ring_irq_enable(q_vector);
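/*
 * NAPI contract: returning the full budget while work remains keeps
 * us on the poll list; completing with napi_complete() re-enables the
 * ring's interrupt so further work arrives via IRQ again.
 */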
5530 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5531 * @adapter: board private structure
5532 * @shhwtstamps: timestamp structure to update
5533 * @regval: unsigned 64bit system time value.
5535 * We need to convert the system time value stored in the RX/TXSTMP registers
5536 * into a hwtstamp which can be used by the upper level timestamping functions
5538 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5539 struct skb_shared_hwtstamps *shhwtstamps,
5545 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5546 * 24 to match clock shift we setup earlier.
5548 if (adapter->hw.mac.type == e1000_82580)
5549 regval <<= IGB_82580_TSYNC_SHIFT;
5551 ns = timecounter_cyc2time(&adapter->clock, regval);
5552 timecompare_update(&adapter->compare, ns);
5553 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5554 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5555 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5559 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5560 * @q_vector: pointer to q_vector containing needed info
5561 * @buffer: pointer to igb_tx_buffer structure
5563 * If we were asked to do hardware stamping and such a time stamp is
5564 * available, then it must have been for this skb here because we
5565 * allow only one such packet into the queue.
5567 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5568 struct igb_tx_buffer *buffer_info)
5570 struct igb_adapter *adapter = q_vector->adapter;
5571 struct e1000_hw *hw = &adapter->hw;
5572 struct skb_shared_hwtstamps shhwtstamps;
5575 /* exit if the skb did not request a hw timestamp or the TX stamp is not valid */
5576 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5577 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5580 regval = rd32(E1000_TXSTMPL);
5581 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5583 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5584 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5588 * igb_clean_tx_irq - Reclaim resources after transmit completes
5589 * @q_vector: pointer to q_vector containing needed info
5590 * returns true if ring is completely cleaned
5592 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5594 struct igb_adapter *adapter = q_vector->adapter;
5595 struct igb_ring *tx_ring = q_vector->tx_ring;
5596 struct igb_tx_buffer *tx_buffer;
5597 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5598 unsigned int total_bytes = 0, total_packets = 0;
5599 unsigned int budget = q_vector->tx_work_limit;
5600 unsigned int i = tx_ring->next_to_clean;
5602 if (test_bit(__IGB_DOWN, &adapter->state))
5605 tx_buffer = &tx_ring->tx_buffer_info[i];
5606 tx_desc = IGB_TX_DESC(tx_ring, i);
5607 i -= tx_ring->count;
5609 for (; budget; budget--) {
5610 eop_desc = tx_buffer->next_to_watch;
5612 /* prevent any other reads prior to eop_desc */
5615 /* if next_to_watch is not set then there is no work pending */
5619 /* if DD is not set pending work has not been completed */
5620 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5623 /* clear next_to_watch to prevent false hangs */
5624 tx_buffer->next_to_watch = NULL;
5626 /* update the statistics for this packet */
5627 total_bytes += tx_buffer->bytecount;
5628 total_packets += tx_buffer->gso_segs;
5630 /* retrieve hardware timestamp */
5631 igb_tx_hwtstamp(q_vector, tx_buffer);
5634 dev_kfree_skb_any(tx_buffer->skb);
5635 tx_buffer->skb = NULL;
5637 /* unmap skb header data */
5638 dma_unmap_single(tx_ring->dev,
5643 /* clear last DMA location and unmap remaining buffers */
5644 while (tx_desc != eop_desc) {
5651 i -= tx_ring->count;
5652 tx_buffer = tx_ring->tx_buffer_info;
5653 tx_desc = IGB_TX_DESC(tx_ring, 0);
5656 /* unmap any remaining paged data */
5657 if (tx_buffer->dma) {
5658 dma_unmap_page(tx_ring->dev,
5665 /* clear last DMA location */
5668 /* move us one more past the eop_desc for start of next pkt */
5673 i -= tx_ring->count;
5674 tx_buffer = tx_ring->tx_buffer_info;
5675 tx_desc = IGB_TX_DESC(tx_ring, 0);
5679 i += tx_ring->count;
5680 tx_ring->next_to_clean = i;
5681 u64_stats_update_begin(&tx_ring->tx_syncp);
5682 tx_ring->tx_stats.bytes += total_bytes;
5683 tx_ring->tx_stats.packets += total_packets;
5684 u64_stats_update_end(&tx_ring->tx_syncp);
5685 tx_ring->total_bytes += total_bytes;
5686 tx_ring->total_packets += total_packets;
5688 if (tx_ring->detect_tx_hung) {
5689 struct e1000_hw *hw = &adapter->hw;
5691 eop_desc = tx_buffer->next_to_watch;
5693 /* Detect a transmit hang in hardware; this serializes the
5694 * check with the clearing of time_stamp and movement of i */
5695 tx_ring->detect_tx_hung = false;
5697 time_after(jiffies, tx_buffer->time_stamp +
5698 (adapter->tx_timeout_factor * HZ)) &&
5699 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5701 /* detected Tx unit hang */
5702 dev_err(tx_ring->dev,
5703 "Detected Tx Unit Hang\n"
5707 " next_to_use <%x>\n"
5708 " next_to_clean <%x>\n"
5709 "buffer_info[next_to_clean]\n"
5710 " time_stamp <%lx>\n"
5711 " next_to_watch <%p>\n"
5713 " desc.status <%x>\n",
5714 tx_ring->queue_index,
5715 rd32(E1000_TDH(tx_ring->reg_idx)),
5716 readl(tx_ring->tail),
5717 tx_ring->next_to_use,
5718 tx_ring->next_to_clean,
5719 tx_buffer->time_stamp,
5722 eop_desc->wb.status);
5723 netif_stop_subqueue(tx_ring->netdev,
5724 tx_ring->queue_index);
5726 /* we are about to reset, no point in enabling stuff */
5731 if (unlikely(total_packets &&
5732 netif_carrier_ok(tx_ring->netdev) &&
5733 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5734 /* Make sure that anybody stopping the queue after this
5735 * sees the new next_to_clean.
5738 if (__netif_subqueue_stopped(tx_ring->netdev,
5739 tx_ring->queue_index) &&
5740 !(test_bit(__IGB_DOWN, &adapter->state))) {
5741 netif_wake_subqueue(tx_ring->netdev,
5742 tx_ring->queue_index);
5744 u64_stats_update_begin(&tx_ring->tx_syncp);
5745 tx_ring->tx_stats.restart_queue++;
5746 u64_stats_update_end(&tx_ring->tx_syncp);
5753 static inline void igb_rx_checksum(struct igb_ring *ring,
5754 u32 status_err, struct sk_buff *skb)
5756 skb_checksum_none_assert(skb);
5758 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5759 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5760 (status_err & E1000_RXD_STAT_IXSM))
5763 /* TCP/UDP checksum error bit is set */
if (status_err &
5765 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5767 * work around errata with sctp packets where the TCPE aka
5768 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5769 * packets, (aka let the stack check the crc32c)
5771 if ((skb->len == 60) &&
5772 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5773 u64_stats_update_begin(&ring->rx_syncp);
5774 ring->rx_stats.csum_err++;
5775 u64_stats_update_end(&ring->rx_syncp);
5777 /* let the stack verify checksum errors */
5780 /* It must be a TCP or UDP packet with a valid checksum */
5781 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5782 skb->ip_summed = CHECKSUM_UNNECESSARY;
5784 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5787 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5788 struct sk_buff *skb)
5790 struct igb_adapter *adapter = q_vector->adapter;
5791 struct e1000_hw *hw = &adapter->hw;
5795 * If this bit is set, then the RX registers contain the time stamp. No
5796 * other packet will be time stamped until we read these registers, so
5797 * read the registers to make them available again. Because only one
5798 * packet can be time stamped at a time, we know that the register
5799 * values must belong to this one here and therefore we don't need to
5800 * compare any of the additional attributes stored for it.
5802 * If nothing went wrong, then it should have a shared tx_flags that we
5803 * can turn into a skb_shared_hwtstamps.
5805 if (staterr & E1000_RXDADV_STAT_TSIP) {
5806 u32 *stamp = (u32 *)skb->data;
5807 regval = le32_to_cpu(*(stamp + 2));
5808 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5809 skb_pull(skb, IGB_TS_HDR_LEN);
5811 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5814 regval = rd32(E1000_RXSTMPL);
5815 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5818 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5820 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5822 /* HW will not DMA in data larger than the given buffer, even if it
5823 * parses the (NFS, of course) header to be larger. In that case, it
5824 * fills the header buffer and spills the rest into the page.
5826 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5827 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5828 if (hlen > IGB_RX_HDR_LEN)
5829 hlen = IGB_RX_HDR_LEN;
return hlen;
5833 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5835 struct igb_ring *rx_ring = q_vector->rx_ring;
5836 union e1000_adv_rx_desc *rx_desc;
5837 const int current_node = numa_node_id();
5838 unsigned int total_bytes = 0, total_packets = 0;
5840 u16 cleaned_count = igb_desc_unused(rx_ring);
5841 u16 i = rx_ring->next_to_clean;
5843 rx_desc = IGB_RX_DESC(rx_ring, i);
5844 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5846 while (staterr & E1000_RXD_STAT_DD) {
5847 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5848 struct sk_buff *skb = buffer_info->skb;
5849 union e1000_adv_rx_desc *next_rxd;
5851 buffer_info->skb = NULL;
5852 prefetch(skb->data);
5855 if (i == rx_ring->count)
5858 next_rxd = IGB_RX_DESC(rx_ring, i);
5862 * This memory barrier is needed to keep us from reading
5863 * any other fields out of the rx_desc until we know the
5864 * RXD_STAT_DD bit is set
5868 if (!skb_is_nonlinear(skb)) {
5869 __skb_put(skb, igb_get_hlen(rx_desc));
5870 dma_unmap_single(rx_ring->dev, buffer_info->dma,
5873 buffer_info->dma = 0;
5876 if (rx_desc->wb.upper.length) {
5877 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5879 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5881 buffer_info->page_offset,
5885 skb->data_len += length;
5886 skb->truesize += length;
5888 if ((page_count(buffer_info->page) != 1) ||
5889 (page_to_nid(buffer_info->page) != current_node))
5890 buffer_info->page = NULL;
5892 get_page(buffer_info->page);
5894 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5895 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5896 buffer_info->page_dma = 0;
5899 if (!(staterr & E1000_RXD_STAT_EOP)) {
5900 struct igb_rx_buffer *next_buffer;
5901 next_buffer = &rx_ring->rx_buffer_info[i];
5902 buffer_info->skb = next_buffer->skb;
5903 buffer_info->dma = next_buffer->dma;
5904 next_buffer->skb = skb;
5905 next_buffer->dma = 0;
5909 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5910 dev_kfree_skb_any(skb);
5914 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5915 igb_rx_hwtstamp(q_vector, staterr, skb);
5916 total_bytes += skb->len;
5919 igb_rx_checksum(rx_ring, staterr, skb);
5921 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
5923 if (staterr & E1000_RXD_STAT_VP) {
5924 u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5926 __vlan_hwaccel_put_tag(skb, vid);
5928 napi_gro_receive(&q_vector->napi, skb);
5937 /* return some buffers to hardware; one at a time is too slow */
5937 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5938 igb_alloc_rx_buffers(rx_ring, cleaned_count);
5942 /* use prefetched values */
5944 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5947 rx_ring->next_to_clean = i;
5948 u64_stats_update_begin(&rx_ring->rx_syncp);
5949 rx_ring->rx_stats.packets += total_packets;
5950 rx_ring->rx_stats.bytes += total_bytes;
5951 u64_stats_update_end(&rx_ring->rx_syncp);
5952 rx_ring->total_packets += total_packets;
5953 rx_ring->total_bytes += total_bytes;
5956 igb_alloc_rx_buffers(rx_ring, cleaned_count);
5961 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
5962 struct igb_rx_buffer *bi)
5964 struct sk_buff *skb = bi->skb;
5965 dma_addr_t dma = bi->dma;
5971 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
5975 rx_ring->rx_stats.alloc_failed++;
5979 /* initialize skb for ring */
5980 skb_record_rx_queue(skb, rx_ring->queue_index);
5983 dma = dma_map_single(rx_ring->dev, skb->data,
5984 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
5986 if (dma_mapping_error(rx_ring->dev, dma)) {
5987 rx_ring->rx_stats.alloc_failed++;
5995 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
5996 struct igb_rx_buffer *bi)
5998 struct page *page = bi->page;
5999 dma_addr_t page_dma = bi->page_dma;
6000 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6006 page = netdev_alloc_page(rx_ring->netdev);
6008 if (unlikely(!page)) {
6009 rx_ring->rx_stats.alloc_failed++;
6014 page_dma = dma_map_page(rx_ring->dev, page,
6015 page_offset, PAGE_SIZE / 2,
6018 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6019 rx_ring->rx_stats.alloc_failed++;
6023 bi->page_dma = page_dma;
6024 bi->page_offset = page_offset;
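/*
 * Pages are used a half at a time: the XOR of page_offset with
 * PAGE_SIZE/2 above flips between the two halves, so a recycled page
 * can back two receive buffers before it has to be released.
 */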
6029 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6030 * @adapter: address of board private structure
6032 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6034 union e1000_adv_rx_desc *rx_desc;
6035 struct igb_rx_buffer *bi;
6036 u16 i = rx_ring->next_to_use;
6038 rx_desc = IGB_RX_DESC(rx_ring, i);
6039 bi = &rx_ring->rx_buffer_info[i];
6040 i -= rx_ring->count;
6042 while (cleaned_count--) {
6043 if (!igb_alloc_mapped_skb(rx_ring, bi))
6046 /* Refresh the desc even if buffer_addrs didn't change
6047 * because each write-back erases this info. */
6048 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6050 if (!igb_alloc_mapped_page(rx_ring, bi))
6053 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6059 rx_desc = IGB_RX_DESC(rx_ring, 0);
6060 bi = rx_ring->rx_buffer_info;
6061 i -= rx_ring->count;
6064 /* clear the hdr_addr for the next_to_use descriptor */
6065 rx_desc->read.hdr_addr = 0;
6068 i += rx_ring->count;
6070 if (rx_ring->next_to_use != i) {
6071 rx_ring->next_to_use = i;
6073 /* Force memory writes to complete before letting h/w
6074 * know there are new descriptors to fetch. (Only
6075 * applicable for weak-ordered memory model archs,
6076 * such as IA-64). */
6078 writel(i, rx_ring->tail);
6088 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6090 struct igb_adapter *adapter = netdev_priv(netdev);
6091 struct mii_ioctl_data *data = if_mii(ifr);
6093 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6098 data->phy_id = adapter->hw.phy.addr;
6101 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
&data->val_out))
return -EIO;
6113 * igb_hwtstamp_ioctl - control hardware time stamping
6118 * Outgoing time stamping can be enabled and disabled. Play nice and
6119 * disable it when requested, although it shouldn't cause any overhead
6120 * when no packet needs it. At most one packet in the queue may be
6121 * marked for time stamping, otherwise it would be impossible to tell
6122 * for sure to which packet the hardware time stamp belongs.
6124 * Incoming time stamping has to be configured via the hardware
6125 * filters. Not all combinations are supported, in particular event
6126 * type has to be specified. Matching the kind of event packet is
6127 * not supported, with the exception of "all V2 events regardless of
6131 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6132 struct ifreq *ifr, int cmd)
6134 struct igb_adapter *adapter = netdev_priv(netdev);
6135 struct e1000_hw *hw = &adapter->hw;
6136 struct hwtstamp_config config;
6137 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6138 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6139 u32 tsync_rx_cfg = 0;
6144 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6147 /* reserved for future extensions */
6151 switch (config.tx_type) {
6152 case HWTSTAMP_TX_OFF:
6154 case HWTSTAMP_TX_ON:
6160 switch (config.rx_filter) {
6161 case HWTSTAMP_FILTER_NONE:
6164 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6165 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6166 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6167 case HWTSTAMP_FILTER_ALL:
6169 * register TSYNCRXCFG must be set, therefore it is not
6170 * possible to time stamp both Sync and Delay_Req messages
6171 * => fall back to time stamping all packets
6173 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6174 config.rx_filter = HWTSTAMP_FILTER_ALL;
6176 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6177 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6178 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6181 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6182 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6183 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6186 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6187 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6188 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6189 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6192 config.rx_filter = HWTSTAMP_FILTER_SOME;
6194 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6195 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6196 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6197 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6200 config.rx_filter = HWTSTAMP_FILTER_SOME;
6202 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6203 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6204 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6205 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6206 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6213 if (hw->mac.type == e1000_82575) {
6214 if (tsync_rx_ctl | tsync_tx_ctl)
6220 * Per-packet timestamping only works if all packets are
6221 * timestamped, so enable timestamping in all packets as
6222 * long as one rx filter was configured.
6224 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6225 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6226 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6229 /* enable/disable TX */
6230 regval = rd32(E1000_TSYNCTXCTL);
6231 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6232 regval |= tsync_tx_ctl;
6233 wr32(E1000_TSYNCTXCTL, regval);
6235 /* enable/disable RX */
6236 regval = rd32(E1000_TSYNCRXCTL);
6237 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6238 regval |= tsync_rx_ctl;
6239 wr32(E1000_TSYNCRXCTL, regval);
6241 /* define which PTP packets are time stamped */
6242 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6244 /* define ethertype filter for timestamped packets */
6247 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6248 E1000_ETQF_1588 | /* enable timestamping */
6249 ETH_P_1588)); /* 1588 eth protocol type */
6251 wr32(E1000_ETQF(3), 0);
6253 #define PTP_PORT 319
6254 /* L4 Queue Filter[3]: filter by destination port and protocol */
6256 u32 ftqf = (IPPROTO_UDP /* UDP */
6257 | E1000_FTQF_VF_BP /* VF not compared */
6258 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6259 | E1000_FTQF_MASK); /* mask all inputs */
6260 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6262 wr32(E1000_IMIR(3), htons(PTP_PORT));
6263 wr32(E1000_IMIREXT(3),
6264 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6265 if (hw->mac.type == e1000_82576) {
6266 /* enable source port check */
6267 wr32(E1000_SPQF(3), htons(PTP_PORT));
6268 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6270 wr32(E1000_FTQF(3), ftqf);
6272 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
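/*
 * L4 Queue Filter 3 therefore matches UDP packets to destination port
 * 319 (the PTP event port) and timestamps only those; on 82576 the
 * source port is additionally checked via SPQF.
 */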
6276 adapter->hwtstamp_config = config;
6278 /* clear TX/RX time stamp registers, just to be sure */
6279 regval = rd32(E1000_TXSTMPH);
6280 regval = rd32(E1000_RXSTMPH);
6282 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6292 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6298 return igb_mii_ioctl(netdev, ifr, cmd);
6300 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6306 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6308 struct igb_adapter *adapter = hw->back;
6311 cap_offset = adapter->pdev->pcie_cap;
6313 return -E1000_ERR_CONFIG;
6315 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6320 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6322 struct igb_adapter *adapter = hw->back;
6325 cap_offset = adapter->pdev->pcie_cap;
6327 return -E1000_ERR_CONFIG;
6329 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6334 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6336 struct igb_adapter *adapter = netdev_priv(netdev);
6337 struct e1000_hw *hw = &adapter->hw;
6340 igb_irq_disable(adapter);
6342 if (features & NETIF_F_HW_VLAN_RX) {
6343 /* enable VLAN tag insert/strip */
6344 ctrl = rd32(E1000_CTRL);
6345 ctrl |= E1000_CTRL_VME;
6346 wr32(E1000_CTRL, ctrl);
6348 /* Disable CFI check */
6349 rctl = rd32(E1000_RCTL);
6350 rctl &= ~E1000_RCTL_CFIEN;
6351 wr32(E1000_RCTL, rctl);
6353 /* disable VLAN tag insert/strip */
6354 ctrl = rd32(E1000_CTRL);
6355 ctrl &= ~E1000_CTRL_VME;
6356 wr32(E1000_CTRL, ctrl);
6359 igb_rlpml_set(adapter);
6361 if (!test_bit(__IGB_DOWN, &adapter->state))
6362 igb_irq_enable(adapter);
6365 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6367 struct igb_adapter *adapter = netdev_priv(netdev);
6368 struct e1000_hw *hw = &adapter->hw;
6369 int pf_id = adapter->vfs_allocated_count;
6371 /* attempt to add filter to vlvf array */
6372 igb_vlvf_set(adapter, vid, true, pf_id);
6374 /* add the filter since PF can receive vlans w/o entry in vlvf */
6375 igb_vfta_set(hw, vid, true);
6377 set_bit(vid, adapter->active_vlans);
6380 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6382 struct igb_adapter *adapter = netdev_priv(netdev);
6383 struct e1000_hw *hw = &adapter->hw;
6384 int pf_id = adapter->vfs_allocated_count;
6387 igb_irq_disable(adapter);
6389 if (!test_bit(__IGB_DOWN, &adapter->state))
6390 igb_irq_enable(adapter);
6392 /* remove vlan from VLVF table array */
6393 err = igb_vlvf_set(adapter, vid, false, pf_id);
6395 /* if vid was not present in VLVF just remove it from table */
6397 igb_vfta_set(hw, vid, false);
6399 clear_bit(vid, adapter->active_vlans);
6402 static void igb_restore_vlan(struct igb_adapter *adapter)
6406 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6407 igb_vlan_rx_add_vid(adapter->netdev, vid);
6410 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6412 struct pci_dev *pdev = adapter->pdev;
6413 struct e1000_mac_info *mac = &adapter->hw.mac;
6417 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6418 * for the switch() below to work */
6419 if ((spd & 1) || (dplx & ~1))
6422 /* Fiber NICs only allow 1000 Mbps Full duplex */
6423 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6424 spd != SPEED_1000 &&
6425 dplx != DUPLEX_FULL)
6428 switch (spd + dplx) {
6429 case SPEED_10 + DUPLEX_HALF:
6430 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6432 case SPEED_10 + DUPLEX_FULL:
6433 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6435 case SPEED_100 + DUPLEX_HALF:
6436 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6438 case SPEED_100 + DUPLEX_FULL:
6439 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6441 case SPEED_1000 + DUPLEX_FULL:
6443 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6445 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6452 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
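/* __igb_shutdown - common suspend/shutdown path
 *
 * Quiesces the interface, then either arms the Wake-on-LAN filters
 * (WUC/WUFC) requested in adapter->wol or clears them entirely.
 * *enable_wake tells the caller whether the PCI layer should keep the
 * device wake-capable in D3.
 */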
static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	u32 wufc = adapter->wol;
#ifdef CONFIG_PM
	int retval = 0;
#endif

	netif_device_detach(netdev);

	if (netif_running(netdev))
		igb_close(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PM
	retval = pci_save_state(pdev);
	if (retval)
		return retval;
#endif

	status = rd32(E1000_STATUS);
	if (status & E1000_STATUS_LU)
		wufc &= ~E1000_WUFC_LNKC;

	if (wufc) {
		igb_setup_rctl(adapter);
		igb_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & E1000_WUFC_MC) {
			rctl = rd32(E1000_RCTL);
			rctl |= E1000_RCTL_MPE;
			wr32(E1000_RCTL, rctl);
		}

		ctrl = rd32(E1000_CTRL);
		/* advertise wake from D3Cold */
		#define E1000_CTRL_ADVD3WUC 0x00100000
		/* phy power management enable */
		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
		ctrl |= E1000_CTRL_ADVD3WUC;
		wr32(E1000_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igb_disable_pcie_master(hw);

		wr32(E1000_WUC, E1000_WUC_PME_EN);
		wr32(E1000_WUFC, wufc);
	} else {
		wr32(E1000_WUC, 0);
		wr32(E1000_WUFC, 0);
	}

	*enable_wake = wufc || adapter->en_mng_pt;
	if (!*enable_wake)
		igb_power_down_link(adapter);
	else
		igb_power_up_link(adapter);

	/* Release control of h/w to f/w. If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
{
	int retval;
	bool wake;

	retval = __igb_shutdown(pdev, &wake);
	if (retval)
		return retval;

	if (wake) {
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return 0;
}

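/* igb_resume - mirror image of __igb_shutdown: return the device to D0,
 * re-enable bus mastering, rebuild the interrupt scheme and clear any
 * Wake Up Status (WUS) bits left over from the wake event.
 */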
static int igb_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int err;

	pci_set_power_state(pdev, PCI_D0);
	pci_restore_state(pdev);
	pci_save_state(pdev);

	err = pci_enable_device_mem(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"igb: Cannot enable PCI device from suspend\n");
		return err;
	}
	pci_set_master(pdev);

	pci_enable_wake(pdev, PCI_D3hot, 0);
	pci_enable_wake(pdev, PCI_D3cold, 0);

	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	wr32(E1000_WUS, ~0);

	if (netif_running(netdev)) {
		err = igb_open(netdev);
		if (err)
			return err;
	}

	netif_device_attach(netdev);

	return 0;
}

#endif /* CONFIG_PM */

static void igb_shutdown(struct pci_dev *pdev)
{
	bool wake;

	__igb_shutdown(pdev, &wake);

	if (system_state == SYSTEM_POWER_OFF) {
		pci_wake_from_d3(pdev, wake);
		pci_set_power_state(pdev, PCI_D3hot);
	}
}

#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int i;

	if (!adapter->msix_entries) {
		struct igb_q_vector *q_vector = adapter->q_vector[0];
		igb_irq_disable(adapter);
		napi_schedule(&q_vector->napi);
		return;
	}

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		wr32(E1000_EIMC, q_vector->eims_value);
		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */

/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}

/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first half of the igb_resume routine.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	pci_ers_result_t result;
	int err;

	if (pci_enable_device_mem(pdev)) {
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);
		pci_save_state(pdev);

		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		igb_reset(adapter);
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
			"failed 0x%0x\n", err);
		/* non-fatal, continue */
	}

	return result;
}

/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
 * second half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev)) {
		if (igb_up(adapter)) {
			dev_err(&pdev->dev, "igb_up failed after reset\n");
			return;
		}
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
}

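/* igb_rar_set_qsel - write one Receive Address Register pair
 *
 * A receive address entry is split across two 32-bit registers: RAL
 * holds the low four bytes of the MAC address, RAH the remaining two
 * plus the Address Valid flag and the pool-select bits that steer
 * matching frames to the queue pool of a particular VF.
 */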
static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
			     u8 qsel)
{
	u32 rar_low, rar_high;
	struct e1000_hw *hw = &adapter->hw;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

	/* Indicate to hardware the Address is Valid. */
	rar_high |= E1000_RAH_AV;

	if (hw->mac.type == e1000_82575)
		rar_high |= E1000_RAH_POOL_1 * qsel;
	else
		rar_high |= E1000_RAH_POOL_1 << qsel;

	wr32(E1000_RAL(index), rar_low);
	wrfl();
	wr32(E1000_RAH(index), rar_high);
	wrfl();
}

static int igb_set_vf_mac(struct igb_adapter *adapter,
			  int vf, unsigned char *mac_addr)
{
	struct e1000_hw *hw = &adapter->hw;
	/* VF MAC addresses start at the end of the receive addresses and
	 * move towards the first, so a collision should not be possible */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);

	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

	return 0;
}

static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
		return -EINVAL;
	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
		 " change effective.\n");
	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
			 " but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
			 " attempting to use the VF device.\n");
	}
	return igb_set_vf_mac(adapter, vf, mac);
}

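/* igb_link_mbps - translate the internal SPEED_* value into Mbps for
 * the VF rate-limit arithmetic; speeds without rate-limit support
 * (including 10 Mbps) map to 0, which disables limiting.
 */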
static int igb_link_mbps(int internal_link_speed)
{
	switch (internal_link_speed) {
	case SPEED_100:
		return 100;
	case SPEED_1000:
		return 1000;
	default:
		return 0;
	}
}

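/* igb_set_vf_rate_limit - program the RTTBCNRC rate-scheduler register
 *
 * The limit is expressed as a divisor of the link rate in fixed-point
 * form: rf_int is the integer part of link_speed / tx_rate, rf_dec the
 * remainder scaled by 2^E1000_RTTBCNRC_RF_INT_SHIFT. Worked example
 * (assuming the shift is 14, i.e. a scale of 16384): link_speed = 1000,
 * tx_rate = 300 gives rf_int = 3 and
 * rf_dec = (1000 - 900) * 16384 / 300 = 5461, a factor of ~3.333.
 */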
static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
				  int link_speed)
{
	int rf_dec, rf_int;
	u32 bcnrc_val;

	if (tx_rate != 0) {
		/* Calculate the rate factor values to set */
		rf_int = link_speed / tx_rate;
		rf_dec = (link_speed - (rf_int * tx_rate));
		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;

		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
			       E1000_RTTBCNRC_RF_INT_MASK);
		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
	} else {
		bcnrc_val = 0;
	}

	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
	wr32(E1000_RTTBCNRC, bcnrc_val);
}

static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
{
	int actual_link_speed, i;
	bool reset_rate = false;

	/* VF TX rate limit was not set or not supported */
	if ((adapter->vf_rate_link_speed == 0) ||
	    (adapter->hw.mac.type != e1000_82576))
		return;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if (actual_link_speed != adapter->vf_rate_link_speed) {
		reset_rate = true;
		adapter->vf_rate_link_speed = 0;
		dev_info(&adapter->pdev->dev,
			 "Link speed has been changed. VF Transmit "
			 "rate is disabled\n");
	}

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		if (reset_rate)
			adapter->vf_data[i].tx_rate = 0;

		igb_set_vf_rate_limit(&adapter->hw, i,
				      adapter->vf_data[i].tx_rate,
				      actual_link_speed);
	}
}

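/* igb_ndo_set_vf_bw - "ip link set ... vf N rate M" entry point; only
 * the 82576 supports per-VF transmit rate limiting, the link must be
 * up, and the requested rate may not exceed the link speed. A rate of
 * 0 removes the limit.
 */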
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int actual_link_speed;

	if (hw->mac.type != e1000_82576)
		return -EOPNOTSUPP;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if ((vf >= adapter->vfs_allocated_count) ||
	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
	    (tx_rate < 0) || (tx_rate > actual_link_speed))
		return -EINVAL;

	adapter->vf_rate_link_speed = actual_link_speed;
	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);

	return 0;
}

static int igb_ndo_get_vf_config(struct net_device *netdev,
				 int vf, struct ifla_vf_info *ivi)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;
	ivi->vf = vf;
	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
	ivi->vlan = adapter->vf_data[vf].pf_vlan;
	ivi->qos = adapter->vf_data[vf].pf_qos;
	return 0;
}

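/* igb_vmm_control - enable VM-to-VM loopback and anti-spoofing for
 * SR-IOV operation. Note that the case labels in the switch below fall
 * through deliberately: each MAC type only needs the fixups listed
 * from its own label onwards.
 */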
static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	switch (hw->mac.type) {
	case e1000_82575:
	default:
		/* replication is not supported for 82575 */
		return;
	case e1000_82576:
		/* notify HW that the MAC is adding vlan tags */
		reg = rd32(E1000_DTXCTL);
		reg |= E1000_DTXCTL_VLAN_ADDED;
		wr32(E1000_DTXCTL, reg);
	case e1000_82580:
		/* enable replication vlan tag stripping */
		reg = rd32(E1000_RPLOLR);
		reg |= E1000_RPLOLR_STRVLAN;
		wr32(E1000_RPLOLR, reg);
	case e1000_i350:
		/* none of the above registers are supported by i350 */
		break;
	}

	if (adapter->vfs_allocated_count) {
		igb_vmdq_set_loopback_pf(hw, true);
		igb_vmdq_set_replication_pf(hw, true);
		igb_vmdq_set_anti_spoofing_pf(hw, true,
					      adapter->vfs_allocated_count);
	} else {
		igb_vmdq_set_loopback_pf(hw, false);
		igb_vmdq_set_replication_pf(hw, false);
	}
}