/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2011 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"
#define MAJ 3
#define MIN 2
#define BUILD 10
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
__stringify(BUILD) "-k"
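/* DRV_VERSION pastes the three numeric components into a single version
 * string at compile time; with the values defined above it expands to
 * "3.2.10-k".
 */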
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
				"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};
static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, u32 features);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};
static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend  = igb_suspend,
	.resume   = igb_resume,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
struct igb_reg_info {
	u32 ofs;
	char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

	/* General Registers */
	{E1000_CTRL, "CTRL"},
	{E1000_STATUS, "STATUS"},
	{E1000_CTRL_EXT, "CTRL_EXT"},

	/* Interrupt Registers */
	{E1000_ICR, "ICR"},

	/* RX Registers */
	{E1000_RCTL, "RCTL"},
	{E1000_RDLEN(0), "RDLEN"},
	{E1000_RDH(0), "RDH"},
	{E1000_RDT(0), "RDT"},
	{E1000_RXDCTL(0), "RXDCTL"},
	{E1000_RDBAL(0), "RDBAL"},
	{E1000_RDBAH(0), "RDBAH"},

	/* TX Registers */
	{E1000_TCTL, "TCTL"},
	{E1000_TDBAL(0), "TDBAL"},
	{E1000_TDBAH(0), "TDBAH"},
	{E1000_TDLEN(0), "TDLEN"},
	{E1000_TDH(0), "TDH"},
	{E1000_TDT(0), "TDT"},
	{E1000_TXDCTL(0), "TXDCTL"},
	{E1000_TDFH, "TDFH"},
	{E1000_TDFT, "TDFT"},
	{E1000_TDFHS, "TDFHS"},
	{E1000_TDFPC, "TDFPC"},

	/* List Terminator */
	{}
};
/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
	int n = 0;
	char rname[16];
	u32 regs[8];

	switch (reginfo->ofs) {
	case E1000_RDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDLEN(n));
		break;
	case E1000_RDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDH(n));
		break;
	case E1000_RDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDT(n));
		break;
	case E1000_RXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RXDCTL(n));
		break;
	case E1000_RDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_RDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAH(n));
		break;
	case E1000_TDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAL(n));
		break;
	case E1000_TDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAH(n));
		break;
	case E1000_TDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDLEN(n));
		break;
	case E1000_TDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDH(n));
		break;
	case E1000_TDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDT(n));
		break;
	case E1000_TXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TXDCTL(n));
		break;
	default:
		printk(KERN_INFO "%-15s %08x\n",
			reginfo->name, rd32(reginfo->ofs));
		return;
	}

	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
	printk(KERN_INFO "%-15s ", rname);
	for (n = 0; n < 4; n++)
		printk(KERN_CONT "%08x ", regs[n]);
	printk(KERN_CONT "\n");
}
/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_reg_info *reginfo;
	struct igb_ring *tx_ring;
	union e1000_adv_tx_desc *tx_desc;
	struct my_u0 { u64 a; u64 b; } *u0;
	struct igb_ring *rx_ring;
	union e1000_adv_rx_desc *rx_desc;
	u32 staterr;
	u16 i, n;

	if (!netif_msg_hw(adapter))
		return;

	/* Print netdevice Info */
	if (netdev) {
		dev_info(&adapter->pdev->dev, "Net device Info\n");
		printk(KERN_INFO "Device Name     state            "
			"trans_start      last_rx\n");
		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
			netdev->name,
			netdev->state,
			netdev->trans_start,
			netdev->last_rx);
	}

	/* Print Registers */
	dev_info(&adapter->pdev->dev, "Register Dump\n");
	printk(KERN_INFO " Register Name   Value\n");
	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
	     reginfo->name; reginfo++) {
		igb_regdump(hw, reginfo);
	}

	/* Print TX Ring Summary */
	if (!netdev || !netif_running(netdev))
		goto exit;

	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
		" leng ntw timestamp\n");
	for (n = 0; n < adapter->num_tx_queues; n++) {
		struct igb_tx_buffer *buffer_info;
		tx_ring = adapter->tx_ring[n];
		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
		printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
			n, tx_ring->next_to_use, tx_ring->next_to_clean,
			(u64)buffer_info->dma,
			buffer_info->length,
			buffer_info->next_to_watch,
			(u64)buffer_info->time_stamp);
	}

	/* Print TX Rings */
	if (!netif_msg_tx_done(adapter))
		goto rx_ring_summary;

	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

	/* Transmit Descriptor Formats
	 *
	 * Advanced Transmit Descriptor
	 *   +--------------------------------------------------------------+
	 * 0 |         Buffer Address [63:0]                                |
	 *   +--------------------------------------------------------------+
	 * 8 | PAYLEN  | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN   |
	 *   +--------------------------------------------------------------+
	 *   63      46 45   40 39  38 36 35 32 31 24           15        0
	 */

	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "T [desc]     [address 63:0  ] "
			"[PlPOCIStDDM Ln] [bi->dma       ] "
			"leng ntw timestamp bi->skb\n");

		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
			struct igb_tx_buffer *buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, i);
			buffer_info = &tx_ring->tx_buffer_info[i];
			u0 = (struct my_u0 *)tx_desc;
			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
				" %04X  %p %016llX %p", i,
				le64_to_cpu(u0->a),
				le64_to_cpu(u0->b),
				(u64)buffer_info->dma,
				buffer_info->length,
				buffer_info->next_to_watch,
				(u64)buffer_info->time_stamp,
				buffer_info->skb);
			if (i == tx_ring->next_to_use &&
				i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC/U\n");
			else if (i == tx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");

			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
				print_hex_dump(KERN_INFO, "",
					DUMP_PREFIX_ADDRESS,
					16, 1, phys_to_virt(buffer_info->dma),
					buffer_info->length, true);
		}
	}

	/* Print RX Rings Summary */
rx_ring_summary:
	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC]\n");
	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO " %5d %5X %5X\n", n,
			rx_ring->next_to_use, rx_ring->next_to_clean);
	}

	/* Print RX Rings */
	if (!netif_msg_rx_status(adapter))
		goto exit;

	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

	/* Advanced Receive Descriptor (Read) Format
	 *    63                                           1        0
	 *    +-----------------------------------------------------+
	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
	 *    +----------------------------------------------+------+
	 *  8 |       Header Buffer Address [63:1]           |  DD  |
	 *    +-----------------------------------------------------+
	 *
	 *
	 * Advanced Receive Descriptor (Write-Back) Format
	 *
	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
	 *   +------------------------------------------------------+
	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
	 *   | Checksum   Ident  |   |           |    | Type | Type |
	 *   +------------------------------------------------------+
	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
	 *   +------------------------------------------------------+
	 *   63       48 47    32 31            20 19               0
	 */

	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
			"<-- Adv Rx Read format\n");
		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
			"[vl er S cks ln] ---------------- [bi->skb] "
			"<-- Adv Rx Write-Back format\n");

		for (i = 0; i < rx_ring->count; i++) {
			struct igb_rx_buffer *buffer_info;
			buffer_info = &rx_ring->rx_buffer_info[i];
			rx_desc = IGB_RX_DESC(rx_ring, i);
			u0 = (struct my_u0 *)rx_desc;
			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
			if (staterr & E1000_RXD_STAT_DD) {
				/* Descriptor Done */
				printk(KERN_INFO "RWB[0x%03X]     %016llX "
					"%016llX ---------------- %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					buffer_info->skb);
			} else {
				printk(KERN_INFO "R  [0x%03X]     %016llX "
					"%016llX %016llX %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					(u64)buffer_info->dma,
					buffer_info->skb);

				if (netif_msg_pktdata(adapter)) {
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(buffer_info->dma),
						IGB_RX_HDR_LEN, true);
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(
						  buffer_info->page_dma +
						  buffer_info->page_offset),
						PAGE_SIZE/2, true);
				}
			}

			if (i == rx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == rx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");
		}
	}

exit:
	return;
}
/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on lowest register read. For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
	 */
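	/*
	 * On pre-82580 parts shift stays 0, so the value returned below is
	 * simply SYSTIMH:SYSTIML as one 64-bit count; on the 82580 the same
	 * register pair is shifted up by IGB_82580_TSYNC_SHIFT bits to leave
	 * room for the SYSTIMR fraction that is being ignored here.
	 */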
	if (hw->mac.type == e1000_82580) {
		stamp = rd32(E1000_SYSTIMR) >> 8;
		shift = IGB_82580_TSYNC_SHIFT;
	}

	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
	return stamp;
}

/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev;
}
/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded.  All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;
	printk(KERN_INFO "%s - version %s\n",
	       igb_driver_string, igb_driver_version);

	printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
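/*
 * For example, Q_IDX_82576() maps i = 0, 1, 2, 3, ... onto queue offsets
 * 0, 8, 1, 9, ...: even indices count up from 0, odd indices count up
 * from 8, interleaving each VF's queue pair as described below.
 */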
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
				                               Q_IDX_82576(i);
		}
	case e1000_82575:
	case e1000_82580:
	case e1000_i350:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}
static void igb_free_queues(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		kfree(adapter->tx_ring[i]);
		adapter->tx_ring[i] = NULL;
	}
	for (i = 0; i < adapter->num_rx_queues; i++) {
		kfree(adapter->rx_ring[i]);
		adapter->rx_ring[i] = NULL;
	}
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}
/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	struct igb_ring *ring;
	int i;
	int orig_node = adapter->node;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
		adapter->tx_ring[i] = ring;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
		/* enable rx checksum */
		set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags);
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
		adapter->rx_ring[i] = ring;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	igb_cache_ring_register(adapter);

	return 0;

err:
	/* Restore the adapter's original node */
	adapter->node = orig_node;
	igb_free_queues(adapter);

	return -ENOMEM;
}
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	u32 msixbm = 0;
	u32 ivar, index;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;

	if (q_vector->rx.ring)
		rx_queue = q_vector->rx.ring->reg_idx;
	if (q_vector->tx.ring)
		tx_queue = q_vector->tx.ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		 * bitmask for the EICR/EIMS/EIMC registers.  To assign one
		 * or more queues to a vector, we write the appropriate bits
		 * into the MSIXBM register for that vector.
		 */
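		/* For example, Rx queue 2 contributes the bit
		 * E1000_EICR_RX_QUEUE0 << 2 to the vector's bitmask, and the
		 * same bit later shows up in EICR when that queue raises an
		 * interrupt.
		 */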
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/* 82576 uses a table-based method for assigning vectors.
		 * Each queue has a single entry in the table to which we write
		 * a vector number along with a "valid" bit.  Sadly, the layout
		 * of the table is somewhat counterintuitive.
		 */
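		/* Concretely, IVAR0 entry (N & 0x7) holds four 8-bit fields:
		 * byte 0 is Rx queue N, byte 1 is Tx queue N, byte 2 is
		 * Rx queue N + 8 and byte 3 is Tx queue N + 8, so queues 0-7
		 * occupy the low halves of the eight entries and queues 8-15
		 * the high halves.
		 */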
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue < 8) {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			} else {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue < 8) {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			} else {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
	case e1000_i350:
		/* 82580 uses the same table-based approach as 82576 but has
		 * fewer entries, as a result we carry over for queues greater
		 * than 4.
		 */
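		/* Here entry (N >> 1) serves queues N and N + 1: even queues
		 * use bytes 0 (Rx) and 1 (Tx), odd queues use bytes 2 (Rx)
		 * and 3 (Tx), e.g. Rx queue 5 programs byte 2 of entry 2.
		 */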
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue & 0x1) {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			} else {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue & 0x1) {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			} else {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}
/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support*/
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
		           E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug.
		 */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
		     E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	wrfl();
}
/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
	                  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx.ring && q_vector->tx.ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
			        q_vector->rx.ring->queue_index);
		else if (q_vector->tx.ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
			        q_vector->tx.ring->queue_index);
		else if (q_vector->rx.ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
			        q_vector->rx.ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
		                  igb_msix_ring, 0, q_vector->name,
		                  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced queue counts. */
	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
	return netif_set_real_num_rx_queues(adapter->netdev,
					    adapter->num_rx_queues);
}
/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;
	int orig_node = adapter->node;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
						adapter->num_tx_queues)) &&
		    (adapter->num_rx_queues == v_idx))
			adapter->node = orig_node;
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
					adapter->node);
		if (!q_vector)
			q_vector = kzalloc(sizeof(struct igb_q_vector),
					   GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	return 0;

err_out:
	/* Restore the adapter's original node */
	adapter->node = orig_node;
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}
static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx.ring = adapter->rx_ring[ring_idx];
	q_vector->rx.ring->q_vector = q_vector;
	q_vector->rx.count++;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx.ring = adapter->tx_ring[ring_idx];
	q_vector->tx.ring->q_vector = q_vector;
	q_vector->tx.count++;
	q_vector->itr_val = adapter->tx_itr_setting;
	q_vector->tx.work_limit = adapter->tx_work_limit;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	err = igb_set_interrupt_capability(adapter);
	if (err)
		return err;

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}
/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(adapter->pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	} else {
		igb_assign_vector(adapter->q_vector[0], 0);
	}

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}
static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++) {
			struct igb_q_vector *q_vector = adapter->q_vector[i];
			free_irq(adapter->msix_entries[vector++].vector,
			         q_vector);
		}
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}
/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	if (adapter->msix_entries) {
		int i;
		for (i = 0; i < adapter->num_q_vectors; i++)
			synchronize_irq(adapter->msix_entries[i].vector);
	} else {
		synchronize_irq(adapter->pdev->irq);
	}
}
/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		if (adapter->hw.mac.type == e1000_82580)
			ims |= E1000_IMS_DRSTA;

		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}
static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !test_bit(old_vid, adapter->active_vlans)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}
/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean
	 */
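	/* Keeping one descriptor unused is what lets the driver tell a full
	 * ring from an empty one: if every slot could be filled, both states
	 * would look identical, with next_to_use == next_to_clean.
	 */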
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
	}
}
/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
}

/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}
	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}
void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer
	 */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_disable(&q_vector->napi);
	}

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset*/
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA

	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}
void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}
void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition PBA for greater than 9k mtu.
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_i350:
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB.
		 */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it
		 */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;
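		/* Worked example: with a 9000 byte MTU the frame plus headers
		 * is roughly 9KB, so min_tx_space becomes two such frames
		 * (each plus 12 descriptor bytes net of FCS) rounded up to
		 * 18KB, and min_rx_space one frame rounded up to 9KB.
		 */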
		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation
		 */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment
			 */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame
	 */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));
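	/* For example, a 34KB Rx FIFO with 1522 byte frames gives
	 * min(31334, 31772) -> hwm = 31334, which the masking below then
	 * rounds down to 16-byte granularity.
	 */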
	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;

	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");
	if (hw->mac.type > e1000_82580) {
		if (adapter->flags & IGB_FLAG_DMAC) {
			u32 reg;

			/* DMA Coalescing high water mark needs to be higher
			 * than the Rx threshold.  The Rx threshold is
			 * currently pba - 6, so we should use a high water
			 * mark of pba - 4.
			 */
			hwm = (pba - 4) << 10;
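			/* For example, with pba = 34 (KB) the Rx threshold
			 * sits at 28KB and hwm becomes 30KB expressed in
			 * bytes ((34 - 4) << 10).
			 */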
			reg = (((pba - 6) << E1000_DMACR_DMACTHR_SHIFT)
			       & E1000_DMACR_DMACTHR_MASK);

			/* transition to L0x or L1 if available..*/
			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

			/* watchdog timer = 1000 usec, in 32 usec intervals */
			reg |= (1000 >> 5);
			wr32(E1000_DMACR, reg);

			/* no lower threshold to disable coalescing
			 * (smart FIFO) - UTRESH=0
			 */
			wr32(E1000_DMCRTRH, 0);

			/* set the DMA Coalescing high water mark computed
			 * above
			 */
			wr32(E1000_FCRTC, hwm);

			/* This sets the time to wait before requesting
			 * transition to low power state to number of usecs
			 * needed to receive 1 512 byte frame at gigabit
			 * line rate
			 */
			reg = rd32(E1000_DMCTLX);
			reg |= IGB_DMCTLX_DCFLUSH_DIS;

			/* Delay 255 usec before entering Lx state. */
			reg |= 0xFF;
			wr32(E1000_DMCTLX, reg);

			/* free space in Tx packet buffer to wake from DMAC */
			wr32(E1000_DMCTXTH,
			     (IGB_MIN_TXPBSIZE -
			     (IGB_TX_BUF_4096 + adapter->max_frame_size))
			     >> 6);

			/* make low power state decision controlled by DMAC */
			reg = rd32(E1000_PCIEMISC);
			reg |= E1000_PCIEMISC_LX_DECISION;
			wr32(E1000_PCIEMISC, reg);
		} /* end if IGB_FLAG_DMAC set */
	}
	if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);
		wr32(E1000_PCIEMISC,
		     reg & ~E1000_PCIEMISC_LX_DECISION);
	}
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_get_phy_info(hw);
}
static u32 igb_fix_features(struct net_device *netdev, u32 features)
{
	/*
	 * Since there is no support for separate rx/tx vlan accel
	 * enable/disable make sure tx flag is always in same state as rx.
	 */
	if (features & NETIF_F_HW_VLAN_RX)
		features |= NETIF_F_HW_VLAN_TX;
	else
		features &= ~NETIF_F_HW_VLAN_TX;

	return features;
}

static int igb_set_features(struct net_device *netdev, u32 features)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	int i;
	u32 changed = netdev->features ^ features;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		if (features & NETIF_F_RXCSUM)
			set_bit(IGB_RING_FLAG_RX_CSUM,
				&adapter->rx_ring[i]->flags);
		else
			clear_bit(IGB_RING_FLAG_RX_CSUM,
				  &adapter->rx_ring[i]->flags);
	}

	if (changed & NETIF_F_HW_VLAN_RX)
		igb_vlan_mode(netdev, features);

	return 0;
}
static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame,
	.ndo_get_stats64	= igb_get_stats64,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
	.ndo_fix_features	= igb_fix_features,
	.ndo_set_features	= igb_set_features,
};
/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
			       const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	s32 ret_val;
	static int global_quad_port_a; /* global quad port a indication */
	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
	unsigned long mmio_start, mmio_len;
	int err, pci_using_dac;
	u16 eeprom_apme_mask = IGB_EEPROM_APME;
	u8 part_str[E1000_PBANUM_LENGTH];

	/* Catch broken hardware that put the wrong VF device ID in
	 * the PCIe SR-IOV capability.
	 */
	if (pdev->is_virtfn) {
		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
		     pci_name(pdev), pdev->vendor, pdev->device);
		return -EINVAL;
	}

	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	pci_using_dac = 0;
	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
	if (!err) {
		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
		if (!err)
			pci_using_dac = 1;
	} else {
		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			err = dma_set_coherent_mask(&pdev->dev,
						    DMA_BIT_MASK(32));
			if (err) {
				dev_err(&pdev->dev, "No usable DMA "
					"configuration, aborting\n");
				goto err_dma;
			}
		}
	}

	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
	                                   IORESOURCE_MEM),
	                                   igb_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	pci_set_master(pdev);
	pci_save_state(pdev);

	err = -ENOMEM;
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
				   IGB_MAX_TX_QUEUES);
	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;

	mmio_start = pci_resource_start(pdev, 0);
	mmio_len = pci_resource_len(pdev, 0);

	err = -EIO;
	hw->hw_addr = ioremap(mmio_start, mmio_len);
	if (!hw->hw_addr)
		goto err_ioremap;

	netdev->netdev_ops = &igb_netdev_ops;
	igb_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	netdev->mem_start = mmio_start;
	netdev->mem_end = mmio_start + mmio_len;

	/* PCI config space info */
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->revision_id = pdev->revision;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;

	/* Copy the default MAC, PHY and NVM function pointers */
	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
	/* Initialize skew-specific constants */
	err = ei->get_invariants(hw);
	if (err)
		goto err_sw_init;

	/* setup the private structure */
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	igb_get_bus_info_pcie(hw);

	hw->phy.autoneg_wait_to_complete = false;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = false;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (igb_check_reset_block(hw))
		dev_info(&pdev->dev,
			"PHY reset is blocked due to SOL/IDER session.\n");

	netdev->hw_features = NETIF_F_SG |
			      NETIF_F_IP_CSUM |
			      NETIF_F_IPV6_CSUM |
			      NETIF_F_TSO |
			      NETIF_F_TSO6 |
			      NETIF_F_RXCSUM |
			      NETIF_F_HW_VLAN_RX;

	netdev->features = netdev->hw_features |
			   NETIF_F_HW_VLAN_TX |
			   NETIF_F_HW_VLAN_FILTER;

	netdev->vlan_features |= NETIF_F_TSO;
	netdev->vlan_features |= NETIF_F_TSO6;
	netdev->vlan_features |= NETIF_F_IP_CSUM;
	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
	netdev->vlan_features |= NETIF_F_SG;

	if (pci_using_dac) {
		netdev->features |= NETIF_F_HIGHDMA;
		netdev->vlan_features |= NETIF_F_HIGHDMA;
	}

	if (hw->mac.type >= e1000_82576) {
		netdev->hw_features |= NETIF_F_SCTP_CSUM;
		netdev->features |= NETIF_F_SCTP_CSUM;
	}

	netdev->priv_flags |= IFF_UNICAST_FLT;

	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);

	/* before reading the NVM, reset the controller to put the device in a
	 * known good starting state
	 */
	hw->mac.ops.reset_hw(hw);

	/* make sure the NVM is good */
	if (hw->nvm.ops.validate(hw) < 0) {
2040 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2045 /* copy the MAC address out of the NVM */
2046 if (hw->mac.ops.read_mac_addr(hw))
2047 dev_err(&pdev->dev, "NVM Read Error\n");
2049 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2050 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2052 if (!is_valid_ether_addr(netdev->perm_addr)) {
2053 dev_err(&pdev->dev, "Invalid MAC Address\n");
2058 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2059 (unsigned long) adapter);
2060 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2061 (unsigned long) adapter);
2063 INIT_WORK(&adapter->reset_task, igb_reset_task);
2064 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2066 /* Initialize link properties that are user-changeable */
2067 adapter->fc_autoneg = true;
2068 hw->mac.autoneg = true;
2069 hw->phy.autoneg_advertised = 0x2f;
2071 hw->fc.requested_mode = e1000_fc_default;
2072 hw->fc.current_mode = e1000_fc_default;
2074 igb_validate_mdi_setting(hw);
2076 /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
2077 * enable the ACPI Magic Packet filter
2080 if (hw->bus.func == 0)
2081 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2082 else if (hw->mac.type >= e1000_82580)
2083 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2084 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2086 else if (hw->bus.func == 1)
2087 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2089 if (eeprom_data & eeprom_apme_mask)
2090 adapter->eeprom_wol |= E1000_WUFC_MAG;
2092 /* now that we have the eeprom settings, apply the special cases where
2093 * the eeprom may be wrong or the board simply won't support wake on
2094 * lan on a particular port */
2095 switch (pdev->device) {
2096 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2097 adapter->eeprom_wol = 0;
2099 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2100 case E1000_DEV_ID_82576_FIBER:
2101 case E1000_DEV_ID_82576_SERDES:
2102 /* Wake events only supported on port A for dual fiber
2103 * regardless of eeprom setting */
2104 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2105 adapter->eeprom_wol = 0;
2107 case E1000_DEV_ID_82576_QUAD_COPPER:
2108 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2109 /* if quad port adapter, disable WoL on all but port A */
2110 if (global_quad_port_a != 0)
2111 adapter->eeprom_wol = 0;
2113 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2114 /* Reset for multiple quad port adapters */
2115 if (++global_quad_port_a == 4)
2116 global_quad_port_a = 0;
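/* Illustrative walk-through (not from the original source): on a
 * quad-port adapter the four igb_probe() calls see global_quad_port_a
 * values of 0, 1, 2 and 3 in turn. Only the first call (port A,
 * counter 0) keeps its EEPROM WoL setting; the other three clear
 * eeprom_wol above, and the counter wraps back to 0 so a second quad
 * adapter in the same system behaves identically.
 */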
2120 /* initialize the wol settings based on the eeprom settings */
2121 adapter->wol = adapter->eeprom_wol;
2122 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2124 /* reset the hardware with the new settings */
2127 /* let the f/w know that the h/w is now under the control of the
2128 * driver. */
2129 igb_get_hw_control(adapter);
2131 strcpy(netdev->name, "eth%d");
2132 err = register_netdev(netdev);
2136 igb_vlan_mode(netdev, netdev->features);
2138 /* carrier off reporting is important to ethtool even BEFORE open */
2139 netif_carrier_off(netdev);
2141 #ifdef CONFIG_IGB_DCA
2142 if (dca_add_requester(&pdev->dev) == 0) {
2143 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2144 dev_info(&pdev->dev, "DCA enabled\n");
2145 igb_setup_dca(adapter);
2149 /* do hw tstamp init after resetting */
2150 igb_init_hw_timer(adapter);
2152 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2153 /* print bus type/speed/width info */
2154 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2156 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2157 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2159 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2160 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2161 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2165 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2166 if (ret_val)
2167 strcpy(part_str, "Unknown");
2168 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2169 dev_info(&pdev->dev,
2170 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2171 adapter->msix_entries ? "MSI-X" :
2172 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2173 adapter->num_rx_queues, adapter->num_tx_queues);
2174 switch (hw->mac.type) {
2176 igb_set_eee_i350(hw);
2184 igb_release_hw_control(adapter);
2186 if (!igb_check_reset_block(hw))
2189 if (hw->flash_address)
2190 iounmap(hw->flash_address);
2192 igb_clear_interrupt_scheme(adapter);
2193 iounmap(hw->hw_addr);
2195 free_netdev(netdev);
2197 pci_release_selected_regions(pdev,
2198 pci_select_bars(pdev, IORESOURCE_MEM));
2201 pci_disable_device(pdev);
2206 * igb_remove - Device Removal Routine
2207 * @pdev: PCI device information struct
2209 * igb_remove is called by the PCI subsystem to alert the driver
2210 * that it should release a PCI device. This could be caused by a
2211 * Hot-Plug event, or because the driver is going to be removed from
2214 static void __devexit igb_remove(struct pci_dev *pdev)
2216 struct net_device *netdev = pci_get_drvdata(pdev);
2217 struct igb_adapter *adapter = netdev_priv(netdev);
2218 struct e1000_hw *hw = &adapter->hw;
2221 * The watchdog timer may be rescheduled, so explicitly
2222 * disable the watchdog from being rescheduled.
2224 set_bit(__IGB_DOWN, &adapter->state);
2225 del_timer_sync(&adapter->watchdog_timer);
2226 del_timer_sync(&adapter->phy_info_timer);
2228 cancel_work_sync(&adapter->reset_task);
2229 cancel_work_sync(&adapter->watchdog_task);
2231 #ifdef CONFIG_IGB_DCA
2232 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2233 dev_info(&pdev->dev, "DCA disabled\n");
2234 dca_remove_requester(&pdev->dev);
2235 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2236 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2240 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2241 * would have already happened in close and is redundant. */
2242 igb_release_hw_control(adapter);
2244 unregister_netdev(netdev);
2246 igb_clear_interrupt_scheme(adapter);
2248 #ifdef CONFIG_PCI_IOV
2249 /* reclaim resources allocated to VFs */
2250 if (adapter->vf_data) {
2251 /* disable iov and allow time for transactions to clear */
2252 pci_disable_sriov(pdev);
2255 kfree(adapter->vf_data);
2256 adapter->vf_data = NULL;
2257 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2260 dev_info(&pdev->dev, "IOV Disabled\n");
2264 iounmap(hw->hw_addr);
2265 if (hw->flash_address)
2266 iounmap(hw->flash_address);
2267 pci_release_selected_regions(pdev,
2268 pci_select_bars(pdev, IORESOURCE_MEM));
2270 free_netdev(netdev);
2272 pci_disable_pcie_error_reporting(pdev);
2274 pci_disable_device(pdev);
2278 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2279 * @adapter: board private structure to initialize
2281 * This function initializes the vf specific data storage and then attempts to
2282 * allocate the VFs. The reason for this ordering is that it is much
2283 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2284 * the memory for the VFs.
2286 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2288 #ifdef CONFIG_PCI_IOV
2289 struct pci_dev *pdev = adapter->pdev;
2291 if (adapter->vfs_allocated_count) {
2292 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2293 sizeof(struct vf_data_storage),
2295 /* if allocation failed then we do not support SR-IOV */
2296 if (!adapter->vf_data) {
2297 adapter->vfs_allocated_count = 0;
2298 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2303 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2304 kfree(adapter->vf_data);
2305 adapter->vf_data = NULL;
2306 #endif /* CONFIG_PCI_IOV */
2307 adapter->vfs_allocated_count = 0;
2308 #ifdef CONFIG_PCI_IOV
2310 unsigned char mac_addr[ETH_ALEN];
2312 dev_info(&pdev->dev, "%d vfs allocated\n",
2313 adapter->vfs_allocated_count);
2314 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2315 random_ether_addr(mac_addr);
2316 igb_set_vf_mac(adapter, i, mac_addr);
2318 /* DMA Coalescing is not supported in IOV mode. */
2319 if (adapter->flags & IGB_FLAG_DMAC)
2320 adapter->flags &= ~IGB_FLAG_DMAC;
2322 #endif /* CONFIG_PCI_IOV */
2327 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2328 * @adapter: board private structure to initialize
2330 * igb_init_hw_timer initializes the function pointer and values for the hw
2331 * timer found in hardware.
2333 static void igb_init_hw_timer(struct igb_adapter *adapter)
2335 struct e1000_hw *hw = &adapter->hw;
2337 switch (hw->mac.type) {
2340 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2341 adapter->cycles.read = igb_read_clock;
2342 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2343 adapter->cycles.mult = 1;
2345 * The 82580 timesync updates the system timer in 8ns increments
2346 * and the value cannot be shifted. Instead we need to shift
2347 * the registers to generate a 64bit timer value. As a result
2348 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2349 * 24 in order to generate a larger value for synchronization.
2351 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
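/* Worked example (illustrative, not from the original source): with
 * cycles.mult = 1 and cycles.shift = IGB_82580_TSYNC_SHIFT (24), the
 * timecounter computes ns = cycles >> 24, so the composed register
 * value handed back by the read callback must carry the timestamp
 * pre-shifted left by 24 bits; e.g. a 1000ns reading is represented
 * as the cycle value 1000 << 24 until the shift is divided back out.
 */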
2352 /* disable system timer temporarily by setting bit 31 */
2353 wr32(E1000_TSAUXC, 0x80000000);
2356 /* Set registers so that rollover occurs soon to test this. */
2357 wr32(E1000_SYSTIMR, 0x00000000);
2358 wr32(E1000_SYSTIML, 0x80000000);
2359 wr32(E1000_SYSTIMH, 0x000000FF);
2362 /* enable system timer by clearing bit 31 */
2363 wr32(E1000_TSAUXC, 0x0);
2366 timecounter_init(&adapter->clock,
2368 ktime_to_ns(ktime_get_real()));
2370 * Synchronize our NIC clock against system wall clock. NIC
2371 * time stamp reading requires ~3us per sample, and each sample
2372 * was quite stable even under load, so we only require 10
2373 * samples for each offset comparison.
2375 memset(&adapter->compare, 0, sizeof(adapter->compare));
2376 adapter->compare.source = &adapter->clock;
2377 adapter->compare.target = ktime_get_real;
2378 adapter->compare.num_samples = 10;
2379 timecompare_update(&adapter->compare, 0);
2383 * Initialize hardware timer: we keep it running just in case
2384 * that some program needs it later on.
2386 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2387 adapter->cycles.read = igb_read_clock;
2388 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2389 adapter->cycles.mult = 1;
2391 * Scale the NIC clock cycle by a large factor so that
2392 * relatively small clock corrections can be added or
2393 * subtracted at each clock tick. The drawbacks of a large
2394 * factor are a) that the clock register overflows more quickly
2395 * (not such a big deal) and b) that the increment per tick has
2396 * to fit into 24 bits. As a result we need to use a shift of
2397 * 19 so we can fit a value of 16 into the TIMINCA register.
2399 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2401 (1 << E1000_TIMINCA_16NS_SHIFT) |
2402 (16 << IGB_82576_TSYNC_SHIFT));
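/* Worked arithmetic (illustrative, not from the original source):
 * with IGB_82576_TSYNC_SHIFT = 19 the scaled increment written above
 * is 16 << 19 = 8388608, comfortably inside TIMINCA's 24-bit
 * increment field (max 16777215). The matching cycles.shift of 19
 * divides the 2^19 factor back out when cycles are converted to
 * nanoseconds, leaving the real 16ns-per-tick increment.
 */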
2404 /* Set registers so that rollover occurs soon to test this. */
2405 wr32(E1000_SYSTIML, 0x00000000);
2406 wr32(E1000_SYSTIMH, 0xFF800000);
2409 timecounter_init(&adapter->clock,
2411 ktime_to_ns(ktime_get_real()));
2413 * Synchronize our NIC clock against system wall clock. NIC
2414 * time stamp reading requires ~3us per sample, and each sample
2415 * was quite stable even under load, so we only require 10
2416 * samples for each offset comparison.
2418 memset(&adapter->compare, 0, sizeof(adapter->compare));
2419 adapter->compare.source = &adapter->clock;
2420 adapter->compare.target = ktime_get_real;
2421 adapter->compare.num_samples = 10;
2422 timecompare_update(&adapter->compare, 0);
2425 /* 82575 does not support timesync */
2433 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2434 * @adapter: board private structure to initialize
2436 * igb_sw_init initializes the Adapter private data structure.
2437 * Fields are initialized based on PCI device information and
2438 * OS network device settings (MTU size).
2440 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2442 struct e1000_hw *hw = &adapter->hw;
2443 struct net_device *netdev = adapter->netdev;
2444 struct pci_dev *pdev = adapter->pdev;
2446 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2448 /* set default ring sizes */
2449 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2450 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2452 /* set default ITR values */
2453 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2454 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2456 /* set default work limits */
2457 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2459 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2461 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2465 spin_lock_init(&adapter->stats64_lock);
2466 #ifdef CONFIG_PCI_IOV
2467 switch (hw->mac.type) {
2471 dev_warn(&pdev->dev,
2472 "Maximum of 7 VFs per PF, using max\n");
2473 adapter->vfs_allocated_count = 7;
2475 adapter->vfs_allocated_count = max_vfs;
2480 #endif /* CONFIG_PCI_IOV */
2481 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2482 /* i350 cannot do RSS and SR-IOV at the same time */
2483 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2484 adapter->rss_queues = 1;
2487 * if rss_queues > 4 or VFs are going to be allocated along with rss_queues
2488 * then we should combine the queues into a queue pair in order to
2489 * conserve interrupts due to limited supply
2491 if ((adapter->rss_queues > 4) ||
2492 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2493 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
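/* Illustrative example (not from the original source): with
 * rss_queues = 8 and no VFs, unpaired operation would need a
 * q_vector (and MSI-X vector) per Tx ring and per Rx ring, i.e. 16
 * queue vectors; IGB_FLAG_QUEUE_PAIRS shares one q_vector between
 * each Tx/Rx pair, halving that to 8.
 */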
2495 /* This call may decrease the number of queues */
2496 if (igb_init_interrupt_scheme(adapter)) {
2497 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2501 igb_probe_vfs(adapter);
2503 /* Explicitly disable IRQ since the NIC can be in any state. */
2504 igb_irq_disable(adapter);
2506 if (hw->mac.type == e1000_i350)
2507 adapter->flags &= ~IGB_FLAG_DMAC;
2509 set_bit(__IGB_DOWN, &adapter->state);
2514 * igb_open - Called when a network interface is made active
2515 * @netdev: network interface device structure
2517 * Returns 0 on success, negative value on failure
2519 * The open entry point is called when a network interface is made
2520 * active by the system (IFF_UP). At this point all resources needed
2521 * for transmit and receive operations are allocated, the interrupt
2522 * handler is registered with the OS, the watchdog timer is started,
2523 * and the stack is notified that the interface is ready.
2525 static int igb_open(struct net_device *netdev)
2527 struct igb_adapter *adapter = netdev_priv(netdev);
2528 struct e1000_hw *hw = &adapter->hw;
2532 /* disallow open during test */
2533 if (test_bit(__IGB_TESTING, &adapter->state))
2536 netif_carrier_off(netdev);
2538 /* allocate transmit descriptors */
2539 err = igb_setup_all_tx_resources(adapter);
2543 /* allocate receive descriptors */
2544 err = igb_setup_all_rx_resources(adapter);
2548 igb_power_up_link(adapter);
2550 /* before we allocate an interrupt, we must be ready to handle it.
2551 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2552 * as soon as we call pci_request_irq, so we have to set up our
2553 * clean_rx handler before we do so. */
2554 igb_configure(adapter);
2556 err = igb_request_irq(adapter);
2560 /* From here on the code is the same as igb_up() */
2561 clear_bit(__IGB_DOWN, &adapter->state);
2563 for (i = 0; i < adapter->num_q_vectors; i++) {
2564 struct igb_q_vector *q_vector = adapter->q_vector[i];
2565 napi_enable(&q_vector->napi);
2568 /* Clear any pending interrupts. */
2571 igb_irq_enable(adapter);
2573 /* notify VFs that reset has been completed */
2574 if (adapter->vfs_allocated_count) {
2575 u32 reg_data = rd32(E1000_CTRL_EXT);
2576 reg_data |= E1000_CTRL_EXT_PFRSTD;
2577 wr32(E1000_CTRL_EXT, reg_data);
2580 netif_tx_start_all_queues(netdev);
2582 /* start the watchdog. */
2583 hw->mac.get_link_status = 1;
2584 schedule_work(&adapter->watchdog_task);
2589 igb_release_hw_control(adapter);
2590 igb_power_down_link(adapter);
2591 igb_free_all_rx_resources(adapter);
2593 igb_free_all_tx_resources(adapter);
2601 * igb_close - Disables a network interface
2602 * @netdev: network interface device structure
2604 * Returns 0, this is not allowed to fail
2606 * The close entry point is called when an interface is de-activated
2607 * by the OS. The hardware is still under the driver's control, but
2608 * needs to be disabled. A global MAC reset is issued to stop the
2609 * hardware, and all transmit and receive resources are freed.
2611 static int igb_close(struct net_device *netdev)
2613 struct igb_adapter *adapter = netdev_priv(netdev);
2615 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2618 igb_free_irq(adapter);
2620 igb_free_all_tx_resources(adapter);
2621 igb_free_all_rx_resources(adapter);
2627 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2628 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2630 * Return 0 on success, negative on failure
2632 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2634 struct device *dev = tx_ring->dev;
2635 int orig_node = dev_to_node(dev);
2638 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2639 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2640 if (!tx_ring->tx_buffer_info)
2641 tx_ring->tx_buffer_info = vzalloc(size);
2642 if (!tx_ring->tx_buffer_info)
2645 /* round up to nearest 4K */
2646 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2647 tx_ring->size = ALIGN(tx_ring->size, 4096);
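/* Illustrative arithmetic (not from the original source, assuming
 * the usual 16-byte union e1000_adv_tx_desc and a default count of
 * 256): 256 * 16 = 4096 bytes is already 4K aligned, while an odd
 * count such as 320 would give 5120 bytes, rounded up to 8192 by
 * the ALIGN() above.
 */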
2649 set_dev_node(dev, tx_ring->numa_node);
2650 tx_ring->desc = dma_alloc_coherent(dev,
2654 set_dev_node(dev, orig_node);
2656 tx_ring->desc = dma_alloc_coherent(dev,
2664 tx_ring->next_to_use = 0;
2665 tx_ring->next_to_clean = 0;
2670 vfree(tx_ring->tx_buffer_info);
2672 "Unable to allocate memory for the transmit descriptor ring\n");
2677 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2678 * (Descriptors) for all queues
2679 * @adapter: board private structure
2681 * Return 0 on success, negative on failure
2683 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2685 struct pci_dev *pdev = adapter->pdev;
2688 for (i = 0; i < adapter->num_tx_queues; i++) {
2689 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2692 "Allocation for Tx Queue %u failed\n", i);
2693 for (i--; i >= 0; i--)
2694 igb_free_tx_resources(adapter->tx_ring[i]);
2703 * igb_setup_tctl - configure the transmit control registers
2704 * @adapter: Board private structure
2706 void igb_setup_tctl(struct igb_adapter *adapter)
2708 struct e1000_hw *hw = &adapter->hw;
2711 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2712 wr32(E1000_TXDCTL(0), 0);
2714 /* Program the Transmit Control Register */
2715 tctl = rd32(E1000_TCTL);
2716 tctl &= ~E1000_TCTL_CT;
2717 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2718 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2720 igb_config_collision_dist(hw);
2722 /* Enable transmits */
2723 tctl |= E1000_TCTL_EN;
2725 wr32(E1000_TCTL, tctl);
2729 * igb_configure_tx_ring - Configure transmit ring after Reset
2730 * @adapter: board private structure
2731 * @ring: tx ring to configure
2733 * Configure a transmit ring after a reset.
2735 void igb_configure_tx_ring(struct igb_adapter *adapter,
2736 struct igb_ring *ring)
2738 struct e1000_hw *hw = &adapter->hw;
2740 u64 tdba = ring->dma;
2741 int reg_idx = ring->reg_idx;
2743 /* disable the queue */
2744 wr32(E1000_TXDCTL(reg_idx), 0);
2748 wr32(E1000_TDLEN(reg_idx),
2749 ring->count * sizeof(union e1000_adv_tx_desc));
2750 wr32(E1000_TDBAL(reg_idx),
2751 tdba & 0x00000000ffffffffULL);
2752 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2754 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2755 wr32(E1000_TDH(reg_idx), 0);
2756 writel(0, ring->tail);
2758 txdctl |= IGB_TX_PTHRESH;
2759 txdctl |= IGB_TX_HTHRESH << 8;
2760 txdctl |= IGB_TX_WTHRESH << 16;
2762 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2763 wr32(E1000_TXDCTL(reg_idx), txdctl);
2767 * igb_configure_tx - Configure transmit Unit after Reset
2768 * @adapter: board private structure
2770 * Configure the Tx unit of the MAC after a reset.
2772 static void igb_configure_tx(struct igb_adapter *adapter)
2776 for (i = 0; i < adapter->num_tx_queues; i++)
2777 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2781 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2782 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2784 * Returns 0 on success, negative on failure
2786 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2788 struct device *dev = rx_ring->dev;
2789 int orig_node = dev_to_node(dev);
2792 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2793 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2794 if (!rx_ring->rx_buffer_info)
2795 rx_ring->rx_buffer_info = vzalloc(size);
2796 if (!rx_ring->rx_buffer_info)
2799 desc_len = sizeof(union e1000_adv_rx_desc);
2801 /* Round up to nearest 4K */
2802 rx_ring->size = rx_ring->count * desc_len;
2803 rx_ring->size = ALIGN(rx_ring->size, 4096);
2805 set_dev_node(dev, rx_ring->numa_node);
2806 rx_ring->desc = dma_alloc_coherent(dev,
2810 set_dev_node(dev, orig_node);
2812 rx_ring->desc = dma_alloc_coherent(dev,
2820 rx_ring->next_to_clean = 0;
2821 rx_ring->next_to_use = 0;
2826 vfree(rx_ring->rx_buffer_info);
2827 rx_ring->rx_buffer_info = NULL;
2828 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2829 " ring\n");
2834 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2835 * (Descriptors) for all queues
2836 * @adapter: board private structure
2838 * Return 0 on success, negative on failure
2840 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2842 struct pci_dev *pdev = adapter->pdev;
2845 for (i = 0; i < adapter->num_rx_queues; i++) {
2846 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2849 "Allocation for Rx Queue %u failed\n", i);
2850 for (i--; i >= 0; i--)
2851 igb_free_rx_resources(adapter->rx_ring[i]);
2860 * igb_setup_mrqc - configure the multiple receive queue control registers
2861 * @adapter: Board private structure
2863 static void igb_setup_mrqc(struct igb_adapter *adapter)
2865 struct e1000_hw *hw = &adapter->hw;
2867 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2872 static const u8 rsshash[40] = {
2873 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2874 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2875 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2876 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2878 /* Fill out hash function seeds */
2879 for (j = 0; j < 10; j++) {
2880 u32 rsskey = rsshash[(j * 4)];
2881 rsskey |= rsshash[(j * 4) + 1] << 8;
2882 rsskey |= rsshash[(j * 4) + 2] << 16;
2883 rsskey |= rsshash[(j * 4) + 3] << 24;
2884 array_wr32(E1000_RSSRK(0), j, rsskey);
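/* Illustrative check (not from the original source): the seed bytes
 * are packed little-endian, so for j = 0 the bytes 0x6d, 0x5a, 0x56,
 * 0xda above combine into rsskey = 0xda565a6d before being written
 * to RSSRK(0).
 */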
2887 num_rx_queues = adapter->rss_queues;
2889 if (adapter->vfs_allocated_count) {
2890 /* 82575 and 82576 support 2 RSS queues for VMDq */
2891 switch (hw->mac.type) {
2908 if (hw->mac.type == e1000_82575)
2912 for (j = 0; j < (32 * 4); j++) {
2913 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2915 reta.bytes[j & 3] |= num_rx_queues << shift2;
2917 wr32(E1000_RETA(j >> 2), reta.dword);
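/* Illustrative mapping (not from the original source): the loop
 * above fills the 128-entry redirection table four bytes at a time
 * (32 RETA registers). With num_rx_queues = 4 and shift = 0,
 * entries 0..127 hold the repeating pattern 0,1,2,3 so incoming RSS
 * hash values spread evenly across the four queues.
 */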
2921 * Disable raw packet checksumming so that RSS hash is placed in
2922 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2923 * offloads as they are enabled by default
2925 rxcsum = rd32(E1000_RXCSUM);
2926 rxcsum |= E1000_RXCSUM_PCSD;
2928 if (adapter->hw.mac.type >= e1000_82576)
2929 /* Enable Receive Checksum Offload for SCTP */
2930 rxcsum |= E1000_RXCSUM_CRCOFL;
2932 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2933 wr32(E1000_RXCSUM, rxcsum);
2935 /* If VMDq is enabled then we set the appropriate mode for that, else
2936 * we default to RSS so that an RSS hash is calculated per packet even
2937 * if we are only using one queue */
2938 if (adapter->vfs_allocated_count) {
2939 if (hw->mac.type > e1000_82575) {
2940 /* Set the default pool for the PF's first queue */
2941 u32 vtctl = rd32(E1000_VT_CTL);
2942 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2943 E1000_VT_CTL_DISABLE_DEF_POOL);
2944 vtctl |= adapter->vfs_allocated_count <<
2945 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2946 wr32(E1000_VT_CTL, vtctl);
2948 if (adapter->rss_queues > 1)
2949 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2951 mrqc = E1000_MRQC_ENABLE_VMDQ;
2953 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2955 igb_vmm_control(adapter);
2958 * Generate RSS hash based on TCP port numbers and/or
2959 * IPv4/v6 src and dst addresses since UDP cannot be
2960 * hashed reliably due to IP fragmentation
2962 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2963 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2964 E1000_MRQC_RSS_FIELD_IPV6 |
2965 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2966 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2968 wr32(E1000_MRQC, mrqc);
2972 * igb_setup_rctl - configure the receive control registers
2973 * @adapter: Board private structure
2975 void igb_setup_rctl(struct igb_adapter *adapter)
2977 struct e1000_hw *hw = &adapter->hw;
2980 rctl = rd32(E1000_RCTL);
2982 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2983 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2985 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2986 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2989 * enable stripping of CRC. It's unlikely this will break BMC
2990 * redirection as it did with e1000. Newer features require
2991 * that the HW strips the CRC.
2993 rctl |= E1000_RCTL_SECRC;
2995 /* disable store bad packets and clear size bits. */
2996 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2998 /* enable LPE to prevent packets larger than max_frame_size */
2999 rctl |= E1000_RCTL_LPE;
3001 /* disable queue 0 to prevent tail write w/o re-config */
3002 wr32(E1000_RXDCTL(0), 0);
3004 /* Attention!!! For SR-IOV PF driver operations you must enable
3005 * queue drop for all VF and PF queues to prevent head of line blocking
3006 * if an un-trusted VF does not provide descriptors to hardware.
3008 if (adapter->vfs_allocated_count) {
3009 /* set all queue drop enable bits */
3010 wr32(E1000_QDE, ALL_QUEUES);
3013 wr32(E1000_RCTL, rctl);
3016 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3019 struct e1000_hw *hw = &adapter->hw;
3022 /* if it isn't the PF, check to see if VFs are enabled and
3023 * increase the size to support VLAN tags */
3024 if (vfn < adapter->vfs_allocated_count &&
3025 adapter->vf_data[vfn].vlans_enabled)
3026 size += VLAN_TAG_SIZE;
3028 vmolr = rd32(E1000_VMOLR(vfn));
3029 vmolr &= ~E1000_VMOLR_RLPML_MASK;
3030 vmolr |= size | E1000_VMOLR_LPE;
3031 wr32(E1000_VMOLR(vfn), vmolr);
3037 * igb_rlpml_set - set maximum receive packet size
3038 * @adapter: board private structure
3040 * Configure maximum receivable packet size.
3042 static void igb_rlpml_set(struct igb_adapter *adapter)
3044 u32 max_frame_size = adapter->max_frame_size;
3045 struct e1000_hw *hw = &adapter->hw;
3046 u16 pf_id = adapter->vfs_allocated_count;
3049 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3051 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3052 * to our max jumbo frame size, in case we need to enable
3053 * jumbo frames on one of the rings later.
3054 * This will not pass over-length frames into the default
3055 * queue because it's gated by the VMOLR.RLPML.
3057 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3060 wr32(E1000_RLPML, max_frame_size);
3063 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3066 struct e1000_hw *hw = &adapter->hw;
3070 * This register exists only on 82576 and newer, so if the MAC is
3071 * older we should exit and do nothing
3073 if (hw->mac.type < e1000_82576)
3076 vmolr = rd32(E1000_VMOLR(vfn));
3077 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3079 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3081 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3083 /* clear all bits that might not be set */
3084 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3086 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3087 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3089 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3090 * multicast
3091 */
3092 if (vfn <= adapter->vfs_allocated_count)
3093 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3095 wr32(E1000_VMOLR(vfn), vmolr);
3099 * igb_configure_rx_ring - Configure a receive ring after Reset
3100 * @adapter: board private structure
3101 * @ring: receive ring to be configured
3103 * Configure the Rx unit of the MAC after a reset.
3105 void igb_configure_rx_ring(struct igb_adapter *adapter,
3106 struct igb_ring *ring)
3108 struct e1000_hw *hw = &adapter->hw;
3109 u64 rdba = ring->dma;
3110 int reg_idx = ring->reg_idx;
3111 u32 srrctl = 0, rxdctl = 0;
3113 /* disable the queue */
3114 wr32(E1000_RXDCTL(reg_idx), 0);
3116 /* Set DMA base address registers */
3117 wr32(E1000_RDBAL(reg_idx),
3118 rdba & 0x00000000ffffffffULL);
3119 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3120 wr32(E1000_RDLEN(reg_idx),
3121 ring->count * sizeof(union e1000_adv_rx_desc));
3123 /* initialize head and tail */
3124 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3125 wr32(E1000_RDH(reg_idx), 0);
3126 writel(0, ring->tail);
3128 /* set descriptor configuration */
3129 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3130 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3131 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3132 #else
3133 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3134 #endif
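/* Illustrative arithmetic (not from the original source, assuming
 * the usual E1000_SRRCTL_BSIZEPKT_SHIFT of 10, i.e. 1KB units): on
 * a 4K-page system the half-page buffer above encodes as
 * (4096 / 2) >> 10 = 2, i.e. a 2KB packet buffer per descriptor.
 */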
3135 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3136 if (hw->mac.type == e1000_82580)
3137 srrctl |= E1000_SRRCTL_TIMESTAMP;
3138 /* Only set Drop Enable if we are supporting multiple queues */
3139 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3140 srrctl |= E1000_SRRCTL_DROP_EN;
3142 wr32(E1000_SRRCTL(reg_idx), srrctl);
3144 /* set filtering for VMDQ pools */
3145 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3147 rxdctl |= IGB_RX_PTHRESH;
3148 rxdctl |= IGB_RX_HTHRESH << 8;
3149 rxdctl |= IGB_RX_WTHRESH << 16;
3151 /* enable receive descriptor fetching */
3152 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3153 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3157 * igb_configure_rx - Configure receive Unit after Reset
3158 * @adapter: board private structure
3160 * Configure the Rx unit of the MAC after a reset.
3162 static void igb_configure_rx(struct igb_adapter *adapter)
3166 /* set UTA to appropriate mode */
3167 igb_set_uta(adapter);
3169 /* set the correct pool for the PF default MAC address in entry 0 */
3170 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3171 adapter->vfs_allocated_count);
3173 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3174 * the Base and Length of the Rx Descriptor Ring */
3175 for (i = 0; i < adapter->num_rx_queues; i++)
3176 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3180 * igb_free_tx_resources - Free Tx Resources per Queue
3181 * @tx_ring: Tx descriptor ring for a specific queue
3183 * Free all transmit software resources
3185 void igb_free_tx_resources(struct igb_ring *tx_ring)
3187 igb_clean_tx_ring(tx_ring);
3189 vfree(tx_ring->tx_buffer_info);
3190 tx_ring->tx_buffer_info = NULL;
3192 /* if not set, then don't free */
3196 dma_free_coherent(tx_ring->dev, tx_ring->size,
3197 tx_ring->desc, tx_ring->dma);
3199 tx_ring->desc = NULL;
3203 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3204 * @adapter: board private structure
3206 * Free all transmit software resources
3208 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3212 for (i = 0; i < adapter->num_tx_queues; i++)
3213 igb_free_tx_resources(adapter->tx_ring[i]);
3216 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3217 struct igb_tx_buffer *tx_buffer)
3219 if (tx_buffer->skb) {
3220 dev_kfree_skb_any(tx_buffer->skb);
3222 dma_unmap_single(ring->dev,
3226 } else if (tx_buffer->dma) {
3227 dma_unmap_page(ring->dev,
3232 tx_buffer->next_to_watch = NULL;
3233 tx_buffer->skb = NULL;
3235 /* buffer_info must be completely set up in the transmit path */
3239 * igb_clean_tx_ring - Free Tx Buffers
3240 * @tx_ring: ring to be cleaned
3242 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3244 struct igb_tx_buffer *buffer_info;
3248 if (!tx_ring->tx_buffer_info)
3250 /* Free all the Tx ring sk_buffs */
3252 for (i = 0; i < tx_ring->count; i++) {
3253 buffer_info = &tx_ring->tx_buffer_info[i];
3254 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3257 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3258 memset(tx_ring->tx_buffer_info, 0, size);
3260 /* Zero out the descriptor ring */
3261 memset(tx_ring->desc, 0, tx_ring->size);
3263 tx_ring->next_to_use = 0;
3264 tx_ring->next_to_clean = 0;
3268 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3269 * @adapter: board private structure
3271 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3275 for (i = 0; i < adapter->num_tx_queues; i++)
3276 igb_clean_tx_ring(adapter->tx_ring[i]);
3280 * igb_free_rx_resources - Free Rx Resources
3281 * @rx_ring: ring to clean the resources from
3283 * Free all receive software resources
3285 void igb_free_rx_resources(struct igb_ring *rx_ring)
3287 igb_clean_rx_ring(rx_ring);
3289 vfree(rx_ring->rx_buffer_info);
3290 rx_ring->rx_buffer_info = NULL;
3292 /* if not set, then don't free */
3296 dma_free_coherent(rx_ring->dev, rx_ring->size,
3297 rx_ring->desc, rx_ring->dma);
3299 rx_ring->desc = NULL;
3303 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3304 * @adapter: board private structure
3306 * Free all receive software resources
3308 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3312 for (i = 0; i < adapter->num_rx_queues; i++)
3313 igb_free_rx_resources(adapter->rx_ring[i]);
3317 * igb_clean_rx_ring - Free Rx Buffers per Queue
3318 * @rx_ring: ring to free buffers from
3320 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3325 if (!rx_ring->rx_buffer_info)
3328 /* Free all the Rx ring sk_buffs */
3329 for (i = 0; i < rx_ring->count; i++) {
3330 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3331 if (buffer_info->dma) {
3332 dma_unmap_single(rx_ring->dev,
3336 buffer_info->dma = 0;
3339 if (buffer_info->skb) {
3340 dev_kfree_skb(buffer_info->skb);
3341 buffer_info->skb = NULL;
3343 if (buffer_info->page_dma) {
3344 dma_unmap_page(rx_ring->dev,
3345 buffer_info->page_dma,
3348 buffer_info->page_dma = 0;
3350 if (buffer_info->page) {
3351 put_page(buffer_info->page);
3352 buffer_info->page = NULL;
3353 buffer_info->page_offset = 0;
3357 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3358 memset(rx_ring->rx_buffer_info, 0, size);
3360 /* Zero out the descriptor ring */
3361 memset(rx_ring->desc, 0, rx_ring->size);
3363 rx_ring->next_to_clean = 0;
3364 rx_ring->next_to_use = 0;
3368 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3369 * @adapter: board private structure
3371 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3375 for (i = 0; i < adapter->num_rx_queues; i++)
3376 igb_clean_rx_ring(adapter->rx_ring[i]);
3380 * igb_set_mac - Change the Ethernet Address of the NIC
3381 * @netdev: network interface device structure
3382 * @p: pointer to an address structure
3384 * Returns 0 on success, negative on failure
3386 static int igb_set_mac(struct net_device *netdev, void *p)
3388 struct igb_adapter *adapter = netdev_priv(netdev);
3389 struct e1000_hw *hw = &adapter->hw;
3390 struct sockaddr *addr = p;
3392 if (!is_valid_ether_addr(addr->sa_data))
3393 return -EADDRNOTAVAIL;
3395 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3396 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3398 /* set the correct pool for the new PF MAC address in entry 0 */
3399 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3400 adapter->vfs_allocated_count);
3406 * igb_write_mc_addr_list - write multicast addresses to MTA
3407 * @netdev: network interface device structure
3409 * Writes multicast address list to the MTA hash table.
3410 * Returns: -ENOMEM on failure
3411 * 0 on no addresses written
3412 * X on writing X addresses to MTA
3414 static int igb_write_mc_addr_list(struct net_device *netdev)
3416 struct igb_adapter *adapter = netdev_priv(netdev);
3417 struct e1000_hw *hw = &adapter->hw;
3418 struct netdev_hw_addr *ha;
3422 if (netdev_mc_empty(netdev)) {
3423 /* nothing to program, so clear mc list */
3424 igb_update_mc_addr_list(hw, NULL, 0);
3425 igb_restore_vf_multicasts(adapter);
3429 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3433 /* The shared function expects a packed array of only addresses. */
3435 netdev_for_each_mc_addr(ha, netdev)
3436 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3438 igb_update_mc_addr_list(hw, mta_list, i);
3441 return netdev_mc_count(netdev);
3445 * igb_write_uc_addr_list - write unicast addresses to RAR table
3446 * @netdev: network interface device structure
3448 * Writes unicast address list to the RAR table.
3449 * Returns: -ENOMEM on failure/insufficient address space
3450 * 0 on no addresses written
3451 * X on writing X addresses to the RAR table
3453 static int igb_write_uc_addr_list(struct net_device *netdev)
3455 struct igb_adapter *adapter = netdev_priv(netdev);
3456 struct e1000_hw *hw = &adapter->hw;
3457 unsigned int vfn = adapter->vfs_allocated_count;
3458 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
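/* Illustrative example (not from the original source): if the MAC
 * exposes 24 RAR entries and 7 VFs are enabled, entry 0 holds the
 * PF default MAC and one entry is reserved per VF, leaving
 * 24 - (7 + 1) = 16 entries for additional unicast addresses here.
 */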
3461 /* return ENOMEM indicating insufficient memory for addresses */
3462 if (netdev_uc_count(netdev) > rar_entries)
3465 if (!netdev_uc_empty(netdev) && rar_entries) {
3466 struct netdev_hw_addr *ha;
3468 netdev_for_each_uc_addr(ha, netdev) {
3471 igb_rar_set_qsel(adapter, ha->addr,
3477 /* write the addresses in reverse order to avoid write combining */
3478 for (; rar_entries > 0 ; rar_entries--) {
3479 wr32(E1000_RAH(rar_entries), 0);
3480 wr32(E1000_RAL(rar_entries), 0);
3488 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3489 * @netdev: network interface device structure
3491 * The set_rx_mode entry point is called whenever the unicast or multicast
3492 * address lists or the network interface flags are updated. This routine is
3493 * responsible for configuring the hardware for proper unicast, multicast,
3494 * promiscuous mode, and all-multi behavior.
3496 static void igb_set_rx_mode(struct net_device *netdev)
3498 struct igb_adapter *adapter = netdev_priv(netdev);
3499 struct e1000_hw *hw = &adapter->hw;
3500 unsigned int vfn = adapter->vfs_allocated_count;
3501 u32 rctl, vmolr = 0;
3504 /* Check for Promiscuous and All Multicast modes */
3505 rctl = rd32(E1000_RCTL);
3507 /* clear the affected bits */
3508 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3510 if (netdev->flags & IFF_PROMISC) {
3511 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3512 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3514 if (netdev->flags & IFF_ALLMULTI) {
3515 rctl |= E1000_RCTL_MPE;
3516 vmolr |= E1000_VMOLR_MPME;
3519 * Write addresses to the MTA; if the attempt fails
3520 * then we should just turn on promiscuous mode so
3521 * that we can at least receive multicast traffic
3523 count = igb_write_mc_addr_list(netdev);
3525 rctl |= E1000_RCTL_MPE;
3526 vmolr |= E1000_VMOLR_MPME;
3528 vmolr |= E1000_VMOLR_ROMPE;
3532 * Write addresses to available RAR registers; if there is not
3533 * sufficient space to store all the addresses then enable
3534 * unicast promiscuous mode
3536 count = igb_write_uc_addr_list(netdev);
3538 rctl |= E1000_RCTL_UPE;
3539 vmolr |= E1000_VMOLR_ROPE;
3541 rctl |= E1000_RCTL_VFE;
3543 wr32(E1000_RCTL, rctl);
3546 * In order to support SR-IOV and eventually VMDq it is necessary to set
3547 * the VMOLR to enable the appropriate modes. Without this workaround
3548 * we will have issues with VLAN tag stripping not being done for frames
3549 * that are only arriving because we are the default pool
3551 if (hw->mac.type < e1000_82576)
3554 vmolr |= rd32(E1000_VMOLR(vfn)) &
3555 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3556 wr32(E1000_VMOLR(vfn), vmolr);
3557 igb_restore_vf_multicasts(adapter);
3560 static void igb_check_wvbr(struct igb_adapter *adapter)
3562 struct e1000_hw *hw = &adapter->hw;
3565 switch (hw->mac.type) {
3568 if (!(wvbr = rd32(E1000_WVBR)))
3575 adapter->wvbr |= wvbr;
3578 #define IGB_STAGGERED_QUEUE_OFFSET 8
3580 static void igb_spoof_check(struct igb_adapter *adapter)
3587 for (j = 0; j < adapter->vfs_allocated_count; j++) {
3588 if (adapter->wvbr & (1 << j) ||
3589 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3590 dev_warn(&adapter->pdev->dev,
3591 "Spoof event(s) detected on VF %d\n", j);
3594 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3599 /* Need to wait a few seconds after link up to get diagnostic information from
3600 * the phy */
3601 static void igb_update_phy_info(unsigned long data)
3603 struct igb_adapter *adapter = (struct igb_adapter *) data;
3604 igb_get_phy_info(&adapter->hw);
3608 * igb_has_link - check shared code for link and determine up/down
3609 * @adapter: pointer to driver private info
3611 bool igb_has_link(struct igb_adapter *adapter)
3613 struct e1000_hw *hw = &adapter->hw;
3614 bool link_active = false;
3617 /* get_link_status is set on LSC (link status) interrupt or
3618 * rx sequence error interrupt. get_link_status will stay
3619 * false until the e1000_check_for_link establishes link
3620 * for copper adapters ONLY
3622 switch (hw->phy.media_type) {
3623 case e1000_media_type_copper:
3624 if (hw->mac.get_link_status) {
3625 ret_val = hw->mac.ops.check_for_link(hw);
3626 link_active = !hw->mac.get_link_status;
3631 case e1000_media_type_internal_serdes:
3632 ret_val = hw->mac.ops.check_for_link(hw);
3633 link_active = hw->mac.serdes_has_link;
3636 case e1000_media_type_unknown:
3643 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3646 u32 ctrl_ext, thstat;
3648 /* check for thermal sensor event on i350, copper only */
3649 if (hw->mac.type == e1000_i350) {
3650 thstat = rd32(E1000_THSTAT);
3651 ctrl_ext = rd32(E1000_CTRL_EXT);
3653 if ((hw->phy.media_type == e1000_media_type_copper) &&
3654 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3655 ret = !!(thstat & event);
3663 * igb_watchdog - Timer Call-back
3664 * @data: pointer to adapter cast into an unsigned long
3666 static void igb_watchdog(unsigned long data)
3668 struct igb_adapter *adapter = (struct igb_adapter *)data;
3669 /* Do the rest outside of interrupt context */
3670 schedule_work(&adapter->watchdog_task);
3673 static void igb_watchdog_task(struct work_struct *work)
3675 struct igb_adapter *adapter = container_of(work,
3678 struct e1000_hw *hw = &adapter->hw;
3679 struct net_device *netdev = adapter->netdev;
3683 link = igb_has_link(adapter);
3685 if (!netif_carrier_ok(netdev)) {
3687 hw->mac.ops.get_speed_and_duplex(hw,
3688 &adapter->link_speed,
3689 &adapter->link_duplex);
3691 ctrl = rd32(E1000_CTRL);
3692 /* Link status message must follow this format */
3693 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3694 "Flow Control: %s\n",
3696 adapter->link_speed,
3697 adapter->link_duplex == FULL_DUPLEX ?
3698 "Full Duplex" : "Half Duplex",
3699 ((ctrl & E1000_CTRL_TFCE) &&
3700 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3701 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3702 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3704 /* check for thermal sensor event */
3705 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3706 printk(KERN_INFO "igb: %s The network adapter "
3707 "link speed was downshifted "
3708 "because it overheated.\n",
3712 /* adjust timeout factor according to speed/duplex */
3713 adapter->tx_timeout_factor = 1;
3714 switch (adapter->link_speed) {
3716 adapter->tx_timeout_factor = 14;
3719 /* maybe add some timeout factor ? */
3723 netif_carrier_on(netdev);
3725 igb_ping_all_vfs(adapter);
3726 igb_check_vf_rate_limit(adapter);
3728 /* link state has changed, schedule phy info update */
3729 if (!test_bit(__IGB_DOWN, &adapter->state))
3730 mod_timer(&adapter->phy_info_timer,
3731 round_jiffies(jiffies + 2 * HZ));
3734 if (netif_carrier_ok(netdev)) {
3735 adapter->link_speed = 0;
3736 adapter->link_duplex = 0;
3738 /* check for thermal sensor event */
3739 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3740 printk(KERN_ERR "igb: %s The network adapter "
3741 "was stopped because it "
3746 /* Link status message must follow this format */
3747 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3749 netif_carrier_off(netdev);
3751 igb_ping_all_vfs(adapter);
3753 /* link state has changed, schedule phy info update */
3754 if (!test_bit(__IGB_DOWN, &adapter->state))
3755 mod_timer(&adapter->phy_info_timer,
3756 round_jiffies(jiffies + 2 * HZ));
3760 spin_lock(&adapter->stats64_lock);
3761 igb_update_stats(adapter, &adapter->stats64);
3762 spin_unlock(&adapter->stats64_lock);
3764 for (i = 0; i < adapter->num_tx_queues; i++) {
3765 struct igb_ring *tx_ring = adapter->tx_ring[i];
3766 if (!netif_carrier_ok(netdev)) {
3767 /* We've lost link, so the controller stops DMA,
3768 * but we've got queued Tx work that's never going
3769 * to get done, so reset controller to flush Tx.
3770 * (Do the reset outside of interrupt context). */
3771 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3772 adapter->tx_timeout_count++;
3773 schedule_work(&adapter->reset_task);
3774 /* return immediately since reset is imminent */
3779 /* Force detection of hung controller every watchdog period */
3780 tx_ring->detect_tx_hung = true;
3783 /* Cause software interrupt to ensure rx ring is cleaned */
3784 if (adapter->msix_entries) {
3786 for (i = 0; i < adapter->num_q_vectors; i++) {
3787 struct igb_q_vector *q_vector = adapter->q_vector[i];
3788 eics |= q_vector->eims_value;
3790 wr32(E1000_EICS, eics);
3792 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3795 igb_spoof_check(adapter);
3797 /* Reset the timer */
3798 if (!test_bit(__IGB_DOWN, &adapter->state))
3799 mod_timer(&adapter->watchdog_timer,
3800 round_jiffies(jiffies + 2 * HZ));
3803 enum latency_range {
3807 latency_invalid = 255
3811 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3813 * Stores a new ITR value based strictly on packet size. This
3814 * algorithm is less sophisticated than that used in igb_update_itr,
3815 * due to the difficulty of synchronizing statistics across multiple
3816 * receive rings. The divisors and thresholds used by this function
3817 * were determined based on theoretical maximum wire speed and testing
3818 * data, in order to minimize response time while increasing bulk
3820 * This functionality is controlled by the InterruptThrottleRate module
3821 * parameter (see igb_param.c)
3822 * NOTE: This function is called only when operating in a multiqueue
3823 * receive environment.
3824 * @q_vector: pointer to q_vector
3826 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3828 int new_val = q_vector->itr_val;
3829 int avg_wire_size = 0;
3830 struct igb_adapter *adapter = q_vector->adapter;
3831 unsigned int packets;
3833 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3834 * ints/sec - ITR timer value of 120 ticks.
3836 if (adapter->link_speed != SPEED_1000) {
3837 new_val = IGB_4K_ITR;
3841 packets = q_vector->rx.total_packets;
3843 avg_wire_size = q_vector->rx.total_bytes / packets;
3845 packets = q_vector->tx.total_packets;
3847 avg_wire_size = max_t(u32, avg_wire_size,
3848 q_vector->tx.total_bytes / packets);
3850 /* if avg_wire_size isn't set no work was done */
3854 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3855 avg_wire_size += 24;
3857 /* Don't starve jumbo frames */
3858 avg_wire_size = min(avg_wire_size, 3000);
3860 /* Give a little boost to mid-size frames */
3861 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3862 new_val = avg_wire_size / 3;
3864 new_val = avg_wire_size / 2;
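/* Illustrative arithmetic (not from the original source): a stream
 * of 300-byte frames gives avg_wire_size = 324 after the 24-byte
 * overhead above, which lands in the mid-size band, so
 * new_val = 324 / 3 = 108; 100-byte frames would instead take the
 * divide-by-two branch: (100 + 24) / 2 = 62.
 */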
3866 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3867 if (new_val < IGB_20K_ITR &&
3868 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3869 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3870 new_val = IGB_20K_ITR;
3873 if (new_val != q_vector->itr_val) {
3874 q_vector->itr_val = new_val;
3875 q_vector->set_itr = 1;
3878 q_vector->rx.total_bytes = 0;
3879 q_vector->rx.total_packets = 0;
3880 q_vector->tx.total_bytes = 0;
3881 q_vector->tx.total_packets = 0;
3885 * igb_update_itr - update the dynamic ITR value based on statistics
3886 * Stores a new ITR value based on packets and byte
3887 * counts during the last interrupt. The advantage of per interrupt
3888 * computation is faster updates and more accurate ITR for the current
3889 * traffic pattern. Constants in this function were computed
3890 * based on theoretical maximum wire speed and thresholds were set based
3891 * on testing data as well as attempting to minimize response time
3892 * while increasing bulk throughput.
3893 * This functionality is controlled by the InterruptThrottleRate module
3894 * parameter (see igb_param.c)
3895 * NOTE: These calculations are only valid when operating in a single-
3896 * queue environment.
3897 * @q_vector: pointer to q_vector
3898 * @ring_container: ring info to update the itr for
3900 static void igb_update_itr(struct igb_q_vector *q_vector,
3901 struct igb_ring_container *ring_container)
3903 unsigned int packets = ring_container->total_packets;
3904 unsigned int bytes = ring_container->total_bytes;
3905 u8 itrval = ring_container->itr;
3907 /* no packets, exit with status unchanged */
3912 case lowest_latency:
3913 /* handle TSO and jumbo frames */
3914 if (bytes/packets > 8000)
3915 itrval = bulk_latency;
3916 else if ((packets < 5) && (bytes > 512))
3917 itrval = low_latency;
3919 case low_latency: /* 50 usec aka 20000 ints/s */
3920 if (bytes > 10000) {
3921 /* this if handles the TSO accounting */
3922 if (bytes/packets > 8000) {
3923 itrval = bulk_latency;
3924 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3925 itrval = bulk_latency;
3926 } else if ((packets > 35)) {
3927 itrval = lowest_latency;
3929 } else if (bytes/packets > 2000) {
3930 itrval = bulk_latency;
3931 } else if (packets <= 2 && bytes < 512) {
3932 itrval = lowest_latency;
3935 case bulk_latency: /* 250 usec aka 4000 ints/s */
3936 if (bytes > 25000) {
3938 itrval = low_latency;
3939 } else if (bytes < 1500) {
3940 itrval = low_latency;
3945 /* clear work counters since we have the values we need */
3946 ring_container->total_bytes = 0;
3947 ring_container->total_packets = 0;
3949 /* write updated itr to ring container */
3950 ring_container->itr = itrval;
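/* Illustrative walk-through (not from the original source): starting
 * from low_latency with 40 packets totalling 12000 bytes, bytes >
 * 10000 and bytes/packets = 300, so the packets > 35 test promotes
 * the ring to lowest_latency; the same 12000 bytes spread over only
 * 8 packets (1500 bytes each) would instead demote it to
 * bulk_latency via the (packets < 10) test.
 */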
3953 static void igb_set_itr(struct igb_q_vector *q_vector)
3955 struct igb_adapter *adapter = q_vector->adapter;
3956 u32 new_itr = q_vector->itr_val;
3959 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3960 if (adapter->link_speed != SPEED_1000) {
3962 new_itr = IGB_4K_ITR;
3966 igb_update_itr(q_vector, &q_vector->tx);
3967 igb_update_itr(q_vector, &q_vector->rx);
3969 current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3971 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3972 if (current_itr == lowest_latency &&
3973 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3974 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3975 current_itr = low_latency;
3977 switch (current_itr) {
3978 /* counts and packets in update_itr are dependent on these numbers */
3979 case lowest_latency:
3980 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3983 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3986 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
3993 if (new_itr != q_vector->itr_val) {
3994 /* this attempts to bias the interrupt rate towards Bulk
3995 * by adding intermediate steps when the interrupt rate is
3996 * increasing */
3997 new_itr = new_itr > q_vector->itr_val ?
3998 max((new_itr * q_vector->itr_val) /
3999 (new_itr + (q_vector->itr_val >> 2)),
4002 /* Don't write the value here; it resets the adapter's
4003 * internal timer, and causes us to delay far longer than
4004 * we should between interrupts. Instead, we write the ITR
4005 * value at the beginning of the next interrupt so the timing
4006 * ends up being correct.
4008 q_vector->itr_val = new_itr;
4009 q_vector->set_itr = 1;
4013 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4014 u32 type_tucmd, u32 mss_l4len_idx)
4016 struct e1000_adv_tx_context_desc *context_desc;
4017 u16 i = tx_ring->next_to_use;
4019 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4022 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4024 /* set bits to identify this as an advanced context descriptor */
4025 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4027 /* For 82575, context index must be unique per ring. */
4028 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4029 mss_l4len_idx |= tx_ring->reg_idx << 4;
4031 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
4032 context_desc->seqnum_seed = 0;
4033 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
4034 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4037 static int igb_tso(struct igb_ring *tx_ring,
4038 struct igb_tx_buffer *first,
4041 struct sk_buff *skb = first->skb;
4042 u32 vlan_macip_lens, type_tucmd;
4043 u32 mss_l4len_idx, l4len;
4045 if (!skb_is_gso(skb))
4048 if (skb_header_cloned(skb)) {
4049 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4054 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4055 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4057 if (first->protocol == __constant_htons(ETH_P_IP)) {
4058 struct iphdr *iph = ip_hdr(skb);
4061 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4065 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4066 first->tx_flags |= IGB_TX_FLAGS_TSO |
4069 } else if (skb_is_gso_v6(skb)) {
4070 ipv6_hdr(skb)->payload_len = 0;
4071 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4072 &ipv6_hdr(skb)->daddr,
4074 first->tx_flags |= IGB_TX_FLAGS_TSO |
4078 /* compute header lengths */
4079 l4len = tcp_hdrlen(skb);
4080 *hdr_len = skb_transport_offset(skb) + l4len;
4082 /* update gso size and bytecount with header size */
4083 first->gso_segs = skb_shinfo(skb)->gso_segs;
4084 first->bytecount += (first->gso_segs - 1) * *hdr_len;
4087 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4088 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
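/* Illustrative packing (not from the original source, assuming the
 * usual shifts E1000_ADVTXD_L4LEN_SHIFT = 8 and
 * E1000_ADVTXD_MSS_SHIFT = 16): a 20-byte TCP header with a
 * 1448-byte MSS packs as (1448 << 16) | (20 << 8) = 0x05a81400.
 */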
4090 /* VLAN MACLEN IPLEN */
4091 vlan_macip_lens = skb_network_header_len(skb);
4092 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4093 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4095 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4100 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4102 struct sk_buff *skb = first->skb;
4103 u32 vlan_macip_lens = 0;
4104 u32 mss_l4len_idx = 0;
4107 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4108 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4112 switch (first->protocol) {
4113 case __constant_htons(ETH_P_IP):
4114 vlan_macip_lens |= skb_network_header_len(skb);
4115 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4116 l4_hdr = ip_hdr(skb)->protocol;
4118 case __constant_htons(ETH_P_IPV6):
4119 vlan_macip_lens |= skb_network_header_len(skb);
4120 l4_hdr = ipv6_hdr(skb)->nexthdr;
4123 if (unlikely(net_ratelimit())) {
4124 dev_warn(tx_ring->dev,
4125 "partial checksum but proto=%x!\n",
4133 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4134 mss_l4len_idx = tcp_hdrlen(skb) <<
4135 E1000_ADVTXD_L4LEN_SHIFT;
4138 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4139 mss_l4len_idx = sizeof(struct sctphdr) <<
4140 E1000_ADVTXD_L4LEN_SHIFT;
4143 mss_l4len_idx = sizeof(struct udphdr) <<
4144 E1000_ADVTXD_L4LEN_SHIFT;
4147 if (unlikely(net_ratelimit())) {
4148 dev_warn(tx_ring->dev,
4149 "partial checksum but l4 proto=%x!\n",
4155 /* update TX checksum flag */
4156 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4159 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4160 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4162 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4165 static __le32 igb_tx_cmd_type(u32 tx_flags)
4167 /* set type for advanced descriptor with frame checksum insertion */
4168 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4169 E1000_ADVTXD_DCMD_IFCS |
4170 E1000_ADVTXD_DCMD_DEXT);
4172 /* set HW vlan bit if vlan is present */
4173 if (tx_flags & IGB_TX_FLAGS_VLAN)
4174 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4176 /* set timestamp bit if present */
4177 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4178 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4180 /* set segmentation bits for TSO */
4181 if (tx_flags & IGB_TX_FLAGS_TSO)
4182 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4187 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4188 union e1000_adv_tx_desc *tx_desc,
4189 u32 tx_flags, unsigned int paylen)
4191 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4193 /* 82575 requires a unique index per ring if any offload is enabled */
4194 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4195 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4196 olinfo_status |= tx_ring->reg_idx << 4;
4198 /* insert L4 checksum */
4199 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4200 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4202 /* insert IPv4 checksum */
4203 if (tx_flags & IGB_TX_FLAGS_IPV4)
4204 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4207 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4211 * The largest size we can write to the descriptor is 65535. In order to
4212 * maintain a power of two alignment we have to limit ourselves to 32K.
4214 #define IGB_MAX_TXD_PWR 15
4215 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
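/*
 * Example: with IGB_MAX_DATA_PER_TXD = 32K, a 60 KB linear mapping is
 * split by igb_tx_map() below into a 32 KB and a 28 KB data descriptor,
 * since a single descriptor may never cover more than 32K bytes.
 */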
4217 static void igb_tx_map(struct igb_ring *tx_ring,
4218 struct igb_tx_buffer *first,
4221 struct sk_buff *skb = first->skb;
4222 struct igb_tx_buffer *tx_buffer_info;
4223 union e1000_adv_tx_desc *tx_desc;
4225 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4226 unsigned int data_len = skb->data_len;
4227 unsigned int size = skb_headlen(skb);
4228 unsigned int paylen = skb->len - hdr_len;
4230 u32 tx_flags = first->tx_flags;
4231 u16 i = tx_ring->next_to_use;
4233 tx_desc = IGB_TX_DESC(tx_ring, i);
4235 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4236 cmd_type = igb_tx_cmd_type(tx_flags);
4238 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4239 if (dma_mapping_error(tx_ring->dev, dma))
4242 /* record length, and DMA address */
4243 first->length = size;
4245 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4248 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4249 tx_desc->read.cmd_type_len =
4250 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4254 if (i == tx_ring->count) {
4255 tx_desc = IGB_TX_DESC(tx_ring, 0);
4259 dma += IGB_MAX_DATA_PER_TXD;
4260 size -= IGB_MAX_DATA_PER_TXD;
4262 tx_desc->read.olinfo_status = 0;
4263 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4266 if (likely(!data_len))
4269 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4273 if (i == tx_ring->count) {
4274 tx_desc = IGB_TX_DESC(tx_ring, 0);
4281 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4282 size, DMA_TO_DEVICE);
4283 if (dma_mapping_error(tx_ring->dev, dma))
4286 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4287 tx_buffer_info->length = size;
4288 tx_buffer_info->dma = dma;
4290 tx_desc->read.olinfo_status = 0;
4291 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4296 /* write last descriptor with RS and EOP bits */
4297 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4298 tx_desc->read.cmd_type_len = cmd_type;
4300 /* set the timestamp */
4301 first->time_stamp = jiffies;
4304 * Force memory writes to complete before letting h/w know there
4305 * are new descriptors to fetch. (Only applicable for weak-ordered
4306 * memory model archs, such as IA-64).
4308 * We also need this memory barrier to make certain all of the
4309 * status bits have been updated before next_to_watch is written.
4313 /* set next_to_watch value indicating a packet is present */
4314 first->next_to_watch = tx_desc;
4317 if (i == tx_ring->count)
4320 tx_ring->next_to_use = i;
4322 writel(i, tx_ring->tail);
4324 /* we need this if more than one processor can write to our tail
4325 * at a time; it synchronizes IO on IA64/Altix systems */
4331 dev_err(tx_ring->dev, "TX DMA map failed\n");
4333 /* clear dma mappings for failed tx_buffer_info map */
4335 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4336 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4337 if (tx_buffer_info == first)
4344 tx_ring->next_to_use = i;
4347 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4349 struct net_device *netdev = tx_ring->netdev;
4351 netif_stop_subqueue(netdev, tx_ring->queue_index);
4353 /* Herbert's original patch had:
4354 * smp_mb__after_netif_stop_queue();
4355 * but since that doesn't exist yet, just open code it. */
4358 /* We need to check again in case another CPU has just
4359 * made room available. */
4360 if (igb_desc_unused(tx_ring) < size)
4364 netif_wake_subqueue(netdev, tx_ring->queue_index);
4366 u64_stats_update_begin(&tx_ring->tx_syncp2);
4367 tx_ring->tx_stats.restart_queue2++;
4368 u64_stats_update_end(&tx_ring->tx_syncp2);
4373 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4375 if (igb_desc_unused(tx_ring) >= size)
4377 return __igb_maybe_stop_tx(tx_ring, size);
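/*
 * igb_xmit_frame_ring - main per-ring transmit path: reserve enough
 * descriptors, record VLAN and hardware timestamp flags, set up TSO or
 * checksum offload, then map the buffers and bump the tail pointer.
 */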
4380 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4381 struct igb_ring *tx_ring)
4383 struct igb_tx_buffer *first;
4386 __be16 protocol = vlan_get_protocol(skb);
4389 /* need: 1 descriptor per page,
4390 * + 2 desc gap to keep tail from touching head,
4391 * + 1 desc for skb->data,
4392 * + 1 desc for context descriptor,
4393 * otherwise try next time */
4394 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4395 /* this is a hard error */
4396 return NETDEV_TX_BUSY;
4399 /* record the location of the first descriptor for this packet */
4400 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4402 first->bytecount = skb->len;
4403 first->gso_segs = 1;
4405 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4406 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4407 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4410 if (vlan_tx_tag_present(skb)) {
4411 tx_flags |= IGB_TX_FLAGS_VLAN;
4412 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4415 /* record initial flags and protocol */
4416 first->tx_flags = tx_flags;
4417 first->protocol = protocol;
4419 tso = igb_tso(tx_ring, first, &hdr_len);
4423 igb_tx_csum(tx_ring, first);
4425 igb_tx_map(tx_ring, first, hdr_len);
4427 /* Make sure there is space in the ring for the next send. */
4428 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4430 return NETDEV_TX_OK;
4433 igb_unmap_and_free_tx_resource(tx_ring, first);
4435 return NETDEV_TX_OK;
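/* map skb->queue_mapping onto an allocated Tx ring, reducing modulo
 * num_tx_queues when the stack picked a queue we do not have */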
4438 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4439 struct sk_buff *skb)
4441 unsigned int r_idx = skb->queue_mapping;
4443 if (r_idx >= adapter->num_tx_queues)
4444 r_idx = r_idx % adapter->num_tx_queues;
4446 return adapter->tx_ring[r_idx];
4449 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4450 struct net_device *netdev)
4452 struct igb_adapter *adapter = netdev_priv(netdev);
4454 if (test_bit(__IGB_DOWN, &adapter->state)) {
4455 dev_kfree_skb_any(skb);
4456 return NETDEV_TX_OK;
4459 if (skb->len <= 0) {
4460 dev_kfree_skb_any(skb);
4461 return NETDEV_TX_OK;
4465 * The minimum packet size with TCTL.PSP set is 17 bytes, so pad the skb
4466 * in order to meet this minimum size requirement.
4468 if (skb->len < 17) {
4469 if (skb_padto(skb, 17))
4470 return NETDEV_TX_OK;
4474 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4478 * igb_tx_timeout - Respond to a Tx Hang
4479 * @netdev: network interface device structure
4481 static void igb_tx_timeout(struct net_device *netdev)
4483 struct igb_adapter *adapter = netdev_priv(netdev);
4484 struct e1000_hw *hw = &adapter->hw;
4486 /* Do the reset outside of interrupt context */
4487 adapter->tx_timeout_count++;
4489 if (hw->mac.type == e1000_82580)
4490 hw->dev_spec._82575.global_device_reset = true;
4492 schedule_work(&adapter->reset_task);
4494 wr32(E1000_EICS, (adapter->eims_enable_mask & ~adapter->eims_other));
4497 static void igb_reset_task(struct work_struct *work)
4499 struct igb_adapter *adapter;
4500 adapter = container_of(work, struct igb_adapter, reset_task);
4503 netdev_err(adapter->netdev, "Reset adapter\n");
4504 igb_reinit_locked(adapter);
4508 * igb_get_stats64 - Get System Network Statistics
4509 * @netdev: network interface device structure
4510 * @stats: rtnl_link_stats64 pointer
4513 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4514 struct rtnl_link_stats64 *stats)
4516 struct igb_adapter *adapter = netdev_priv(netdev);
4518 spin_lock(&adapter->stats64_lock);
4519 igb_update_stats(adapter, &adapter->stats64);
4520 memcpy(stats, &adapter->stats64, sizeof(*stats));
4521 spin_unlock(&adapter->stats64_lock);
4527 * igb_change_mtu - Change the Maximum Transfer Unit
4528 * @netdev: network interface device structure
4529 * @new_mtu: new value for maximum frame size
4531 * Returns 0 on success, negative on failure
4533 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4535 struct igb_adapter *adapter = netdev_priv(netdev);
4536 struct pci_dev *pdev = adapter->pdev;
4537 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4539 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4540 dev_err(&pdev->dev, "Invalid MTU setting\n");
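/* 9238 = 9216-byte jumbo MTU + ETH_HLEN (14) + ETH_FCS_LEN (4) + VLAN_HLEN (4) */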
4544 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4545 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4546 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4550 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4553 /* igb_down has a dependency on max_frame_size */
4554 adapter->max_frame_size = max_frame;
4556 if (netif_running(netdev))
4559 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4560 netdev->mtu, new_mtu);
4561 netdev->mtu = new_mtu;
4563 if (netif_running(netdev))
4568 clear_bit(__IGB_RESETTING, &adapter->state);
4574 * igb_update_stats - Update the board statistics counters
4575 * @adapter: board private structure
4578 void igb_update_stats(struct igb_adapter *adapter,
4579 struct rtnl_link_stats64 *net_stats)
4581 struct e1000_hw *hw = &adapter->hw;
4582 struct pci_dev *pdev = adapter->pdev;
4588 u64 _bytes, _packets;
4590 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4593 * Prevent stats update while adapter is being reset, or if the pci
4594 * connection is down.
4596 if (adapter->link_speed == 0)
4598 if (pci_channel_offline(pdev))
4603 for (i = 0; i < adapter->num_rx_queues; i++) {
4604 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4605 struct igb_ring *ring = adapter->rx_ring[i];
4607 ring->rx_stats.drops += rqdpc_tmp;
4608 net_stats->rx_fifo_errors += rqdpc_tmp;
4611 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4612 _bytes = ring->rx_stats.bytes;
4613 _packets = ring->rx_stats.packets;
4614 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4616 packets += _packets;
4619 net_stats->rx_bytes = bytes;
4620 net_stats->rx_packets = packets;
4624 for (i = 0; i < adapter->num_tx_queues; i++) {
4625 struct igb_ring *ring = adapter->tx_ring[i];
4627 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4628 _bytes = ring->tx_stats.bytes;
4629 _packets = ring->tx_stats.packets;
4630 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4632 packets += _packets;
4634 net_stats->tx_bytes = bytes;
4635 net_stats->tx_packets = packets;
4637 /* read stats registers */
4638 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4639 adapter->stats.gprc += rd32(E1000_GPRC);
4640 adapter->stats.gorc += rd32(E1000_GORCL);
4641 rd32(E1000_GORCH); /* clear GORCL */
4642 adapter->stats.bprc += rd32(E1000_BPRC);
4643 adapter->stats.mprc += rd32(E1000_MPRC);
4644 adapter->stats.roc += rd32(E1000_ROC);
4646 adapter->stats.prc64 += rd32(E1000_PRC64);
4647 adapter->stats.prc127 += rd32(E1000_PRC127);
4648 adapter->stats.prc255 += rd32(E1000_PRC255);
4649 adapter->stats.prc511 += rd32(E1000_PRC511);
4650 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4651 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4652 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4653 adapter->stats.sec += rd32(E1000_SEC);
4655 mpc = rd32(E1000_MPC);
4656 adapter->stats.mpc += mpc;
4657 net_stats->rx_fifo_errors += mpc;
4658 adapter->stats.scc += rd32(E1000_SCC);
4659 adapter->stats.ecol += rd32(E1000_ECOL);
4660 adapter->stats.mcc += rd32(E1000_MCC);
4661 adapter->stats.latecol += rd32(E1000_LATECOL);
4662 adapter->stats.dc += rd32(E1000_DC);
4663 adapter->stats.rlec += rd32(E1000_RLEC);
4664 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4665 adapter->stats.xontxc += rd32(E1000_XONTXC);
4666 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4667 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4668 adapter->stats.fcruc += rd32(E1000_FCRUC);
4669 adapter->stats.gptc += rd32(E1000_GPTC);
4670 adapter->stats.gotc += rd32(E1000_GOTCL);
4671 rd32(E1000_GOTCH); /* clear GOTCL */
4672 adapter->stats.rnbc += rd32(E1000_RNBC);
4673 adapter->stats.ruc += rd32(E1000_RUC);
4674 adapter->stats.rfc += rd32(E1000_RFC);
4675 adapter->stats.rjc += rd32(E1000_RJC);
4676 adapter->stats.tor += rd32(E1000_TORH);
4677 adapter->stats.tot += rd32(E1000_TOTH);
4678 adapter->stats.tpr += rd32(E1000_TPR);
4680 adapter->stats.ptc64 += rd32(E1000_PTC64);
4681 adapter->stats.ptc127 += rd32(E1000_PTC127);
4682 adapter->stats.ptc255 += rd32(E1000_PTC255);
4683 adapter->stats.ptc511 += rd32(E1000_PTC511);
4684 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4685 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4687 adapter->stats.mptc += rd32(E1000_MPTC);
4688 adapter->stats.bptc += rd32(E1000_BPTC);
4690 adapter->stats.tpt += rd32(E1000_TPT);
4691 adapter->stats.colc += rd32(E1000_COLC);
4693 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4694 /* read internal phy specific stats */
4695 reg = rd32(E1000_CTRL_EXT);
4696 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4697 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4698 adapter->stats.tncrs += rd32(E1000_TNCRS);
4701 adapter->stats.tsctc += rd32(E1000_TSCTC);
4702 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4704 adapter->stats.iac += rd32(E1000_IAC);
4705 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4706 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4707 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4708 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4709 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4710 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4711 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4712 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4714 /* Fill out the OS statistics structure */
4715 net_stats->multicast = adapter->stats.mprc;
4716 net_stats->collisions = adapter->stats.colc;
4720 /* RLEC on some newer hardware can be incorrect so build
4721 * our own version based on RUC and ROC */
4722 net_stats->rx_errors = adapter->stats.rxerrc +
4723 adapter->stats.crcerrs + adapter->stats.algnerrc +
4724 adapter->stats.ruc + adapter->stats.roc +
4725 adapter->stats.cexterr;
4726 net_stats->rx_length_errors = adapter->stats.ruc +
4728 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4729 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4730 net_stats->rx_missed_errors = adapter->stats.mpc;
4733 net_stats->tx_errors = adapter->stats.ecol +
4734 adapter->stats.latecol;
4735 net_stats->tx_aborted_errors = adapter->stats.ecol;
4736 net_stats->tx_window_errors = adapter->stats.latecol;
4737 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4739 /* Tx Dropped needs to be maintained elsewhere */
4742 if (hw->phy.media_type == e1000_media_type_copper) {
4743 if ((adapter->link_speed == SPEED_1000) &&
4744 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4745 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4746 adapter->phy_stats.idle_errors += phy_tmp;
4750 /* Management Stats */
4751 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4752 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4753 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4756 reg = rd32(E1000_MANC);
4757 if (reg & E1000_MANC_EN_BMC2OS) {
4758 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4759 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4760 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4761 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
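/*
 * igb_msix_other - handle the "other" MSI-X vector: device reset
 * requests (DRSTA), DMA out-of-sync events, VF mailbox traffic, and
 * link status changes that kick the watchdog.
 */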
4765 static irqreturn_t igb_msix_other(int irq, void *data)
4767 struct igb_adapter *adapter = data;
4768 struct e1000_hw *hw = &adapter->hw;
4769 u32 icr = rd32(E1000_ICR);
4770 /* reading ICR causes bit 31 of EICR to be cleared */
4772 if (icr & E1000_ICR_DRSTA)
4773 schedule_work(&adapter->reset_task);
4775 if (icr & E1000_ICR_DOUTSYNC) {
4776 /* HW is reporting DMA is out of sync */
4777 adapter->stats.doosync++;
4778 /* The DMA Out of Sync is also an indication of a spoof event
4779 * in IOV mode. Check the Wrong VM Behavior register to
4780 * see if it is really a spoof event. */
4781 igb_check_wvbr(adapter);
4784 /* Check for a mailbox event */
4785 if (icr & E1000_ICR_VMMB)
4786 igb_msg_task(adapter);
4788 if (icr & E1000_ICR_LSC) {
4789 hw->mac.get_link_status = 1;
4790 /* guard against interrupt when we're going down */
4791 if (!test_bit(__IGB_DOWN, &adapter->state))
4792 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4795 if (adapter->vfs_allocated_count)
4796 wr32(E1000_IMS, E1000_IMS_LSC |
4798 E1000_IMS_DOUTSYNC);
4800 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4801 wr32(E1000_EIMS, adapter->eims_other);
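/* push the freshly calculated ITR value out to this vector's EITR
 * register; the 82575 wants the value mirrored into both halves, newer
 * macs take it with the counter-ignore bit set */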
4806 static void igb_write_itr(struct igb_q_vector *q_vector)
4808 struct igb_adapter *adapter = q_vector->adapter;
4809 u32 itr_val = q_vector->itr_val & 0x7FFC;
4811 if (!q_vector->set_itr)
4817 if (adapter->hw.mac.type == e1000_82575)
4818 itr_val |= itr_val << 16;
4820 itr_val |= E1000_EITR_CNT_IGNR;
4822 writel(itr_val, q_vector->itr_register);
4823 q_vector->set_itr = 0;
4826 static irqreturn_t igb_msix_ring(int irq, void *data)
4828 struct igb_q_vector *q_vector = data;
4830 /* Write the ITR value calculated from the previous interrupt. */
4831 igb_write_itr(q_vector);
4833 napi_schedule(&q_vector->napi);
4838 #ifdef CONFIG_IGB_DCA
4839 static void igb_update_dca(struct igb_q_vector *q_vector)
4841 struct igb_adapter *adapter = q_vector->adapter;
4842 struct e1000_hw *hw = &adapter->hw;
4843 int cpu = get_cpu();
4845 if (q_vector->cpu == cpu)
4848 if (q_vector->tx.ring) {
4849 int q = q_vector->tx.ring->reg_idx;
4850 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4851 if (hw->mac.type == e1000_82575) {
4852 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4853 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4855 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4856 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4857 E1000_DCA_TXCTRL_CPUID_SHIFT;
4859 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4860 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4862 if (q_vector->rx.ring) {
4863 int q = q_vector->rx.ring->reg_idx;
4864 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4865 if (hw->mac.type == e1000_82575) {
4866 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4867 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4869 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4870 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4871 E1000_DCA_RXCTRL_CPUID_SHIFT;
4873 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4874 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4875 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4876 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4878 q_vector->cpu = cpu;
4883 static void igb_setup_dca(struct igb_adapter *adapter)
4885 struct e1000_hw *hw = &adapter->hw;
4888 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4891 /* Always use CB2 mode, difference is masked in the CB driver. */
4892 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4894 for (i = 0; i < adapter->num_q_vectors; i++) {
4895 adapter->q_vector[i]->cpu = -1;
4896 igb_update_dca(adapter->q_vector[i]);
4900 static int __igb_notify_dca(struct device *dev, void *data)
4902 struct net_device *netdev = dev_get_drvdata(dev);
4903 struct igb_adapter *adapter = netdev_priv(netdev);
4904 struct pci_dev *pdev = adapter->pdev;
4905 struct e1000_hw *hw = &adapter->hw;
4906 unsigned long event = *(unsigned long *)data;
4909 case DCA_PROVIDER_ADD:
4910 /* if already enabled, don't do it again */
4911 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4913 if (dca_add_requester(dev) == 0) {
4914 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4915 dev_info(&pdev->dev, "DCA enabled\n");
4916 igb_setup_dca(adapter);
4919 /* Fall Through since DCA is disabled. */
4920 case DCA_PROVIDER_REMOVE:
4921 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4922 /* without this a class_device is left
4923 * hanging around in the sysfs model */
4924 dca_remove_requester(dev);
4925 dev_info(&pdev->dev, "DCA disabled\n");
4926 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4927 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4935 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4940 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4943 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4945 #endif /* CONFIG_IGB_DCA */
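/* ping every allocated VF through the mailbox; VFs that have completed
 * the reset handshake (CTS) also get the clear-to-send bit */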
4947 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4949 struct e1000_hw *hw = &adapter->hw;
4953 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4954 ping = E1000_PF_CONTROL_MSG;
4955 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4956 ping |= E1000_VT_MSGTYPE_CTS;
4957 igb_write_mbx(hw, &ping, 1, i);
4961 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4963 struct e1000_hw *hw = &adapter->hw;
4964 u32 vmolr = rd32(E1000_VMOLR(vf));
4965 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4967 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4968 IGB_VF_FLAG_MULTI_PROMISC);
4969 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4971 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4972 vmolr |= E1000_VMOLR_MPME;
4973 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4974 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4977 * if we have hashes and we are clearing a multicast promisc
4978 * flag we need to write the hashes to the MTA as this step
4979 * was previously skipped
4981 if (vf_data->num_vf_mc_hashes > 30) {
4982 vmolr |= E1000_VMOLR_MPME;
4983 } else if (vf_data->num_vf_mc_hashes) {
4985 vmolr |= E1000_VMOLR_ROMPE;
4986 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4987 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4991 wr32(E1000_VMOLR(vf), vmolr);
4993 /* there are flags left unprocessed, likely not supported */
4994 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5001 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5002 u32 *msgbuf, u32 vf)
5004 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5005 u16 *hash_list = (u16 *)&msgbuf[1];
5006 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5009 /* salt away the number of multicast addresses assigned
5010 * to this VF for later use to restore when the PF multicast list changes */
5013 vf_data->num_vf_mc_hashes = n;
5015 /* only up to 30 hash values supported */
5019 /* store the hashes for later use */
5020 for (i = 0; i < n; i++)
5021 vf_data->vf_mc_hashes[i] = hash_list[i];
5023 /* Flush and reset the mta with the new values */
5024 igb_set_rx_mode(adapter->netdev);
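/* rewrite each VF's multicast promisc and MTA hash state after the
 * shared multicast table has been reset */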
5029 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5031 struct e1000_hw *hw = &adapter->hw;
5032 struct vf_data_storage *vf_data;
5035 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5036 u32 vmolr = rd32(E1000_VMOLR(i));
5037 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5039 vf_data = &adapter->vf_data[i];
5041 if ((vf_data->num_vf_mc_hashes > 30) ||
5042 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5043 vmolr |= E1000_VMOLR_MPME;
5044 } else if (vf_data->num_vf_mc_hashes) {
5045 vmolr |= E1000_VMOLR_ROMPE;
5046 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5047 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5049 wr32(E1000_VMOLR(i), vmolr);
5053 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5055 struct e1000_hw *hw = &adapter->hw;
5056 u32 pool_mask, reg, vid;
5059 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5061 /* Find the vlan filter for this id */
5062 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5063 reg = rd32(E1000_VLVF(i));
5065 /* remove the vf from the pool */
5068 /* if pool is empty then remove entry from vfta */
5069 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5070 (reg & E1000_VLVF_VLANID_ENABLE)) {
5072 vid = reg & E1000_VLVF_VLANID_MASK;
5073 igb_vfta_set(hw, vid, false);
5076 wr32(E1000_VLVF(i), reg);
5079 adapter->vf_data[vf].vlans_enabled = 0;
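/*
 * igb_vlvf_set - add or remove a VF pool from the VLVF entry for @vid,
 * allocating a free filter slot when needed, maintaining the shared
 * VFTA bit, and adjusting the VF's Rx max packet size (RLPML) to make
 * room for the VLAN tag.
 */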
5082 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5084 struct e1000_hw *hw = &adapter->hw;
5087 /* The vlvf table only exists on 82576 hardware and newer */
5088 if (hw->mac.type < e1000_82576)
5091 /* we only need to do this if VMDq is enabled */
5092 if (!adapter->vfs_allocated_count)
5095 /* Find the vlan filter for this id */
5096 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5097 reg = rd32(E1000_VLVF(i));
5098 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5099 vid == (reg & E1000_VLVF_VLANID_MASK))
5104 if (i == E1000_VLVF_ARRAY_SIZE) {
5105 /* Did not find a matching VLAN ID entry that was
5106 * enabled. Search for a free filter entry, i.e.
5107 * one without the enable bit set
5109 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5110 reg = rd32(E1000_VLVF(i));
5111 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5115 if (i < E1000_VLVF_ARRAY_SIZE) {
5116 /* Found an enabled/available entry */
5117 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5119 /* if !enabled we need to set this up in vfta */
5120 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5121 /* add VID to filter table */
5122 igb_vfta_set(hw, vid, true);
5123 reg |= E1000_VLVF_VLANID_ENABLE;
5125 reg &= ~E1000_VLVF_VLANID_MASK;
5127 wr32(E1000_VLVF(i), reg);
5129 /* do not modify RLPML for PF devices */
5130 if (vf >= adapter->vfs_allocated_count)
5133 if (!adapter->vf_data[vf].vlans_enabled) {
5135 reg = rd32(E1000_VMOLR(vf));
5136 size = reg & E1000_VMOLR_RLPML_MASK;
5138 reg &= ~E1000_VMOLR_RLPML_MASK;
5140 wr32(E1000_VMOLR(vf), reg);
5143 adapter->vf_data[vf].vlans_enabled++;
5147 if (i < E1000_VLVF_ARRAY_SIZE) {
5148 /* remove vf from the pool */
5149 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5150 /* if pool is empty then remove entry from vfta */
5151 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5153 igb_vfta_set(hw, vid, false);
5155 wr32(E1000_VLVF(i), reg);
5157 /* do not modify RLPML for PF devices */
5158 if (vf >= adapter->vfs_allocated_count)
5161 adapter->vf_data[vf].vlans_enabled--;
5162 if (!adapter->vf_data[vf].vlans_enabled) {
5164 reg = rd32(E1000_VMOLR(vf));
5165 size = reg & E1000_VMOLR_RLPML_MASK;
5167 reg &= ~E1000_VMOLR_RLPML_MASK;
5169 wr32(E1000_VMOLR(vf), reg);
5176 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5178 struct e1000_hw *hw = &adapter->hw;
5181 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5183 wr32(E1000_VMVIR(vf), 0);
5186 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5187 int vf, u16 vlan, u8 qos)
5190 struct igb_adapter *adapter = netdev_priv(netdev);
5192 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5195 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5198 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5199 igb_set_vmolr(adapter, vf, !vlan);
5200 adapter->vf_data[vf].pf_vlan = vlan;
5201 adapter->vf_data[vf].pf_qos = qos;
5202 dev_info(&adapter->pdev->dev,
5203 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5204 if (test_bit(__IGB_DOWN, &adapter->state)) {
5205 dev_warn(&adapter->pdev->dev,
5206 "The VF VLAN has been set,"
5207 " but the PF device is not up.\n");
5208 dev_warn(&adapter->pdev->dev,
5209 "Bring the PF device up before"
5210 " attempting to use the VF device.\n");
5213 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5215 igb_set_vmvir(adapter, vlan, vf);
5216 igb_set_vmolr(adapter, vf, true);
5217 adapter->vf_data[vf].pf_vlan = 0;
5218 adapter->vf_data[vf].pf_qos = 0;
5224 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5226 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5227 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5229 return igb_vlvf_set(adapter, vid, add, vf);
5232 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5234 /* clear flags - except flag that indicates PF has set the MAC */
5235 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5236 adapter->vf_data[vf].last_nack = jiffies;
5238 /* reset offloads to defaults */
5239 igb_set_vmolr(adapter, vf, true);
5241 /* reset vlans for device */
5242 igb_clear_vf_vfta(adapter, vf);
5243 if (adapter->vf_data[vf].pf_vlan)
5244 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5245 adapter->vf_data[vf].pf_vlan,
5246 adapter->vf_data[vf].pf_qos);
5248 igb_clear_vf_vfta(adapter, vf);
5250 /* reset multicast table array for vf */
5251 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5253 /* Flush and reset the mta with the new values */
5254 igb_set_rx_mode(adapter->netdev);
5257 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5259 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5261 /* generate a new mac address as we were hotplug removed/added */
5262 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5263 random_ether_addr(vf_mac);
5265 /* process remaining reset events */
5266 igb_vf_reset(adapter, vf);
5269 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5271 struct e1000_hw *hw = &adapter->hw;
5272 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5273 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5275 u8 *addr = (u8 *)(&msgbuf[1]);
5277 /* process all the same items cleared in a function level reset */
5278 igb_vf_reset(adapter, vf);
5280 /* set vf mac address */
5281 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5283 /* enable transmit and receive for vf */
5284 reg = rd32(E1000_VFTE);
5285 wr32(E1000_VFTE, reg | (1 << vf));
5286 reg = rd32(E1000_VFRE);
5287 wr32(E1000_VFRE, reg | (1 << vf));
5289 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5291 /* reply to reset with ack and vf mac address */
5292 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5293 memcpy(addr, vf_mac, 6);
5294 igb_write_mbx(hw, msgbuf, 3, vf);
5297 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5300 * The VF MAC Address is stored in a packed array of bytes
5301 * starting at the second 32 bit word of the msg array
5303 unsigned char *addr = (char *)&msg[1];
5306 if (is_valid_ether_addr(addr))
5307 err = igb_set_vf_mac(adapter, vf, addr);
5312 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5314 struct e1000_hw *hw = &adapter->hw;
5315 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5316 u32 msg = E1000_VT_MSGTYPE_NACK;
5318 /* if device isn't clear to send it shouldn't be reading either */
5319 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5320 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5321 igb_write_mbx(hw, &msg, 1, vf);
5322 vf_data->last_nack = jiffies;
5326 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5328 struct pci_dev *pdev = adapter->pdev;
5329 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5330 struct e1000_hw *hw = &adapter->hw;
5331 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5334 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5337 /* if receive failed revoke VF CTS stats and restart init */
5338 dev_err(&pdev->dev, "Error receiving message from VF\n");
5339 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5340 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5345 /* this is a message we already processed, do nothing */
5346 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5350 * until the vf completes a reset it should not be
5351 * allowed to start any configuration.
5354 if (msgbuf[0] == E1000_VF_RESET) {
5355 igb_vf_reset_msg(adapter, vf);
5359 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5360 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5366 switch ((msgbuf[0] & 0xFFFF)) {
5367 case E1000_VF_SET_MAC_ADDR:
5369 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5370 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5372 dev_warn(&pdev->dev,
5373 "VF %d attempted to override administratively "
5374 "set MAC address\nReload the VF driver to "
5375 "resume operations\n", vf);
5377 case E1000_VF_SET_PROMISC:
5378 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5380 case E1000_VF_SET_MULTICAST:
5381 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5383 case E1000_VF_SET_LPE:
5384 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5386 case E1000_VF_SET_VLAN:
5388 if (vf_data->pf_vlan)
5389 dev_warn(&pdev->dev,
5390 "VF %d attempted to override administratively "
5391 "set VLAN tag\nReload the VF driver to "
5392 "resume operations\n", vf);
5394 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5397 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5402 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5404 /* notify the VF of the results of what it sent us */
5406 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5408 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5410 igb_write_mbx(hw, msgbuf, 1, vf);
5413 static void igb_msg_task(struct igb_adapter *adapter)
5415 struct e1000_hw *hw = &adapter->hw;
5418 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5419 /* process any reset requests */
5420 if (!igb_check_for_rst(hw, vf))
5421 igb_vf_reset_event(adapter, vf);
5423 /* process any messages pending */
5424 if (!igb_check_for_msg(hw, vf))
5425 igb_rcv_msg_from_vf(adapter, vf);
5427 /* process any acks */
5428 if (!igb_check_for_ack(hw, vf))
5429 igb_rcv_ack_from_vf(adapter, vf);
5434 * igb_set_uta - Set unicast filter table address
5435 * @adapter: board private structure
5437 * The unicast table address is a register array of 32-bit registers.
5438 * The table is meant to be used in a way similar to how the MTA is used
5439 * however due to certain limitations in the hardware it is necessary to
5440 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5441 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5443 static void igb_set_uta(struct igb_adapter *adapter)
5445 struct e1000_hw *hw = &adapter->hw;
5448 /* The UTA table only exists on 82576 hardware and newer */
5449 if (hw->mac.type < e1000_82576)
5452 /* we only need to do this if VMDq is enabled */
5453 if (!adapter->vfs_allocated_count)
5456 for (i = 0; i < hw->mac.uta_reg_count; i++)
5457 array_wr32(E1000_UTA, i, ~0);
5461 * igb_intr_msi - Interrupt Handler
5462 * @irq: interrupt number
5463 * @data: pointer to a network interface device structure
5465 static irqreturn_t igb_intr_msi(int irq, void *data)
5467 struct igb_adapter *adapter = data;
5468 struct igb_q_vector *q_vector = adapter->q_vector[0];
5469 struct e1000_hw *hw = &adapter->hw;
5470 /* read ICR disables interrupts using IAM */
5471 u32 icr = rd32(E1000_ICR);
5473 igb_write_itr(q_vector);
5475 if (icr & E1000_ICR_DRSTA)
5476 schedule_work(&adapter->reset_task);
5478 if (icr & E1000_ICR_DOUTSYNC) {
5479 /* HW is reporting DMA is out of sync */
5480 adapter->stats.doosync++;
5483 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5484 hw->mac.get_link_status = 1;
5485 if (!test_bit(__IGB_DOWN, &adapter->state))
5486 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5489 napi_schedule(&q_vector->napi);
5495 * igb_intr - Legacy Interrupt Handler
5496 * @irq: interrupt number
5497 * @data: pointer to a network interface device structure
5499 static irqreturn_t igb_intr(int irq, void *data)
5501 struct igb_adapter *adapter = data;
5502 struct igb_q_vector *q_vector = adapter->q_vector[0];
5503 struct e1000_hw *hw = &adapter->hw;
5504 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5505 * need for the IMC write */
5506 u32 icr = rd32(E1000_ICR);
5508 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5509 * not set, then the adapter didn't send an interrupt */
5510 if (!(icr & E1000_ICR_INT_ASSERTED))
5513 igb_write_itr(q_vector);
5515 if (icr & E1000_ICR_DRSTA)
5516 schedule_work(&adapter->reset_task);
5518 if (icr & E1000_ICR_DOUTSYNC) {
5519 /* HW is reporting DMA is out of sync */
5520 adapter->stats.doosync++;
5523 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5524 hw->mac.get_link_status = 1;
5525 /* guard against interrupt when we're going down */
5526 if (!test_bit(__IGB_DOWN, &adapter->state))
5527 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5530 napi_schedule(&q_vector->napi);
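/* re-arm interrupts for this q_vector once NAPI polling is done,
 * refreshing the adaptive ITR value first */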
5535 void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5537 struct igb_adapter *adapter = q_vector->adapter;
5538 struct e1000_hw *hw = &adapter->hw;
5540 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5541 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5542 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5543 igb_set_itr(q_vector);
5545 igb_update_ring_itr(q_vector);
5548 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5549 if (adapter->msix_entries)
5550 wr32(E1000_EIMS, q_vector->eims_value);
5552 igb_irq_enable(adapter);
5557 * igb_poll - NAPI Rx polling callback
5558 * @napi: napi polling structure
5559 * @budget: count of how many packets we should handle
5561 static int igb_poll(struct napi_struct *napi, int budget)
5563 struct igb_q_vector *q_vector = container_of(napi,
5564 struct igb_q_vector,
5566 bool clean_complete = true;
5568 #ifdef CONFIG_IGB_DCA
5569 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5570 igb_update_dca(q_vector);
5572 if (q_vector->tx.ring)
5573 clean_complete = igb_clean_tx_irq(q_vector);
5575 if (q_vector->rx.ring)
5576 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5578 /* If all work not completed, return budget and keep polling */
5579 if (!clean_complete)
5582 /* If not enough Rx work done, exit the polling mode */
5583 napi_complete(napi);
5584 igb_ring_irq_enable(q_vector);
5590 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5591 * @adapter: board private structure
5592 * @shhwtstamps: timestamp structure to update
5593 * @regval: unsigned 64bit system time value.
5595 * We need to convert the system time value stored in the RX/TXSTMP registers
5596 * into a hwtstamp which can be used by the upper level timestamping functions
5598 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5599 struct skb_shared_hwtstamps *shhwtstamps,
5605 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5606 * 24 to match the clock shift we set up earlier.
5608 if (adapter->hw.mac.type == e1000_82580)
5609 regval <<= IGB_82580_TSYNC_SHIFT;
5611 ns = timecounter_cyc2time(&adapter->clock, regval);
5612 timecompare_update(&adapter->compare, ns);
5613 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5614 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5615 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5619 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5620 * @q_vector: pointer to q_vector containing needed info
5621 * @buffer: pointer to igb_tx_buffer structure
5623 * If we were asked to do hardware stamping and such a time stamp is
5624 * available, then it must have been for this skb here because we
5625 * allow only one such packet into the queue.
5627 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5628 struct igb_tx_buffer *buffer_info)
5630 struct igb_adapter *adapter = q_vector->adapter;
5631 struct e1000_hw *hw = &adapter->hw;
5632 struct skb_shared_hwtstamps shhwtstamps;
5635 /* exit if no hw timestamp was requested or the TX stamp is not valid */
5636 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5637 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5640 regval = rd32(E1000_TXSTMPL);
5641 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5643 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5644 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5648 * igb_clean_tx_irq - Reclaim resources after transmit completes
5649 * @q_vector: pointer to q_vector containing needed info
5650 * returns true if ring is completely cleaned
5652 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5654 struct igb_adapter *adapter = q_vector->adapter;
5655 struct igb_ring *tx_ring = q_vector->tx.ring;
5656 struct igb_tx_buffer *tx_buffer;
5657 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5658 unsigned int total_bytes = 0, total_packets = 0;
5659 unsigned int budget = q_vector->tx.work_limit;
5660 unsigned int i = tx_ring->next_to_clean;
5662 if (test_bit(__IGB_DOWN, &adapter->state))
5665 tx_buffer = &tx_ring->tx_buffer_info[i];
5666 tx_desc = IGB_TX_DESC(tx_ring, i);
5667 i -= tx_ring->count;
5669 for (; budget; budget--) {
5670 eop_desc = tx_buffer->next_to_watch;
5672 /* prevent any other reads prior to eop_desc */
5675 /* if next_to_watch is not set then there is no work pending */
5679 /* if DD is not set pending work has not been completed */
5680 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5683 /* clear next_to_watch to prevent false hangs */
5684 tx_buffer->next_to_watch = NULL;
5686 /* update the statistics for this packet */
5687 total_bytes += tx_buffer->bytecount;
5688 total_packets += tx_buffer->gso_segs;
5690 /* retrieve hardware timestamp */
5691 igb_tx_hwtstamp(q_vector, tx_buffer);
5694 dev_kfree_skb_any(tx_buffer->skb);
5695 tx_buffer->skb = NULL;
5697 /* unmap skb header data */
5698 dma_unmap_single(tx_ring->dev,
5703 /* clear last DMA location and unmap remaining buffers */
5704 while (tx_desc != eop_desc) {
5711 i -= tx_ring->count;
5712 tx_buffer = tx_ring->tx_buffer_info;
5713 tx_desc = IGB_TX_DESC(tx_ring, 0);
5716 /* unmap any remaining paged data */
5717 if (tx_buffer->dma) {
5718 dma_unmap_page(tx_ring->dev,
5725 /* clear last DMA location */
5728 /* move us one more past the eop_desc for start of next pkt */
5733 i -= tx_ring->count;
5734 tx_buffer = tx_ring->tx_buffer_info;
5735 tx_desc = IGB_TX_DESC(tx_ring, 0);
5739 i += tx_ring->count;
5740 tx_ring->next_to_clean = i;
5741 u64_stats_update_begin(&tx_ring->tx_syncp);
5742 tx_ring->tx_stats.bytes += total_bytes;
5743 tx_ring->tx_stats.packets += total_packets;
5744 u64_stats_update_end(&tx_ring->tx_syncp);
5745 q_vector->tx.total_bytes += total_bytes;
5746 q_vector->tx.total_packets += total_packets;
5748 if (tx_ring->detect_tx_hung) {
5749 struct e1000_hw *hw = &adapter->hw;
5751 eop_desc = tx_buffer->next_to_watch;
5753 /* Detect a transmit hang in hardware; this serializes the
5754 * check with the clearing of time_stamp and movement of i */
5755 tx_ring->detect_tx_hung = false;
5757 time_after(jiffies, tx_buffer->time_stamp +
5758 (adapter->tx_timeout_factor * HZ)) &&
5759 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5761 /* detected Tx unit hang */
5762 dev_err(tx_ring->dev,
5763 "Detected Tx Unit Hang\n"
5767 " next_to_use <%x>\n"
5768 " next_to_clean <%x>\n"
5769 "buffer_info[next_to_clean]\n"
5770 " time_stamp <%lx>\n"
5771 " next_to_watch <%p>\n"
5773 " desc.status <%x>\n",
5774 tx_ring->queue_index,
5775 rd32(E1000_TDH(tx_ring->reg_idx)),
5776 readl(tx_ring->tail),
5777 tx_ring->next_to_use,
5778 tx_ring->next_to_clean,
5779 tx_buffer->time_stamp,
5782 eop_desc->wb.status);
5783 netif_stop_subqueue(tx_ring->netdev,
5784 tx_ring->queue_index);
5786 /* we are about to reset, no point in enabling stuff */
5791 if (unlikely(total_packets &&
5792 netif_carrier_ok(tx_ring->netdev) &&
5793 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5794 /* Make sure that anybody stopping the queue after this
5795 * sees the new next_to_clean.
5798 if (__netif_subqueue_stopped(tx_ring->netdev,
5799 tx_ring->queue_index) &&
5800 !(test_bit(__IGB_DOWN, &adapter->state))) {
5801 netif_wake_subqueue(tx_ring->netdev,
5802 tx_ring->queue_index);
5804 u64_stats_update_begin(&tx_ring->tx_syncp);
5805 tx_ring->tx_stats.restart_queue++;
5806 u64_stats_update_end(&tx_ring->tx_syncp);
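/* igb_rx_checksum - set skb->ip_summed from the descriptor status bits,
 * counting hardware checksum errors unless the SCTP short-packet
 * erratum applies */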
5813 static inline void igb_rx_checksum(struct igb_ring *ring,
5814 u32 status_err, struct sk_buff *skb)
5816 skb_checksum_none_assert(skb);
5818 /* bail if the Ignore Checksum bit is set or Rx checksum is disabled through ethtool */
5819 if (!test_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags) ||
5820 (status_err & E1000_RXD_STAT_IXSM))
5823 /* TCP/UDP checksum error bit is set */
5825 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5827 * work around an erratum with SCTP packets where the TCPE (aka
5828 * L4E) bit is set incorrectly on 64 byte (60 byte w/o CRC)
5829 * packets; in that case let the stack verify the crc32c instead
5831 if (!((skb->len == 60) &&
5832 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5833 u64_stats_update_begin(&ring->rx_syncp);
5834 ring->rx_stats.csum_err++;
5835 u64_stats_update_end(&ring->rx_syncp);
5837 /* let the stack verify checksum errors */
5840 /* It must be a TCP or UDP packet with a valid checksum */
5841 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5842 skb->ip_summed = CHECKSUM_UNNECESSARY;
5844 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5847 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5848 struct sk_buff *skb)
5850 struct igb_adapter *adapter = q_vector->adapter;
5851 struct e1000_hw *hw = &adapter->hw;
5855 * If this bit is set, then the RX registers contain the time stamp. No
5856 * other packet will be time stamped until we read these registers, so
5857 * read the registers to make them available again. Because only one
5858 * packet can be time stamped at a time, we know that the register
5859 * values must belong to this one here and therefore we don't need to
5860 * compare any of the additional attributes stored for it.
5862 * If nothing went wrong, then it should have a shared tx_flags that we
5863 * can turn into a skb_shared_hwtstamps.
5865 if (staterr & E1000_RXDADV_STAT_TSIP) {
5866 u32 *stamp = (u32 *)skb->data;
5867 regval = le32_to_cpu(*(stamp + 2));
5868 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5869 skb_pull(skb, IGB_TS_HDR_LEN);
5871 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5874 regval = rd32(E1000_RXSTMPL);
5875 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5878 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5880 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5882 /* HW will not DMA in data larger than the given buffer, even if it
5883 * parses the (NFS, of course) header to be larger. In that case, it
5884 * fills the header buffer and spills the rest into the page.
5886 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5887 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5888 if (hlen > IGB_RX_HDR_LEN)
5889 hlen = IGB_RX_HDR_LEN;
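/*
 * igb_clean_rx_irq - Rx side of NAPI polling: walk completed
 * descriptors, pull the header into the skb, attach page fragments,
 * chain multi-descriptor packets until EOP, then run timestamp,
 * checksum, and VLAN handling before handing the skb to GRO.
 */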
5893 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5895 struct igb_ring *rx_ring = q_vector->rx.ring;
5896 union e1000_adv_rx_desc *rx_desc;
5897 const int current_node = numa_node_id();
5898 unsigned int total_bytes = 0, total_packets = 0;
5900 u16 cleaned_count = igb_desc_unused(rx_ring);
5901 u16 i = rx_ring->next_to_clean;
5903 rx_desc = IGB_RX_DESC(rx_ring, i);
5904 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5906 while (staterr & E1000_RXD_STAT_DD) {
5907 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5908 struct sk_buff *skb = buffer_info->skb;
5909 union e1000_adv_rx_desc *next_rxd;
5911 buffer_info->skb = NULL;
5912 prefetch(skb->data);
5915 if (i == rx_ring->count)
5918 next_rxd = IGB_RX_DESC(rx_ring, i);
5922 * This memory barrier is needed to keep us from reading
5923 * any other fields out of the rx_desc until we know the
5924 * RXD_STAT_DD bit is set
5928 if (!skb_is_nonlinear(skb)) {
5929 __skb_put(skb, igb_get_hlen(rx_desc));
5930 dma_unmap_single(rx_ring->dev, buffer_info->dma,
5933 buffer_info->dma = 0;
5936 if (rx_desc->wb.upper.length) {
5937 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5939 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5941 buffer_info->page_offset,
5945 skb->data_len += length;
5946 skb->truesize += length;
5948 if ((page_count(buffer_info->page) != 1) ||
5949 (page_to_nid(buffer_info->page) != current_node))
5950 buffer_info->page = NULL;
5951 else
5952 get_page(buffer_info->page);
5954 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5955 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5956 buffer_info->page_dma = 0;
5959 if (!(staterr & E1000_RXD_STAT_EOP)) {
5960 struct igb_rx_buffer *next_buffer;
5961 next_buffer = &rx_ring->rx_buffer_info[i];
5962 buffer_info->skb = next_buffer->skb;
5963 buffer_info->dma = next_buffer->dma;
5964 next_buffer->skb = skb;
5965 next_buffer->dma = 0;
5969 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5970 dev_kfree_skb_any(skb);
5974 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5975 igb_rx_hwtstamp(q_vector, staterr, skb);
5976 total_bytes += skb->len;
5979 igb_rx_checksum(rx_ring, staterr, skb);
5981 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
5983 if (staterr & E1000_RXD_STAT_VP) {
5984 u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5986 __vlan_hwaccel_put_tag(skb, vid);
5988 napi_gro_receive(&q_vector->napi, skb);
5996 /* return some buffers to hardware, one at a time is too slow */
5997 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5998 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6002 /* use prefetched values */
6004 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
6007 rx_ring->next_to_clean = i;
6008 u64_stats_update_begin(&rx_ring->rx_syncp);
6009 rx_ring->rx_stats.packets += total_packets;
6010 rx_ring->rx_stats.bytes += total_bytes;
6011 u64_stats_update_end(&rx_ring->rx_syncp);
6012 q_vector->rx.total_packets += total_packets;
6013 q_vector->rx.total_bytes += total_bytes;
6016 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6021 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6022 struct igb_rx_buffer *bi)
6024 struct sk_buff *skb = bi->skb;
6025 dma_addr_t dma = bi->dma;
6031 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6035 rx_ring->rx_stats.alloc_failed++;
6039 /* initialize skb for ring */
6040 skb_record_rx_queue(skb, rx_ring->queue_index);
6043 dma = dma_map_single(rx_ring->dev, skb->data,
6044 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6046 if (dma_mapping_error(rx_ring->dev, dma)) {
6047 rx_ring->rx_stats.alloc_failed++;
6055 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6056 struct igb_rx_buffer *bi)
6058 struct page *page = bi->page;
6059 dma_addr_t page_dma = bi->page_dma;
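/* alternate between the two halves of the page on each refill */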
6060 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6066 page = netdev_alloc_page(rx_ring->netdev);
6068 if (unlikely(!page)) {
6069 rx_ring->rx_stats.alloc_failed++;
6074 page_dma = dma_map_page(rx_ring->dev, page,
6075 page_offset, PAGE_SIZE / 2,
6078 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6079 rx_ring->rx_stats.alloc_failed++;
6083 bi->page_dma = page_dma;
6084 bi->page_offset = page_offset;
6089 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6090 * @adapter: address of board private structure
6092 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6094 union e1000_adv_rx_desc *rx_desc;
6095 struct igb_rx_buffer *bi;
6096 u16 i = rx_ring->next_to_use;
6098 rx_desc = IGB_RX_DESC(rx_ring, i);
6099 bi = &rx_ring->rx_buffer_info[i];
6100 i -= rx_ring->count;
6102 while (cleaned_count--) {
6103 if (!igb_alloc_mapped_skb(rx_ring, bi))
6106 /* Refresh the desc even if buffer_addrs didn't change
6107 * because each write-back erases this info. */
6108 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6110 if (!igb_alloc_mapped_page(rx_ring, bi))
6113 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6119 rx_desc = IGB_RX_DESC(rx_ring, 0);
6120 bi = rx_ring->rx_buffer_info;
6121 i -= rx_ring->count;
6124 /* clear the hdr_addr for the next_to_use descriptor */
6125 rx_desc->read.hdr_addr = 0;
6128 i += rx_ring->count;
6130 if (rx_ring->next_to_use != i) {
6131 rx_ring->next_to_use = i;
6133 /* Force memory writes to complete before letting h/w
6134 * know there are new descriptors to fetch. (Only
6135 * applicable for weak-ordered memory model archs,
6136 * such as IA-64). */
6138 writel(i, rx_ring->tail);
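/* igb_mii_ioctl - serve SIOCGMIIPHY/SIOCGMIIREG requests by reading the
 * copper PHY registers; other media types are not supported */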
6148 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6150 struct igb_adapter *adapter = netdev_priv(netdev);
6151 struct mii_ioctl_data *data = if_mii(ifr);
6153 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6158 data->phy_id = adapter->hw.phy.addr;
6161 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6173 * igb_hwtstamp_ioctl - control hardware time stamping
6178 * Outgoing time stamping can be enabled and disabled. Play nice and
6179 * disable it when requested, although it shouldn't cause any overhead
6180 * when no packet needs it. At most one packet in the queue may be
6181 * marked for time stamping, otherwise it would be impossible to tell
6182 * for sure to which packet the hardware time stamp belongs.
6184 * Incoming time stamping has to be configured via the hardware
6185 * filters. Not all combinations are supported, in particular event
6186 * type has to be specified. Matching the kind of event packet is
6187 * not supported, with the exception of "all V2 events regardless of
6191 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6192 struct ifreq *ifr, int cmd)
6194 struct igb_adapter *adapter = netdev_priv(netdev);
6195 struct e1000_hw *hw = &adapter->hw;
6196 struct hwtstamp_config config;
6197 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6198 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6199 u32 tsync_rx_cfg = 0;
6204 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6207 /* reserved for future extensions */
6211 switch (config.tx_type) {
6212 case HWTSTAMP_TX_OFF:
6214 case HWTSTAMP_TX_ON:
6220 switch (config.rx_filter) {
6221 case HWTSTAMP_FILTER_NONE:
6224 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6225 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6226 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6227 case HWTSTAMP_FILTER_ALL:
6229 * register TSYNCRXCFG must be set, therefore it is not
6230 * possible to time stamp both Sync and Delay_Req messages
6231 * => fall back to time stamping all packets
6233 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6234 config.rx_filter = HWTSTAMP_FILTER_ALL;
6236 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6237 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6238 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6241 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6242 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6243 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6246 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6247 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6248 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6249 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6252 config.rx_filter = HWTSTAMP_FILTER_SOME;
6254 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6255 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6256 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6257 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6260 config.rx_filter = HWTSTAMP_FILTER_SOME;
6262 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6263 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6264 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6265 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6266 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6273 if (hw->mac.type == e1000_82575) {
6274 if (tsync_rx_ctl | tsync_tx_ctl)
6280 * Per-packet timestamping only works if all packets are
6281 * timestamped, so enable timestamping in all packets as
6282 * long as one rx filter was configured.
6284 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6285 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6286 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6289 /* enable/disable TX */
6290 regval = rd32(E1000_TSYNCTXCTL);
6291 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6292 regval |= tsync_tx_ctl;
6293 wr32(E1000_TSYNCTXCTL, regval);
6295 /* enable/disable RX */
6296 regval = rd32(E1000_TSYNCRXCTL);
6297 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6298 regval |= tsync_rx_ctl;
6299 wr32(E1000_TSYNCRXCTL, regval);
6301 /* define which PTP packets are time stamped */
6302 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6304 /* define ethertype filter for timestamped packets */
6307 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6308 E1000_ETQF_1588 | /* enable timestamping */
6309 ETH_P_1588)); /* 1588 eth protocol type */
6311 wr32(E1000_ETQF(3), 0);
6313 #define PTP_PORT 319
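/* UDP port 319 carries PTP event messages (Sync and Delay_Req) */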
6314 /* L4 Queue Filter[3]: filter by destination port and protocol */
6316 u32 ftqf = (IPPROTO_UDP /* UDP */
6317 | E1000_FTQF_VF_BP /* VF not compared */
6318 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6319 | E1000_FTQF_MASK); /* mask all inputs */
6320 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6322 wr32(E1000_IMIR(3), htons(PTP_PORT));
6323 wr32(E1000_IMIREXT(3),
6324 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6325 if (hw->mac.type == e1000_82576) {
6326 /* enable source port check */
6327 wr32(E1000_SPQF(3), htons(PTP_PORT));
6328 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6330 wr32(E1000_FTQF(3), ftqf);
6332 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6336 adapter->hwtstamp_config = config;
6338 /* clear TX/RX time stamp registers, just to be sure */
6339 regval = rd32(E1000_TXSTMPH);
6340 regval = rd32(E1000_RXSTMPH);
6342 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6352 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6358 return igb_mii_ioctl(netdev, ifr, cmd);
6360 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
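/* helpers used by the shared e1000 code to access PCIe capability
 * registers through the adapter's pci_dev */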
6366 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6368 struct igb_adapter *adapter = hw->back;
6371 cap_offset = adapter->pdev->pcie_cap;
6373 return -E1000_ERR_CONFIG;
6375 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6380 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6382 struct igb_adapter *adapter = hw->back;
6385 cap_offset = adapter->pdev->pcie_cap;
6387 return -E1000_ERR_CONFIG;
6389 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
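/* toggle hardware VLAN tag stripping (CTRL.VME) to match the
 * NETIF_F_HW_VLAN_RX feature flag, with interrupts held off */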
static void igb_vlan_mode(struct net_device *netdev, u32 features)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl;

	igb_irq_disable(adapter);

	if (features & NETIF_F_HW_VLAN_RX) {
		/* enable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl |= E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);

		/* Disable CFI check */
		rctl = rd32(E1000_RCTL);
		rctl &= ~E1000_RCTL_CFIEN;
		wr32(E1000_RCTL, rctl);
	} else {
		/* disable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl &= ~E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);
	}

	igb_rlpml_set(adapter);

	if (!test_bit(__IGB_DOWN, &adapter->state))
		igb_irq_enable(adapter);
}
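
/*
 * Illustrative usage (assumes an ethtool build with VLAN offload toggles):
 * this path runs when userspace flips the RX VLAN acceleration feature, e.g.
 *
 *	ethtool -K eth0 rxvlan off
 *
 * which clears NETIF_F_HW_VLAN_RX and lands here to clear CTRL.VME.
 */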
static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;

	/* attempt to add filter to vlvf array */
	igb_vlvf_set(adapter, vid, true, pf_id);

	/* add the filter since PF can receive vlans w/o entry in vlvf */
	igb_vfta_set(hw, vid, true);

	set_bit(vid, adapter->active_vlans);
}

static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;
	s32 err;

	igb_irq_disable(adapter);

	if (!test_bit(__IGB_DOWN, &adapter->state))
		igb_irq_enable(adapter);

	/* remove vlan from VLVF table array */
	err = igb_vlvf_set(adapter, vid, false, pf_id);

	/* if vid was not present in VLVF just remove it from table */
	if (err)
		igb_vfta_set(hw, vid, false);

	clear_bit(vid, adapter->active_vlans);
}
static void igb_restore_vlan(struct igb_adapter *adapter)
{
	u16 vid;

	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
		igb_vlan_rx_add_vid(adapter->netdev, vid);
}
int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_mac_info *mac = &adapter->hw.mac;

	mac->autoneg = 0;

	/* Make sure dplx is at most 1 bit and lsb of speed is not set
	 * for the switch() below to work */
	if ((spd & 1) || (dplx & ~1))
		goto err_inval;

	/* Fiber NICs only allow 1000 Mbps full duplex */
	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
	    spd != SPEED_1000 &&
	    dplx != DUPLEX_FULL)
		goto err_inval;

	switch (spd + dplx) {
	case SPEED_10 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	case SPEED_10 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_10_FULL;
		break;
	case SPEED_100 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case SPEED_100 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_100_FULL;
		break;
	case SPEED_1000 + DUPLEX_FULL:
		mac->autoneg = 1;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case SPEED_1000 + DUPLEX_HALF: /* not supported */
	default:
		goto err_inval;
	}
	return 0;

err_inval:
	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
	return -EINVAL;
}
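
/*
 * Worked example of the spd + dplx encoding above (values from
 * linux/ethtool.h: SPEED_10/100/1000 are 10/100/1000 and DUPLEX_HALF/FULL
 * are 0/1): every speed is even and duplex is a single bit, so each valid
 * pair sums to a unique case label, e.g. 100 + 1 == 101 can only mean
 * SPEED_100 + DUPLEX_FULL.
 */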
static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	u32 wufc = adapter->wol;
#ifdef CONFIG_PM
	int retval = 0;
#endif

	netif_device_detach(netdev);

	if (netif_running(netdev))
		igb_close(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PM
	retval = pci_save_state(pdev);
	if (retval)
		return retval;
#endif

	status = rd32(E1000_STATUS);
	if (status & E1000_STATUS_LU)
		wufc &= ~E1000_WUFC_LNKC;

	if (wufc) {
		igb_setup_rctl(adapter);
		igb_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & E1000_WUFC_MC) {
			rctl = rd32(E1000_RCTL);
			rctl |= E1000_RCTL_MPE;
			wr32(E1000_RCTL, rctl);
		}

		ctrl = rd32(E1000_CTRL);
		/* advertise wake from D3Cold */
		#define E1000_CTRL_ADVD3WUC 0x00100000
		/* phy power management enable */
		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
		ctrl |= E1000_CTRL_ADVD3WUC;
		wr32(E1000_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igb_disable_pcie_master(hw);

		wr32(E1000_WUC, E1000_WUC_PME_EN);
		wr32(E1000_WUFC, wufc);
	} else {
		wr32(E1000_WUC, 0);
		wr32(E1000_WUFC, 0);
	}

	*enable_wake = wufc || adapter->en_mng_pt;
	if (!*enable_wake)
		igb_power_down_link(adapter);
	else
		igb_power_up_link(adapter);

	/* Release control of h/w to f/w. If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
{
	int retval;
	bool wake;

	retval = __igb_shutdown(pdev, &wake);
	if (retval)
		return retval;

	if (wake) {
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return 0;
}

static int igb_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int err;

	pci_set_power_state(pdev, PCI_D0);
	pci_restore_state(pdev);
	pci_save_state(pdev);

	err = pci_enable_device_mem(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"igb: Cannot enable PCI device from suspend\n");
		return err;
	}
	pci_set_master(pdev);

	pci_enable_wake(pdev, PCI_D3hot, 0);
	pci_enable_wake(pdev, PCI_D3cold, 0);

	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	wr32(E1000_WUS, ~0);

	if (netif_running(netdev)) {
		err = igb_open(netdev);
		if (err)
			return err;
	}

	netif_device_attach(netdev);

	return 0;
}
#endif /* CONFIG_PM */
static void igb_shutdown(struct pci_dev *pdev)
{
	bool wake;

	__igb_shutdown(pdev, &wake);

	if (system_state == SYSTEM_POWER_OFF) {
		pci_wake_from_d3(pdev, wake);
		pci_set_power_state(pdev, PCI_D3hot);
	}
}
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int i;

	if (!adapter->msix_entries) {
		struct igb_q_vector *q_vector = adapter->q_vector[0];
		igb_irq_disable(adapter);
		napi_schedule(&q_vector->napi);
		return;
	}

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		wr32(E1000_EIMC, q_vector->eims_value);
		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}
/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first half of the igb_resume routine.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	pci_ers_result_t result;
	int err;

	if (pci_enable_device_mem(pdev)) {
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);
		pci_save_state(pdev);

		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		igb_reset(adapter);
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
			"failed 0x%0x\n", err);
		/* non-fatal, continue */
	}

	return result;
}
/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
 * second half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev)) {
		if (igb_up(adapter)) {
			dev_err(&pdev->dev, "igb_up failed after reset\n");
			return;
		}
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
}
static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
			     u8 qsel)
{
	u32 rar_low, rar_high;
	struct e1000_hw *hw = &adapter->hw;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

	/* Indicate to hardware the Address is Valid. */
	rar_high |= E1000_RAH_AV;

	if (hw->mac.type == e1000_82575)
		rar_high |= E1000_RAH_POOL_1 * qsel;
	else
		rar_high |= E1000_RAH_POOL_1 << qsel;

	wr32(E1000_RAL(index), rar_low);
	wrfl();
	wr32(E1000_RAH(index), rar_high);
	wrfl();
}
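
/*
 * Worked example (illustrative MAC address): for addr = 00:1b:21:aa:bb:cc
 * the packing above yields rar_low = 0xaa211b00 and the low 16 bits of
 * rar_high = 0xccbb, i.e. addr[0] lands in the least significant byte of
 * RAL, matching the little-endian layout the hardware expects.
 */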
static int igb_set_vf_mac(struct igb_adapter *adapter,
			  int vf, unsigned char *mac_addr)
{
	struct e1000_hw *hw = &adapter->hw;
	/* VF MAC addresses start at end of receive addresses and move
	 * towards the first, as a result a collision should not be possible */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);

	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

	return 0;
}
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
		return -EINVAL;
	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
		 " change effective.\n");
	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
			 " but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
			 " attempting to use the VF device.\n");
	}
	return igb_set_vf_mac(adapter, vf, mac);
}
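
/*
 * Illustrative usage (interface and address are assumptions): the PF
 * administrator reaches this ndo via rtnetlink, e.g.
 *
 *	ip link set eth0 vf 0 mac 00:1b:21:aa:bb:cc
 *
 * after which the VF driver must be reloaded to pick up the new address.
 */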
static int igb_link_mbps(int internal_link_speed)
{
	switch (internal_link_speed) {
	case SPEED_100:
		return 100;
	case SPEED_1000:
		return 1000;
	default:
		return 0;
	}
}

static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
				  int link_speed)
{
	int rf_dec, rf_int;
	u32 bcnrc_val;

	if (tx_rate != 0) {
		/* Calculate the rate factor values to set */
		rf_int = link_speed / tx_rate;
		rf_dec = (link_speed - (rf_int * tx_rate));
		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;

		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
			      E1000_RTTBCNRC_RF_INT_MASK);
		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
	} else {
		bcnrc_val = 0;
	}

	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
	wr32(E1000_RTTBCNRC, bcnrc_val);
}
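
/*
 * Worked example (illustrative numbers, assuming E1000_RTTBCNRC_RF_INT_SHIFT
 * is 14): with link_speed = 1000 Mbps and tx_rate = 300 Mbps,
 * rf_int = 1000 / 300 = 3 and rf_dec = (1000 - 3 * 300) * 2^14 / 300 = 5461,
 * so the programmed rate factor is 3 + 5461/16384, approximately 3.333,
 * i.e. link_speed / tx_rate.
 */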
static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
{
	int actual_link_speed, i;
	bool reset_rate = false;

	/* VF TX rate limit was not set or not supported */
	if ((adapter->vf_rate_link_speed == 0) ||
	    (adapter->hw.mac.type != e1000_82576))
		return;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if (actual_link_speed != adapter->vf_rate_link_speed) {
		reset_rate = true;
		adapter->vf_rate_link_speed = 0;
		dev_info(&adapter->pdev->dev,
			 "Link speed has been changed. VF Transmit "
			 "rate is disabled\n");
	}

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		if (reset_rate)
			adapter->vf_data[i].tx_rate = 0;

		igb_set_vf_rate_limit(&adapter->hw, i,
				      adapter->vf_data[i].tx_rate,
				      actual_link_speed);
	}
}
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int actual_link_speed;

	if (hw->mac.type != e1000_82576)
		return -EOPNOTSUPP;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if ((vf >= adapter->vfs_allocated_count) ||
	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
	    (tx_rate < 0) || (tx_rate > actual_link_speed))
		return -EINVAL;

	adapter->vf_rate_link_speed = actual_link_speed;
	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);

	return 0;
}
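
/*
 * Illustrative usage (interface name is an assumption): the per-VF transmit
 * cap set above corresponds to
 *
 *	ip link set eth0 vf 0 rate 300
 *
 * with the rate given in Mbps; a rate of 0 removes the limit.
 */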
static int igb_ndo_get_vf_config(struct net_device *netdev,
				 int vf, struct ifla_vf_info *ivi)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;
	ivi->vf = vf;
	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
	ivi->vlan = adapter->vf_data[vf].pf_vlan;
	ivi->qos = adapter->vf_data[vf].pf_qos;
	return 0;
}
static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	switch (hw->mac.type) {
	case e1000_82575:
	default:
		/* replication is not supported for 82575 */
		return;
	case e1000_82576:
		/* notify HW that the MAC is adding vlan tags */
		reg = rd32(E1000_DTXCTL);
		reg |= E1000_DTXCTL_VLAN_ADDED;
		wr32(E1000_DTXCTL, reg);
		/* fall through */
	case e1000_82580:
		/* enable replication vlan tag stripping */
		reg = rd32(E1000_RPLOLR);
		reg |= E1000_RPLOLR_STRVLAN;
		wr32(E1000_RPLOLR, reg);
		/* fall through */
	case e1000_i350:
		/* none of the above registers are supported by i350 */
		break;
	}

	if (adapter->vfs_allocated_count) {
		igb_vmdq_set_loopback_pf(hw, true);
		igb_vmdq_set_replication_pf(hw, true);
		igb_vmdq_set_anti_spoofing_pf(hw, true,
					      adapter->vfs_allocated_count);
	} else {
		igb_vmdq_set_loopback_pf(hw, false);
		igb_vmdq_set_replication_pf(hw, false);