/*
 * This file is part of the Chelsio T4 Ethernet driver for Linux.
 *
 * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/crc32.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/etherdevice.h>
#include <linux/firmware.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/mdio.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/rtnetlink.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/sockios.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <net/neighbour.h>
#include <net/netevent.h>
#include <net/addrconf.h>
#include <net/bonding.h>
#include <asm/uaccess.h>

#include "cxgb4.h"
#include "t4_regs.h"
#include "t4_values.h"
#include "t4_msg.h"
#include "t4fw_api.h"
#include "t4fw_version.h"
#include "cxgb4_dcb.h"
#include "cxgb4_debugfs.h"
#include "clip_tbl.h"
#include "l2t.h"

#define DRV_VERSION "2.0.0-ko"
#define DRV_DESC "Chelsio T4/T5 Network Driver"

enum {
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES     = 16384,
	MAX_RX_BUFFERS       = 16384,
	MIN_TXQ_ENTRIES      = 32,
	MIN_CTRL_TXQ_ENTRIES = 32,
	MIN_RSPQ_ENTRIES     = 128,
	MIN_FL_ENTRIES       = 16
};

/* Host shadow copy of ingress filter entry.  This is in host native format
 * and doesn't match the ordering or bit order, etc. of the hardware or the
 * firmware command.  The use of bit-field structure elements is purely to
 * remind ourselves of the field size limitations and save memory in the case
 * where the filter table is large.
 */
struct filter_entry {
	/* Administrative fields for filter.
	 */
	u32 valid:1;            /* filter allocated and valid */
	u32 locked:1;           /* filter is administratively locked */

	u32 pending:1;          /* filter action is pending firmware reply */
	u32 smtidx:8;           /* Source MAC Table index for smac */
	struct l2t_entry *l2t;  /* Layer Two Table entry for dmac */

	/* The filter itself.  Most of this is a straight copy of information
	 * provided by the extended ioctl().  Some fields are translated to
	 * internal forms -- for instance the Ingress Queue ID passed in from
	 * the ioctl() is translated into the Absolute Ingress Queue ID.
	 */
	struct ch_filter_specification fs;
};

#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)

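/* Illustrative note (not from the original source): the effective bitmap can
 * be changed at runtime with ethtool, e.g. "ethtool -s eth0 msglvl 0x3"
 * leaves only NETIF_MSG_DRV (0x1) and NETIF_MSG_PROBE (0x2) enabled.
 */
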
/* Macros needed to support the PCI Device ID Table ...
 */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
	static struct pci_device_id cxgb4_pci_tbl[] = {
#define CH_PCI_DEVICE_ID_FUNCTION 0x4

/* Include PCI Device IDs for both PF4 and PF0-3 so our PCI probe() routine is
 * called for both.
 */
#define CH_PCI_DEVICE_ID_FUNCTION2 0x0

#define CH_PCI_ID_TABLE_ENTRY(devid) \
		{PCI_VDEVICE(CHELSIO, (devid)), 4}

#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \
		{ 0, } \
	}

#include "t4_pci_id_tbl.h"

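/* Illustrative sketch (not from the original source) of what the include
 * above expands to; the device ID 0x4401 is hypothetical and the real list
 * lives in t4_pci_id_tbl.h:
 *
 *	static struct pci_device_id cxgb4_pci_tbl[] = {
 *		{PCI_VDEVICE(CHELSIO, 0x4401), 4},
 *		...
 *		{ 0, }
 *	};
 */
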
#define FW4_FNAME "cxgb4/t4fw.bin"
#define FW5_FNAME "cxgb4/t5fw.bin"
#define FW4_CFNAME "cxgb4/t4-config.txt"
#define FW5_CFNAME "cxgb4/t5-config.txt"

MODULE_DESCRIPTION(DRV_DESC);
MODULE_AUTHOR("Chelsio Communications");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, cxgb4_pci_tbl);
MODULE_FIRMWARE(FW4_FNAME);
MODULE_FIRMWARE(FW5_FNAME);

/*
 * Normally we're willing to become the firmware's Master PF but will be happy
 * if another PF has already become the Master and initialized the adapter.
 * Setting "force_init" will cause this driver to forcibly establish itself as
 * the Master PF and initialize the adapter.
 */
static uint force_init;

module_param(force_init, uint, 0644);
MODULE_PARM_DESC(force_init, "Forcibly become Master PF and initialize adapter");

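/* Illustrative usage (not from the original source):
 *	modprobe cxgb4 force_init=1
 * makes this PF claim the Master role and initialize the adapter itself
 * rather than waiting for another PF to do so.
 */
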
/*
 * Normally if the firmware we connect to has Configuration File support, we
 * use that and only fall back to the old Driver-based initialization if the
 * Configuration File fails for some reason.  If force_old_init is set, then
 * we'll always use the old Driver-based initialization sequence.
 */
static uint force_old_init;

module_param(force_old_init, uint, 0644);
MODULE_PARM_DESC(force_old_init, "Force old initialization sequence, deprecated"
		 " parameter");

static int dflt_msg_enable = DFLT_MSG_ENABLE;

module_param(dflt_msg_enable, int, 0644);
MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T4 default message enable bitmap");

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy INTx interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and INTx interrupts
 * msi = 0: force INTx interrupts
 */
static int msi = 2;

module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use INTx (0), MSI (1) or MSI-X (2)");

/*
 * Queue interrupt hold-off timer values.  Queues default to the first of these
 * upon creation.
 */
static unsigned int intr_holdoff[SGE_NTIMERS - 1] = { 5, 10, 20, 50, 100 };

module_param_array(intr_holdoff, uint, NULL, 0644);
MODULE_PARM_DESC(intr_holdoff, "values for queue interrupt hold-off timers "
		 "0..4 in microseconds, deprecated parameter");

static unsigned int intr_cnt[SGE_NCOUNTERS - 1] = { 4, 8, 16 };

module_param_array(intr_cnt, uint, NULL, 0644);
MODULE_PARM_DESC(intr_cnt,
		 "thresholds 1..3 for queue interrupt packet counters, "
		 "deprecated parameter");

/*
 * Normally we tell the chip to deliver Ingress Packets into our DMA buffers
 * offset by 2 bytes in order to have the IP headers line up on 4-byte
 * boundaries.  This is a requirement for many architectures which will throw
 * a machine check fault if an attempt is made to access one of the 4-byte IP
 * header fields on a non-4-byte boundary.  And it's a major performance issue
 * even on some architectures which allow it like some implementations of the
 * x86 ISA.  However, some architectures don't mind this and for some very
 * edge-case performance sensitive applications (like forwarding large volumes
 * of small packets), setting this DMA offset to 0 will decrease the number of
 * PCI-E Bus transfers enough to measurably affect performance.
 */
static int rx_dma_offset = 2;

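/* Worked example (illustrative): a standard Ethernet header is 14 bytes, so
 * a frame DMA'd to a 4-byte aligned buffer puts the IP header at offset 14,
 * which is not 4-byte aligned.  With the default 2-byte pad above, the IP
 * header lands at offset 2 + 14 = 16, a 4-byte boundary.
 */
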
static bool vf_acls;

#ifdef CONFIG_PCI_IOV
module_param(vf_acls, bool, 0644);
MODULE_PARM_DESC(vf_acls, "if set enable virtualization L2 ACL enforcement, "
		 "deprecated parameter");

/* Configure the number of PCI-E Virtual Function which are to be instantiated
 * on SR-IOV Capable Physical Functions.
 */
static unsigned int num_vf[NUM_OF_PF_WITH_SRIOV];

module_param_array(num_vf, uint, NULL, 0644);
MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3");
#endif

/* TX Queue select used to determine what algorithm to use for selecting TX
 * queue. Select between the kernel provided function (select_queue=0) or user
 * cxgb_select_queue function (select_queue=1)
 *
 * Default: select_queue=0
 */
static int select_queue;
module_param(select_queue, int, 0644);
MODULE_PARM_DESC(select_queue,
		 "Select between kernel provided method of selecting or driver method of selecting TX queue. Default is kernel method.");

static unsigned int tp_vlan_pri_map = HW_TPL_FR_MT_PR_IV_P_FC;

module_param(tp_vlan_pri_map, uint, 0644);
MODULE_PARM_DESC(tp_vlan_pri_map, "global compressed filter configuration, "
		 "deprecated parameter");

static struct dentry *cxgb4_debugfs_root;

static LIST_HEAD(adapter_list);
static DEFINE_MUTEX(uld_mutex);
/* Adapter list to be accessed from atomic context */
static LIST_HEAD(adap_rcu_list);
static DEFINE_SPINLOCK(adap_rcu_lock);
static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
static const char *uld_str[] = { "RDMA", "iSCSI" };

static void link_report(struct net_device *dev)
{
	if (!netif_carrier_ok(dev))
		netdev_info(dev, "link down\n");
	else {
		static const char *fc[] = { "no", "Rx", "Tx", "Tx/Rx" };

		const char *s = "10Mbps";
		const struct port_info *p = netdev_priv(dev);

		switch (p->link_cfg.speed) {
		case 10000:
			s = "10Gbps";
			break;
		case 1000:
			s = "1000Mbps";
			break;
		case 100:
			s = "100Mbps";
			break;
		case 40000:
			s = "40Gbps";
			break;
		}

		netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s,
			    fc[p->link_cfg.fc]);
	}
}

#ifdef CONFIG_CHELSIO_T4_DCB
/* Set up/tear down Data Center Bridging Priority mapping for a net device. */
static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adap = pi->adapter;
	struct sge_eth_txq *txq = &adap->sge.ethtxq[pi->first_qset];
	int i;

	/* We use a simple mapping of Port TX Queue Index to DCB
	 * Priority when we're enabling DCB.
	 */
	for (i = 0; i < pi->nqsets; i++, txq++) {
		u32 name, value;
		int err;

		name = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
			FW_PARAMS_PARAM_X_V(
				FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH) |
			FW_PARAMS_PARAM_YZ_V(txq->q.cntxt_id));
		value = enable ? i : 0xffffffff;

		/* Since we can be called while atomic (from "interrupt
		 * level") we need to issue the Set Parameters Command
		 * without sleeping (timeout < 0).
		 */
		err = t4_set_params_nosleep(adap, adap->mbox, adap->fn, 0, 1,
					    &name, &value,
					    -FW_CMD_MAX_TIMEOUT);

		if (err)
			dev_err(adap->pdev_dev,
				"Can't %s DCB Priority on port %d, TX Queue %d: err=%d\n",
				enable ? "set" : "unset", pi->port_id, i, -err);
		else
			txq->dcb_prio = value;
	}
}
#endif /* CONFIG_CHELSIO_T4_DCB */

void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat)
{
	struct net_device *dev = adapter->port[port_id];

	/* Skip changes from disabled ports. */
	if (netif_running(dev) && link_stat != netif_carrier_ok(dev)) {
		if (link_stat)
			netif_carrier_on(dev);
		else {
#ifdef CONFIG_CHELSIO_T4_DCB
			cxgb4_dcb_state_init(dev);
			dcb_tx_queue_prio_enable(dev, false);
#endif /* CONFIG_CHELSIO_T4_DCB */
			netif_carrier_off(dev);
		}

		link_report(dev);
	}
}

void t4_os_portmod_changed(const struct adapter *adap, int port_id)
{
	static const char *mod_str[] = {
		NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
	};

	const struct net_device *dev = adap->port[port_id];
	const struct port_info *pi = netdev_priv(dev);

	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
		netdev_info(dev, "port module unplugged\n");
	else if (pi->mod_type < ARRAY_SIZE(mod_str))
		netdev_info(dev, "%s module inserted\n", mod_str[pi->mod_type]);
}

/*
 * Configure the exact and hash address filters to handle a port's multicast
 * and secondary unicast MAC addresses.
 */
static int set_addr_filters(const struct net_device *dev, bool sleep)
{
	u64 mhash = 0;
	u64 uhash = 0;
	bool free = true;
	u16 filt_idx[7];
	const u8 *addr[7];
	int ret, naddr = 0;
	const struct netdev_hw_addr *ha;
	int uc_cnt = netdev_uc_count(dev);
	int mc_cnt = netdev_mc_count(dev);
	const struct port_info *pi = netdev_priv(dev);
	unsigned int mb = pi->adapter->fn;

	/* first do the secondary unicast addresses */
	netdev_for_each_uc_addr(ha, dev) {
		addr[naddr++] = ha->addr;
		if (--uc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
			ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
					naddr, addr, filt_idx, &uhash, sleep);
			if (ret < 0)
				return ret;

			free = false;
			naddr = 0;
		}
	}

	/* next set up the multicast addresses */
	netdev_for_each_mc_addr(ha, dev) {
		addr[naddr++] = ha->addr;
		if (--mc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
			ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
					naddr, addr, filt_idx, &mhash, sleep);
			if (ret < 0)
				return ret;

			free = false;
			naddr = 0;
		}
	}

	return t4_set_addr_hash(pi->adapter, mb, pi->viid, uhash != 0,
				uhash | mhash, sleep);
}

int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */
module_param(dbfifo_int_thresh, int, 0644);
MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold");

/*
 * usecs to sleep while draining the dbfifo
 */
static int dbfifo_drain_delay = 1000;
module_param(dbfifo_drain_delay, int, 0644);
MODULE_PARM_DESC(dbfifo_drain_delay,
		 "usecs to sleep while draining the dbfifo");

/*
 * Set Rx properties of a port, such as promiscuity, address filters, and MTU.
 * If @mtu is -1 it is left unchanged.
 */
static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
{
	int ret;
	struct port_info *pi = netdev_priv(dev);

	ret = set_addr_filters(dev, sleep_ok);
	if (ret == 0)
		ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, mtu,
				    (dev->flags & IFF_PROMISC) ? 1 : 0,
				    (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1,
				    sleep_ok);
	return ret;
}

/*
 *	link_start - enable a port
 *	@dev: the port to enable
 *
 *	Performs the MAC and PHY actions needed to enable a port.
 */
static int link_start(struct net_device *dev)
{
	int ret;
	struct port_info *pi = netdev_priv(dev);
	unsigned int mb = pi->adapter->fn;

	/*
	 * We do not set address filters and promiscuity here, the stack does
	 * that step explicitly.
	 */
	ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1,
			    !!(dev->features & NETIF_F_HW_VLAN_CTAG_RX), true);
	if (ret == 0) {
		ret = t4_change_mac(pi->adapter, mb, pi->viid,
				    pi->xact_addr_filt, dev->dev_addr, true,
				    true);
		if (ret >= 0) {
			pi->xact_addr_filt = ret;
			ret = 0;
		}
	}
	if (ret == 0)
		ret = t4_link_start(pi->adapter, mb, pi->tx_chan,
				    &pi->link_cfg);
	if (ret == 0) {
		local_bh_disable();
		ret = t4_enable_vi_params(pi->adapter, mb, pi->viid, true,
					  true, CXGB4_DCB_ENABLED);
		local_bh_enable();
	}

	return ret;
}

int cxgb4_dcb_enabled(const struct net_device *dev)
{
#ifdef CONFIG_CHELSIO_T4_DCB
	struct port_info *pi = netdev_priv(dev);

	if (!pi->dcb.enabled)
		return 0;

	return ((pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED) ||
		(pi->dcb.state == CXGB4_DCB_STATE_HOST));
#else
	return 0;
#endif
}
EXPORT_SYMBOL(cxgb4_dcb_enabled);

#ifdef CONFIG_CHELSIO_T4_DCB
/* Handle a Data Center Bridging update message from the firmware. */
static void dcb_rpl(struct adapter *adap, const struct fw_port_cmd *pcmd)
{
	int port = FW_PORT_CMD_PORTID_G(ntohl(pcmd->op_to_portid));
	struct net_device *dev = adap->port[port];
	int old_dcb_enabled = cxgb4_dcb_enabled(dev);
	int new_dcb_enabled;

	cxgb4_dcb_handle_fw_update(adap, pcmd);
	new_dcb_enabled = cxgb4_dcb_enabled(dev);

	/* If the DCB has become enabled or disabled on the port then we're
	 * going to need to set up/tear down DCB Priority parameters for the
	 * TX Queues associated with the port.
	 */
	if (new_dcb_enabled != old_dcb_enabled)
		dcb_tx_queue_prio_enable(dev, new_dcb_enabled);
}
#endif /* CONFIG_CHELSIO_T4_DCB */

/* Clear a filter and release any of its resources that we own.  This also
 * clears the filter's "pending" status.
 */
static void clear_filter(struct adapter *adap, struct filter_entry *f)
{
	/* If the new or old filter have loopback rewriting rules then we'll
	 * need to free any existing Layer Two Table (L2T) entries of the old
	 * filter rule.  The firmware will handle freeing up any Source MAC
	 * Table (SMT) entries used for rewriting Source MAC Addresses in
	 * loopback rules.
	 */
	if (f->l2t)
		cxgb4_l2t_release(f->l2t);

	/* The zeroing of the filter rule below clears the filter valid,
	 * pending, locked flags, l2t pointer, etc. so it's all we need for
	 * this operation.
	 */
	memset(f, 0, sizeof(*f));
}

/* Handle a filter write/deletion reply.
 */
static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
{
	unsigned int idx = GET_TID(rpl);
	unsigned int nidx = idx - adap->tids.ftid_base;
	unsigned int ret;
	struct filter_entry *f;

	if (idx >= adap->tids.ftid_base && nidx <
	   (adap->tids.nftids + adap->tids.nsftids)) {
		idx = nidx;
		ret = TCB_COOKIE_G(rpl->cookie);
		f = &adap->tids.ftid_tab[idx];

		if (ret == FW_FILTER_WR_FLT_DELETED) {
			/* Clear the filter when we get confirmation from the
			 * hardware that the filter has been deleted.
			 */
			clear_filter(adap, f);
		} else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
			dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
				idx);
			clear_filter(adap, f);
		} else if (ret == FW_FILTER_WR_FLT_ADDED) {
			f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
			f->pending = 0;  /* asynchronous setup completed */
			f->valid = 1;
		} else {
			/* Something went wrong.  Issue a warning about the
			 * problem and clear everything out.
			 */
			dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n",
				idx, ret);
			clear_filter(adap, f);
		}
	}
}

/* Response queue handler for the FW event queue.
 */
static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
			  const struct pkt_gl *gl)
{
	u8 opcode = ((const struct rss_header *)rsp)->opcode;

	rsp++;                                          /* skip RSS header */

	/* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
	 */
	if (unlikely(opcode == CPL_FW4_MSG &&
	   ((const struct cpl_fw4_msg *)rsp)->type == FW_TYPE_RSSCPL)) {
		rsp++;
		opcode = ((const struct rss_header *)rsp)->opcode;
		rsp++;
		if (opcode != CPL_SGE_EGR_UPDATE) {
			dev_err(q->adap->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n"
				, opcode);
			goto out;
		}
	}

	if (likely(opcode == CPL_SGE_EGR_UPDATE)) {
		const struct cpl_sge_egr_update *p = (void *)rsp;
		unsigned int qid = EGR_QID_G(ntohl(p->opcode_qid));
		struct sge_txq *txq;

		txq = q->adap->sge.egr_map[qid - q->adap->sge.egr_start];
		txq->restarts++;
		if ((u8 *)txq < (u8 *)q->adap->sge.ofldtxq) {
			struct sge_eth_txq *eq;

			eq = container_of(txq, struct sge_eth_txq, q);
			netif_tx_wake_queue(eq->txq);
		} else {
			struct sge_ofld_txq *oq;

			oq = container_of(txq, struct sge_ofld_txq, q);
			tasklet_schedule(&oq->qresume_tsk);
		}
	} else if (opcode == CPL_FW6_MSG || opcode == CPL_FW4_MSG) {
		const struct cpl_fw6_msg *p = (void *)rsp;

#ifdef CONFIG_CHELSIO_T4_DCB
		const struct fw_port_cmd *pcmd = (const void *)p->data;
		unsigned int cmd = FW_CMD_OP_G(ntohl(pcmd->op_to_portid));
		unsigned int action =
			FW_PORT_CMD_ACTION_G(ntohl(pcmd->action_to_len16));

		if (cmd == FW_PORT_CMD &&
		    action == FW_PORT_ACTION_GET_PORT_INFO) {
			int port = FW_PORT_CMD_PORTID_G(
					be32_to_cpu(pcmd->op_to_portid));
			struct net_device *dev = q->adap->port[port];
			int state_input = ((pcmd->u.info.dcbxdis_pkd &
					    FW_PORT_CMD_DCBXDIS_F)
					   ? CXGB4_DCB_INPUT_FW_DISABLED
					   : CXGB4_DCB_INPUT_FW_ENABLED);

			cxgb4_dcb_state_fsm(dev, state_input);
		}

		if (cmd == FW_PORT_CMD &&
		    action == FW_PORT_ACTION_L2_DCB_CFG)
			dcb_rpl(q->adap, pcmd);
		else
#endif
			if (p->type == 0)
				t4_handle_fw_rpl(q->adap, p->data);
	} else if (opcode == CPL_L2T_WRITE_RPL) {
		const struct cpl_l2t_write_rpl *p = (void *)rsp;

		do_l2t_write_rpl(q->adap, p);
	} else if (opcode == CPL_SET_TCB_RPL) {
		const struct cpl_set_tcb_rpl *p = (void *)rsp;

		filter_rpl(q->adap, p);
	} else
		dev_err(q->adap->pdev_dev,
			"unexpected CPL %#x on FW event queue\n", opcode);
out:
	return 0;
}

/**
 *	uldrx_handler - response queue handler for ULD queues
 *	@q: the response queue that received the packet
 *	@rsp: the response queue descriptor holding the offload message
 *	@gl: the gather list of packet fragments
 *
 *	Deliver an ingress offload packet to a ULD.  All processing is done by
 *	the ULD, we just maintain statistics.
 */
static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
			 const struct pkt_gl *gl)
{
	struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);

	/* FW can send CPLs encapsulated in a CPL_FW4_MSG.
	 */
	if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG &&
	    ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL)
		rsp += 2;

	if (ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], rsp, gl)) {
		rxq->stats.nomem++;
		return -1;
	}

	if (gl == NULL)
		rxq->stats.imm++;
	else if (gl == CXGB4_MSG_AN)
		rxq->stats.an++;
	else
		rxq->stats.pkts++;
	return 0;
}

static void disable_msi(struct adapter *adapter)
{
	if (adapter->flags & USING_MSIX) {
		pci_disable_msix(adapter->pdev);
		adapter->flags &= ~USING_MSIX;
	} else if (adapter->flags & USING_MSI) {
		pci_disable_msi(adapter->pdev);
		adapter->flags &= ~USING_MSI;
	}
}

/*
 * Interrupt handler for non-data events used with MSI-X.
 */
static irqreturn_t t4_nondata_intr(int irq, void *cookie)
{
	struct adapter *adap = cookie;
	u32 v = t4_read_reg(adap, MYPF_REG(PL_PF_INT_CAUSE_A));

	if (v & PFSW_F) {
		adap->swintr = 1;
		t4_write_reg(adap, MYPF_REG(PL_PF_INT_CAUSE_A), v);
	}
	t4_slow_intr_handler(adap);
	return IRQ_HANDLED;
}

/*
 * Name the MSI-X interrupts.
 */
static void name_msix_vecs(struct adapter *adap)
{
	int i, j, msi_idx = 2, n = sizeof(adap->msix_info[0].desc);

	/* non-data interrupts */
	snprintf(adap->msix_info[0].desc, n, "%s", adap->port[0]->name);

	/* FW events */
	snprintf(adap->msix_info[1].desc, n, "%s-FWeventq",
		 adap->port[0]->name);

	/* Ethernet queues */
	for_each_port(adap, j) {
		struct net_device *d = adap->port[j];
		const struct port_info *pi = netdev_priv(d);

		for (i = 0; i < pi->nqsets; i++, msi_idx++)
			snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
				 d->name, i);
	}

	/* offload queues */
	for_each_ofldrxq(&adap->sge, i)
		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-ofld%d",
			 adap->port[0]->name, i);

	for_each_rdmarxq(&adap->sge, i)
		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d",
			 adap->port[0]->name, i);

	for_each_rdmaciq(&adap->sge, i)
		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d",
			 adap->port[0]->name, i);
}

static int request_msix_queue_irqs(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	int err, ethqidx, ofldqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0;
	int msi_index = 2;

	err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
			  adap->msix_info[1].desc, &s->fw_evtq);
	if (err)
		return err;

	for_each_ethrxq(s, ethqidx) {
		err = request_irq(adap->msix_info[msi_index].vec,
				  t4_sge_intr_msix, 0,
				  adap->msix_info[msi_index].desc,
				  &s->ethrxq[ethqidx].rspq);
		if (err)
			goto unwind;
		msi_index++;
	}
	for_each_ofldrxq(s, ofldqidx) {
		err = request_irq(adap->msix_info[msi_index].vec,
				  t4_sge_intr_msix, 0,
				  adap->msix_info[msi_index].desc,
				  &s->ofldrxq[ofldqidx].rspq);
		if (err)
			goto unwind;
		msi_index++;
	}
	for_each_rdmarxq(s, rdmaqidx) {
		err = request_irq(adap->msix_info[msi_index].vec,
				  t4_sge_intr_msix, 0,
				  adap->msix_info[msi_index].desc,
				  &s->rdmarxq[rdmaqidx].rspq);
		if (err)
			goto unwind;
		msi_index++;
	}
	for_each_rdmaciq(s, rdmaciqqidx) {
		err = request_irq(adap->msix_info[msi_index].vec,
				  t4_sge_intr_msix, 0,
				  adap->msix_info[msi_index].desc,
				  &s->rdmaciq[rdmaciqqidx].rspq);
		if (err)
			goto unwind;
		msi_index++;
	}
	return 0;

unwind:
	while (--rdmaciqqidx >= 0)
		free_irq(adap->msix_info[--msi_index].vec,
			 &s->rdmaciq[rdmaciqqidx].rspq);
	while (--rdmaqidx >= 0)
		free_irq(adap->msix_info[--msi_index].vec,
			 &s->rdmarxq[rdmaqidx].rspq);
	while (--ofldqidx >= 0)
		free_irq(adap->msix_info[--msi_index].vec,
			 &s->ofldrxq[ofldqidx].rspq);
	while (--ethqidx >= 0)
		free_irq(adap->msix_info[--msi_index].vec,
			 &s->ethrxq[ethqidx].rspq);
	free_irq(adap->msix_info[1].vec, &s->fw_evtq);
	return err;
}

static void free_msix_queue_irqs(struct adapter *adap)
{
	int i, msi_index = 2;
	struct sge *s = &adap->sge;

	free_irq(adap->msix_info[1].vec, &s->fw_evtq);
	for_each_ethrxq(s, i)
		free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq);
	for_each_ofldrxq(s, i)
		free_irq(adap->msix_info[msi_index++].vec, &s->ofldrxq[i].rspq);
	for_each_rdmarxq(s, i)
		free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq);
	for_each_rdmaciq(s, i)
		free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq);
}

/**
 *	write_rss - write the RSS table for a given port
 *	@pi: the port
 *	@queues: array of queue indices for RSS
 *
 *	Sets up the portion of the HW RSS table for the port's VI to distribute
 *	packets to the Rx queues in @queues.
 */
static int write_rss(const struct port_info *pi, const u16 *queues)
{
	u16 *rss;
	int i, err;
	const struct sge_eth_rxq *q = &pi->adapter->sge.ethrxq[pi->first_qset];

	rss = kmalloc(pi->rss_size * sizeof(u16), GFP_KERNEL);
	if (!rss)
		return -ENOMEM;

	/* map the queue indices to queue ids */
	for (i = 0; i < pi->rss_size; i++, queues++)
		rss[i] = q[*queues].rspq.abs_id;

	err = t4_config_rss_range(pi->adapter, pi->adapter->fn, pi->viid, 0,
				  pi->rss_size, rss, pi->rss_size);
	kfree(rss);
	return err;
}

/**
 *	setup_rss - configure RSS
 *	@adap: the adapter
 *
 *	Sets up RSS for each port.
 */
static int setup_rss(struct adapter *adap)
{
	int i, err;

	for_each_port(adap, i) {
		const struct port_info *pi = adap2pinfo(adap, i);

		err = write_rss(pi, pi->rss);
		if (err)
			return err;
	}
	return 0;
}

/*
 * Return the channel of the ingress queue with the given qid.
 */
static unsigned int rxq_to_chan(const struct sge *p, unsigned int qid)
{
	qid -= p->ingr_start;
	return netdev2pinfo(p->ingr_map[qid]->netdev)->tx_chan;
}

/*
 * Wait until all NAPI handlers are descheduled.
 */
static void quiesce_rx(struct adapter *adap)
{
	int i;

	for (i = 0; i < adap->sge.ingr_sz; i++) {
		struct sge_rspq *q = adap->sge.ingr_map[i];

		if (q && q->handler) {
			napi_disable(&q->napi);
			local_bh_disable();
			while (!cxgb_poll_lock_napi(q))
				mdelay(1);
			local_bh_enable();
		}
	}
}

/* Disable interrupt and napi handler */
static void disable_interrupts(struct adapter *adap)
{
	if (adap->flags & FULL_INIT_DONE) {
		t4_intr_disable(adap);
		if (adap->flags & USING_MSIX) {
			free_msix_queue_irqs(adap);
			free_irq(adap->msix_info[0].vec, adap);
		} else {
			free_irq(adap->pdev->irq, adap);
		}
		quiesce_rx(adap);
	}
}

/*
 * Enable NAPI scheduling and interrupt generation for all Rx queues.
 */
static void enable_rx(struct adapter *adap)
{
	int i;

	for (i = 0; i < adap->sge.ingr_sz; i++) {
		struct sge_rspq *q = adap->sge.ingr_map[i];

		if (!q)
			continue;
		if (q->handler) {
			cxgb_busy_poll_init_lock(q);
			napi_enable(&q->napi);
		}
		/* 0-increment GTS to start the timer and enable interrupts */
		t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
			     SEINTARM_V(q->intr_params) |
			     INGRESSQID_V(q->cntxt_id));
	}
}

/**
 *	setup_sge_queues - configure SGE Tx/Rx/response queues
 *	@adap: the adapter
 *
 *	Determines how many sets of SGE queues to use and initializes them.
 *	We support multiple queue sets per port if we have MSI-X, otherwise
 *	just one queue set per port.
 */
static int setup_sge_queues(struct adapter *adap)
{
	int err, msi_idx, i, j;
	struct sge *s = &adap->sge;

	bitmap_zero(s->starving_fl, s->egr_sz);
	bitmap_zero(s->txq_maperr, s->egr_sz);

	if (adap->flags & USING_MSIX)
		msi_idx = 1;         /* vector 0 is for non-queue interrupts */
	else {
		err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
				       NULL, NULL);
		if (err)
			return err;
		msi_idx = -((int)s->intrq.abs_id + 1);
	}

	/* NOTE: If you add/delete any Ingress/Egress Queue allocations in here,
	 * don't forget to update the following which need to be
	 * synchronized to any changes here.
	 *
	 * 1. The calculations of MAX_INGQ in cxgb4.h.
	 *
	 * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs
	 *    to accommodate any new/deleted Ingress Queues
	 *    which need MSI-X Vectors.
	 *
	 * 3. Update sge_qinfo_show() to include information on the
	 *    new/deleted queues.
	 */
	err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
			       msi_idx, NULL, fwevtq_handler);
	if (err) {
freeout:	t4_free_sge_resources(adap);
		return err;
	}

	for_each_port(adap, i) {
		struct net_device *dev = adap->port[i];
		struct port_info *pi = netdev_priv(dev);
		struct sge_eth_rxq *q = &s->ethrxq[pi->first_qset];
		struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];

		for (j = 0; j < pi->nqsets; j++, q++) {
			if (msi_idx > 0)
				msi_idx++;
			err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
					       msi_idx, &q->fl,
					       t4_ethrx_handler);
			if (err)
				goto freeout;
			q->rspq.idx = j;
			memset(&q->stats, 0, sizeof(q->stats));
		}
		for (j = 0; j < pi->nqsets; j++, t++) {
			err = t4_sge_alloc_eth_txq(adap, t, dev,
					netdev_get_tx_queue(dev, j),
					s->fw_evtq.cntxt_id);
			if (err)
				goto freeout;
		}
	}

	j = s->ofldqsets / adap->params.nports; /* ofld queues per channel */
	for_each_ofldrxq(s, i) {
		struct sge_ofld_rxq *q = &s->ofldrxq[i];
		struct net_device *dev = adap->port[i / j];

		if (msi_idx > 0)
			msi_idx++;
		err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx,
				       q->fl.size ? &q->fl : NULL,
				       uldrx_handler);
		if (err)
			goto freeout;
		memset(&q->stats, 0, sizeof(q->stats));
		s->ofld_rxq[i] = q->rspq.abs_id;
		err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], dev,
					    s->fw_evtq.cntxt_id);
		if (err)
			goto freeout;
	}

	for_each_rdmarxq(s, i) {
		struct sge_ofld_rxq *q = &s->rdmarxq[i];

		if (msi_idx > 0)
			msi_idx++;
		err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i],
				       msi_idx, q->fl.size ? &q->fl : NULL,
				       uldrx_handler);
		if (err)
			goto freeout;
		memset(&q->stats, 0, sizeof(q->stats));
		s->rdma_rxq[i] = q->rspq.abs_id;
	}

	for_each_rdmaciq(s, i) {
		struct sge_ofld_rxq *q = &s->rdmaciq[i];

		if (msi_idx > 0)
			msi_idx++;
		err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i],
				       msi_idx, q->fl.size ? &q->fl : NULL,
				       uldrx_handler);
		if (err)
			goto freeout;
		memset(&q->stats, 0, sizeof(q->stats));
		s->rdma_ciq[i] = q->rspq.abs_id;
	}

	for_each_port(adap, i) {
		/*
		 * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't
		 * have RDMA queues, and that's the right value.
		 */
		err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i],
					    s->fw_evtq.cntxt_id,
					    s->rdmarxq[i].rspq.cntxt_id);
		if (err)
			goto freeout;
	}

	t4_write_reg(adap, is_t4(adap->params.chip) ?
				MPS_TRC_RSS_CONTROL_A :
				MPS_T5_TRC_RSS_CONTROL_A,
		     RSSCONTROL_V(netdev2pinfo(adap->port[0])->tx_chan) |
		     QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id));
	return 0;
}

/*
 * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc.
 * The allocated memory is cleared.
 */
void *t4_alloc_mem(size_t size)
{
	void *p = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);

	if (!p)
		p = vzalloc(size);
	return p;
}

/*
 * Free memory allocated through t4_alloc_mem().
 */
void t4_free_mem(void *addr)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}

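/* Usage sketch (illustrative, not from the original source): the pair above
 * suits possibly-large tables whose size depends on adapter state, e.g.
 *
 *	struct filter_entry *tab = t4_alloc_mem(ntids * sizeof(*tab));
 *	...
 *	t4_free_mem(tab);
 *
 * where "ntids" is a hypothetical count; t4_free_mem() picks vfree() or
 * kfree() based on where the pointer actually lives.
 */
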
/* Send a Work Request to write the filter at a specified index.  We construct
 * a Firmware Filter Work Request to have the work done and put the indicated
 * filter into "pending" mode which will prevent any further actions against
 * it till we get a reply from the firmware on the completion status of the
 * request.
 */
static int set_filter_wr(struct adapter *adapter, int fidx)
{
	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
	struct sk_buff *skb;
	struct fw_filter_wr *fwr;
	unsigned int ftid;

	/* If the new filter requires loopback Destination MAC and/or VLAN
	 * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
	 * the filter.
	 */
	if (f->fs.newdmac || f->fs.newvlan) {
		/* allocate L2T entry for new filter */
		f->l2t = t4_l2t_alloc_switching(adapter->l2t);
		if (f->l2t == NULL)
			return -EAGAIN;
		if (t4_l2t_set_switching(adapter, f->l2t, f->fs.vlan,
					f->fs.eport, f->fs.dmac)) {
			cxgb4_l2t_release(f->l2t);
			f->l2t = NULL;
			return -ENOMEM;
		}
	}

	ftid = adapter->tids.ftid_base + fidx;

	skb = alloc_skb(sizeof(*fwr), GFP_KERNEL | __GFP_NOFAIL);
	fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr));
	memset(fwr, 0, sizeof(*fwr));

	/* It would be nice to put most of the following in t4_hw.c but most
	 * of the work is translating the cxgbtool ch_filter_specification
	 * into the Work Request and the definition of that structure is
	 * currently in cxgbtool.h which isn't appropriate to pull into the
	 * common code.  We may eventually try to come up with a more neutral
	 * filter specification structure but for now it's easiest to simply
	 * put this fairly direct code in line ...
	 */
	fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
	fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16));
	fwr->tid_to_iq =
		htonl(FW_FILTER_WR_TID_V(ftid) |
		      FW_FILTER_WR_RQTYPE_V(f->fs.type) |
		      FW_FILTER_WR_NOREPLY_V(0) |
		      FW_FILTER_WR_IQ_V(f->fs.iq));
	fwr->del_filter_to_l2tix =
		htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) |
		      FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) |
		      FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) |
		      FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) |
		      FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
		      FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
		      FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
		      FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
		      FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
					     f->fs.newvlan == VLAN_REWRITE) |
		      FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
					    f->fs.newvlan == VLAN_REWRITE) |
		      FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) |
		      FW_FILTER_WR_TXCHAN_V(f->fs.eport) |
		      FW_FILTER_WR_PRIO_V(f->fs.prio) |
		      FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0));
	fwr->ethtype = htons(f->fs.val.ethtype);
	fwr->ethtypem = htons(f->fs.mask.ethtype);
	fwr->frag_to_ovlan_vldm =
		(FW_FILTER_WR_FRAG_V(f->fs.val.frag) |
		 FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) |
		 FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) |
		 FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
		 FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
		 FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
	fwr->smac_sel = 0;
	fwr->rx_chan_rx_rpl_iq =
		htons(FW_FILTER_WR_RX_CHAN_V(0) |
		      FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
	fwr->maci_to_matchtypem =
		htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) |
		      FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) |
		      FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) |
		      FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) |
		      FW_FILTER_WR_PORT_V(f->fs.val.iport) |
		      FW_FILTER_WR_PORTM_V(f->fs.mask.iport) |
		      FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) |
		      FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype));
	fwr->ptcl = f->fs.val.proto;
	fwr->ptclm = f->fs.mask.proto;
	fwr->ttyp = f->fs.val.tos;
	fwr->ttypm = f->fs.mask.tos;
	fwr->ivlan = htons(f->fs.val.ivlan);
	fwr->ivlanm = htons(f->fs.mask.ivlan);
	fwr->ovlan = htons(f->fs.val.ovlan);
	fwr->ovlanm = htons(f->fs.mask.ovlan);
	memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip));
	memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm));
	memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip));
	memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm));
	fwr->lp = htons(f->fs.val.lport);
	fwr->lpm = htons(f->fs.mask.lport);
	fwr->fp = htons(f->fs.val.fport);
	fwr->fpm = htons(f->fs.mask.fport);
	if (f->fs.newsmac)
		memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));

	/* Mark the filter as "pending" and ship off the Filter Work Request.
	 * When we get the Work Request Reply we'll clear the pending status.
	 */
	f->pending = 1;
	set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
	t4_ofld_send(adapter, skb);
	return 0;
}

/* Delete the filter at a specified index.
 */
static int del_filter_wr(struct adapter *adapter, int fidx)
{
	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
	struct sk_buff *skb;
	struct fw_filter_wr *fwr;
	unsigned int len, ftid;

	len = sizeof(*fwr);
	ftid = adapter->tids.ftid_base + fidx;

	skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
	fwr = (struct fw_filter_wr *)__skb_put(skb, len);
	t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id);

	/* Mark the filter as "pending" and ship off the Filter Work Request.
	 * When we get the Work Request Reply we'll clear the pending status.
	 */
	f->pending = 1;
	t4_mgmt_tx(adapter, skb);
	return 0;
}

static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
			     void *accel_priv, select_queue_fallback_t fallback)
{
	int txq;

#ifdef CONFIG_CHELSIO_T4_DCB
	/* If a Data Center Bridging has been successfully negotiated on this
	 * link then we'll use the skb's priority to map it to a TX Queue.
	 * The skb's priority is determined via the VLAN Tag Priority Code
	 * Point field.
	 */
	if (cxgb4_dcb_enabled(dev)) {
		u16 vlan_tci;
		int err;

		err = vlan_get_tag(skb, &vlan_tci);
		if (unlikely(err)) {
			if (net_ratelimit())
				netdev_warn(dev,
					    "TX Packet without VLAN Tag on DCB Link\n");
			txq = 0;
		} else {
			txq = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
		}
		return txq;
	}
#endif /* CONFIG_CHELSIO_T4_DCB */

	if (select_queue) {
		txq = (skb_rx_queue_recorded(skb)
			? skb_get_rx_queue(skb)
			: smp_processor_id());

		while (unlikely(txq >= dev->real_num_tx_queues))
			txq -= dev->real_num_tx_queues;

		return txq;
	}

	return fallback(dev, skb) % dev->real_num_tx_queues;
}

static inline int is_offload(const struct adapter *adap)
{
	return adap->params.offload;
}

/*
 * Implementation of ethtool operations.
 */

static u32 get_msglevel(struct net_device *dev)
{
	return netdev2adap(dev)->msg_enable;
}

static void set_msglevel(struct net_device *dev, u32 val)
{
	netdev2adap(dev)->msg_enable = val;
}

static char stats_strings[][ETH_GSTRING_LEN] = {
	/* ... */
	"TxBroadcastFrames  ",
	"TxMulticastFrames  ",
	/* ... */
	"TxFrames128To255   ",
	"TxFrames256To511   ",
	"TxFrames512To1023  ",
	"TxFrames1024To1518 ",
	"TxFrames1519ToMax  ",
	/* ... */
	"RxBroadcastFrames  ",
	"RxMulticastFrames  ",
	/* ... */
	"RxFrames128To255   ",
	"RxFrames256To511   ",
	"RxFrames512To1023  ",
	"RxFrames1024To1518 ",
	"RxFrames1519ToMax  ",
	/* ... */
	"RxBG0FramesDropped ",
	"RxBG1FramesDropped ",
	"RxBG2FramesDropped ",
	"RxBG3FramesDropped ",
	"RxBG0FramesTrunc   ",
	"RxBG1FramesTrunc   ",
	"RxBG2FramesTrunc   ",
	"RxBG3FramesTrunc   ",
	/* ... */
	"WriteCoalSuccess   ",
	"WriteCoalFail      ",
};

static int get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(stats_strings);
	default:
		return -EOPNOTSUPP;
	}
}

#define T4_REGMAP_SIZE (160 * 1024)
#define T5_REGMAP_SIZE (332 * 1024)

static int get_regs_len(struct net_device *dev)
{
	struct adapter *adap = netdev2adap(dev);

	if (is_t4(adap->params.chip))
		return T4_REGMAP_SIZE;
	else
		return T5_REGMAP_SIZE;
}

static int get_eeprom_len(struct net_device *dev)
{
	return EEPROMSIZE;
}

static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
	struct adapter *adapter = netdev2adap(dev);
	u32 exprom_vers;

	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
	strlcpy(info->bus_info, pci_name(adapter->pdev),
		sizeof(info->bus_info));

	if (adapter->params.fw_vers)
		snprintf(info->fw_version, sizeof(info->fw_version),
			 "%u.%u.%u.%u, TP %u.%u.%u.%u",
			 FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers),
			 FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers),
			 FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers),
			 FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers),
			 FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers),
			 FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers),
			 FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers),
			 FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers));

	if (!t4_get_exprom_version(adapter, &exprom_vers))
		snprintf(info->erom_version, sizeof(info->erom_version),
			 "%u.%u.%u.%u",
			 FW_HDR_FW_VER_MAJOR_G(exprom_vers),
			 FW_HDR_FW_VER_MINOR_G(exprom_vers),
			 FW_HDR_FW_VER_MICRO_G(exprom_vers),
			 FW_HDR_FW_VER_BUILD_G(exprom_vers));
}

static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
	if (stringset == ETH_SS_STATS)
		memcpy(data, stats_strings, sizeof(stats_strings));
}

/*
 * port stats maintained per queue of the port.  They should be in the same
 * order as in stats_strings above.
 */
struct queue_port_stats {
	u64 tso;
	u64 tx_csum;
	u64 rx_csum;
	u64 vlan_ex;
	u64 vlan_ins;
	u64 gro_pkts;
	u64 gro_merged;
};

static void collect_sge_port_stats(const struct adapter *adap,
		const struct port_info *p, struct queue_port_stats *s)
{
	int i;
	const struct sge_eth_txq *tx = &adap->sge.ethtxq[p->first_qset];
	const struct sge_eth_rxq *rx = &adap->sge.ethrxq[p->first_qset];

	memset(s, 0, sizeof(*s));
	for (i = 0; i < p->nqsets; i++, rx++, tx++) {
		s->tso += tx->tso;
		s->tx_csum += tx->tx_cso;
		s->rx_csum += rx->stats.rx_cso;
		s->vlan_ex += rx->stats.vlan_ex;
		s->vlan_ins += tx->vlan_ins;
		s->gro_pkts += rx->stats.lro_pkts;
		s->gro_merged += rx->stats.lro_merged;
	}
}

static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
		      u64 *data)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;
	u32 val1, val2;

	t4_get_port_stats(adapter, pi->tx_chan, (struct port_stats *)data);

	data += sizeof(struct port_stats) / sizeof(u64);
	collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
	data += sizeof(struct queue_port_stats) / sizeof(u64);
	if (!is_t4(adapter->params.chip)) {
		t4_write_reg(adapter, SGE_STAT_CFG_A, STATSOURCE_T5_V(7));
		val1 = t4_read_reg(adapter, SGE_STAT_TOTAL_A);
		val2 = t4_read_reg(adapter, SGE_STAT_MATCH_A);
		*data = val1 - val2;
		data++;
		*data = val2;
	} else {
		memset(data, 0, 2 * sizeof(u64));
	}
}

/*
 * Return a version number to identify the type of adapter.  The scheme is:
 * - bits 0..9: chip version
 * - bits 10..15: chip revision
 * - bits 16..23: register dump version
 */
static inline unsigned int mk_adap_vers(const struct adapter *ap)
{
	return CHELSIO_CHIP_VERSION(ap->params.chip) |
		(CHELSIO_CHIP_RELEASE(ap->params.chip) << 10) | (1 << 16);
}

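/* Worked example (illustrative): for a chip with version 5 and revision 1,
 * the packed value is 5 | (1 << 10) | (1 << 16) = 0x10405, i.e. register
 * dump version 1, chip revision 1, chip version 5.
 */
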
static void reg_block_dump(struct adapter *ap, void *buf, unsigned int start,
			   unsigned int end)
{
	u32 *p = buf + start;

	for ( ; start <= end; start += sizeof(u32))
		*p++ = t4_read_reg(ap, start);
}

static void get_regs(struct net_device *dev, struct ethtool_regs *regs,
		     void *buf)
{
	static const unsigned int t4_reg_ranges[] = {
		/* (register range pairs elided) */
	};

	static const unsigned int t5_reg_ranges[] = {
		/* (register range pairs elided) */
	};

	int i;
	struct adapter *ap = netdev2adap(dev);
	static const unsigned int *reg_ranges;
	int arr_size = 0, buf_size = 0;

	if (is_t4(ap->params.chip)) {
		reg_ranges = &t4_reg_ranges[0];
		arr_size = ARRAY_SIZE(t4_reg_ranges);
		buf_size = T4_REGMAP_SIZE;
	} else {
		reg_ranges = &t5_reg_ranges[0];
		arr_size = ARRAY_SIZE(t5_reg_ranges);
		buf_size = T5_REGMAP_SIZE;
	}

	regs->version = mk_adap_vers(ap);

	memset(buf, 0, buf_size);
	for (i = 0; i < arr_size; i += 2)
		reg_block_dump(ap, buf, reg_ranges[i], reg_ranges[i + 1]);
}

static int restart_autoneg(struct net_device *dev)
{
	struct port_info *p = netdev_priv(dev);

	if (!netif_running(dev))
		return -EAGAIN;
	if (p->link_cfg.autoneg != AUTONEG_ENABLE)
		return -EINVAL;
	t4_restart_aneg(p->adapter, p->adapter->fn, p->tx_chan);
	return 0;
}

static int identify_port(struct net_device *dev,
			 enum ethtool_phys_id_state state)
{
	unsigned int val;
	struct adapter *adap = netdev2adap(dev);

	if (state == ETHTOOL_ID_ACTIVE)
		val = 0xffff;
	else if (state == ETHTOOL_ID_INACTIVE)
		val = 0;
	else
		return -EINVAL;

	return t4_identify_port(adap, adap->fn, netdev2pinfo(dev)->viid, val);
}

static unsigned int from_fw_linkcaps(enum fw_port_type type, unsigned int caps)
{
	unsigned int v = 0;

	if (type == FW_PORT_TYPE_BT_SGMII || type == FW_PORT_TYPE_BT_XFI ||
	    type == FW_PORT_TYPE_BT_XAUI) {
		v |= SUPPORTED_TP;
		if (caps & FW_PORT_CAP_SPEED_100M)
			v |= SUPPORTED_100baseT_Full;
		if (caps & FW_PORT_CAP_SPEED_1G)
			v |= SUPPORTED_1000baseT_Full;
		if (caps & FW_PORT_CAP_SPEED_10G)
			v |= SUPPORTED_10000baseT_Full;
	} else if (type == FW_PORT_TYPE_KX4 || type == FW_PORT_TYPE_KX) {
		v |= SUPPORTED_Backplane;
		if (caps & FW_PORT_CAP_SPEED_1G)
			v |= SUPPORTED_1000baseKX_Full;
		if (caps & FW_PORT_CAP_SPEED_10G)
			v |= SUPPORTED_10000baseKX4_Full;
	} else if (type == FW_PORT_TYPE_KR)
		v |= SUPPORTED_Backplane | SUPPORTED_10000baseKR_Full;
	else if (type == FW_PORT_TYPE_BP_AP)
		v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
		     SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full;
	else if (type == FW_PORT_TYPE_BP4_AP)
		v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
		     SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full |
		     SUPPORTED_10000baseKX4_Full;
	else if (type == FW_PORT_TYPE_FIBER_XFI ||
		 type == FW_PORT_TYPE_FIBER_XAUI ||
		 type == FW_PORT_TYPE_SFP ||
		 type == FW_PORT_TYPE_QSFP_10G ||
		 type == FW_PORT_TYPE_QSA) {
		v |= SUPPORTED_FIBRE;
		if (caps & FW_PORT_CAP_SPEED_1G)
			v |= SUPPORTED_1000baseT_Full;
		if (caps & FW_PORT_CAP_SPEED_10G)
			v |= SUPPORTED_10000baseT_Full;
	} else if (type == FW_PORT_TYPE_BP40_BA ||
		   type == FW_PORT_TYPE_QSFP) {
		v |= SUPPORTED_40000baseSR4_Full;
		v |= SUPPORTED_FIBRE;
	}

	if (caps & FW_PORT_CAP_ANEG)
		v |= SUPPORTED_Autoneg;
	return v;
}

static unsigned int to_fw_linkcaps(unsigned int caps)
{
	unsigned int v = 0;

	if (caps & ADVERTISED_100baseT_Full)
		v |= FW_PORT_CAP_SPEED_100M;
	if (caps & ADVERTISED_1000baseT_Full)
		v |= FW_PORT_CAP_SPEED_1G;
	if (caps & ADVERTISED_10000baseT_Full)
		v |= FW_PORT_CAP_SPEED_10G;
	if (caps & ADVERTISED_40000baseSR4_Full)
		v |= FW_PORT_CAP_SPEED_40G;
	return v;
}

static int get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
	const struct port_info *p = netdev_priv(dev);

	if (p->port_type == FW_PORT_TYPE_BT_SGMII ||
	    p->port_type == FW_PORT_TYPE_BT_XFI ||
	    p->port_type == FW_PORT_TYPE_BT_XAUI)
		cmd->port = PORT_TP;
	else if (p->port_type == FW_PORT_TYPE_FIBER_XFI ||
		 p->port_type == FW_PORT_TYPE_FIBER_XAUI)
		cmd->port = PORT_FIBRE;
	else if (p->port_type == FW_PORT_TYPE_SFP ||
		 p->port_type == FW_PORT_TYPE_QSFP_10G ||
		 p->port_type == FW_PORT_TYPE_QSA ||
		 p->port_type == FW_PORT_TYPE_QSFP) {
		if (p->mod_type == FW_PORT_MOD_TYPE_LR ||
		    p->mod_type == FW_PORT_MOD_TYPE_SR ||
		    p->mod_type == FW_PORT_MOD_TYPE_ER ||
		    p->mod_type == FW_PORT_MOD_TYPE_LRM)
			cmd->port = PORT_FIBRE;
		else if (p->mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
			 p->mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
			cmd->port = PORT_DA;
		else
			cmd->port = PORT_OTHER;
	} else
		cmd->port = PORT_OTHER;

	if (p->mdio_addr >= 0) {
		cmd->phy_address = p->mdio_addr;
		cmd->transceiver = XCVR_EXTERNAL;
		cmd->mdio_support = p->port_type == FW_PORT_TYPE_BT_SGMII ?
			MDIO_SUPPORTS_C22 : MDIO_SUPPORTS_C45;
	} else {
		cmd->phy_address = 0;  /* not really, but no better option */
		cmd->transceiver = XCVR_INTERNAL;
		cmd->mdio_support = 0;
	}

	cmd->supported = from_fw_linkcaps(p->port_type, p->link_cfg.supported);
	cmd->advertising = from_fw_linkcaps(p->port_type,
					    p->link_cfg.advertising);
	ethtool_cmd_speed_set(cmd,
			      netif_carrier_ok(dev) ? p->link_cfg.speed : 0);
	cmd->duplex = DUPLEX_FULL;
	cmd->autoneg = p->link_cfg.autoneg;
	cmd->maxtxpkt = 0;
	cmd->maxrxpkt = 0;
	return 0;
}

static unsigned int speed_to_caps(int speed)
{
	if (speed == 100)
		return FW_PORT_CAP_SPEED_100M;
	if (speed == 1000)
		return FW_PORT_CAP_SPEED_1G;
	if (speed == 10000)
		return FW_PORT_CAP_SPEED_10G;
	if (speed == 40000)
		return FW_PORT_CAP_SPEED_40G;
	return 0;
}

static int set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
	unsigned int cap;
	struct port_info *p = netdev_priv(dev);
	struct link_config *lc = &p->link_cfg;
	u32 speed = ethtool_cmd_speed(cmd);

	if (cmd->duplex != DUPLEX_FULL)     /* only full-duplex supported */
		return -EINVAL;

	if (!(lc->supported & FW_PORT_CAP_ANEG)) {
		/*
		 * PHY offers a single speed.  See if that's what's
		 * being requested.
		 */
		if (cmd->autoneg == AUTONEG_DISABLE &&
		    (lc->supported & speed_to_caps(speed)))
			return 0;
		return -EINVAL;
	}

	if (cmd->autoneg == AUTONEG_DISABLE) {
		cap = speed_to_caps(speed);

		if (!(lc->supported & cap) ||
		    (speed == 1000) ||
		    (speed == 10000) ||
		    (speed == 40000))
			return -EINVAL;
		lc->requested_speed = cap;
		lc->advertising = 0;
	} else {
		cap = to_fw_linkcaps(cmd->advertising);
		if (!(lc->supported & cap))
			return -EINVAL;
		lc->requested_speed = 0;
		lc->advertising = cap | FW_PORT_CAP_ANEG;
	}
	lc->autoneg = cmd->autoneg;

	if (netif_running(dev))
		return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
				     lc);
	return 0;
}

static void get_pauseparam(struct net_device *dev,
			   struct ethtool_pauseparam *epause)
{
	struct port_info *p = netdev_priv(dev);

	epause->autoneg = (p->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
	epause->rx_pause = (p->link_cfg.fc & PAUSE_RX) != 0;
	epause->tx_pause = (p->link_cfg.fc & PAUSE_TX) != 0;
}

static int set_pauseparam(struct net_device *dev,
			  struct ethtool_pauseparam *epause)
{
	struct port_info *p = netdev_priv(dev);
	struct link_config *lc = &p->link_cfg;

	if (epause->autoneg == AUTONEG_DISABLE)
		lc->requested_fc = 0;
	else if (lc->supported & FW_PORT_CAP_ANEG)
		lc->requested_fc = PAUSE_AUTONEG;
	else
		return -EINVAL;

	if (epause->rx_pause)
		lc->requested_fc |= PAUSE_RX;
	if (epause->tx_pause)
		lc->requested_fc |= PAUSE_TX;
	if (netif_running(dev))
		return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
				     lc);
	return 0;
}

static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
{
	const struct port_info *pi = netdev_priv(dev);
	const struct sge *s = &pi->adapter->sge;

	e->rx_max_pending = MAX_RX_BUFFERS;
	e->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
	e->rx_jumbo_max_pending = 0;
	e->tx_max_pending = MAX_TXQ_ENTRIES;

	e->rx_pending = s->ethrxq[pi->first_qset].fl.size - 8;
	e->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
	e->rx_jumbo_pending = 0;
	e->tx_pending = s->ethtxq[pi->first_qset].q.size;
}

static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
{
	int i;
	const struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;
	struct sge *s = &adapter->sge;

	if (e->rx_pending > MAX_RX_BUFFERS || e->rx_jumbo_pending ||
	    e->tx_pending > MAX_TXQ_ENTRIES ||
	    e->rx_mini_pending > MAX_RSPQ_ENTRIES ||
	    e->rx_mini_pending < MIN_RSPQ_ENTRIES ||
	    e->rx_pending < MIN_FL_ENTRIES || e->tx_pending < MIN_TXQ_ENTRIES)
		return -EINVAL;

	if (adapter->flags & FULL_INIT_DONE)
		return -EBUSY;

	for (i = 0; i < pi->nqsets; ++i) {
		s->ethtxq[pi->first_qset + i].q.size = e->tx_pending;
		s->ethrxq[pi->first_qset + i].fl.size = e->rx_pending + 8;
		s->ethrxq[pi->first_qset + i].rspq.size = e->rx_mini_pending;
	}
	return 0;
}

static int closest_timer(const struct sge *s, int time)
{
	int i, delta, match = 0, min_delta = INT_MAX;

	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
		delta = time - s->timer_val[i];
		if (delta < 0)
			delta = -delta;
		if (delta < min_delta) {
			min_delta = delta;
			match = i;
		}
	}
	return match;
}

static int closest_thres(const struct sge *s, int thres)
{
	int i, delta, match = 0, min_delta = INT_MAX;

	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
		delta = thres - s->counter_val[i];
		if (delta < 0)
			delta = -delta;
		if (delta < min_delta) {
			min_delta = delta;
			match = i;
		}
	}
	return match;
}

/*
 * Return a queue's interrupt hold-off time in us.  0 means no timer.
 */
unsigned int qtimer_val(const struct adapter *adap,
			const struct sge_rspq *q)
{
	unsigned int idx = q->intr_params >> 1;

	return idx < SGE_NTIMERS ? adap->sge.timer_val[idx] : 0;
}

/**
 *	set_rspq_intr_params - set a queue's interrupt holdoff parameters
 *	@q: the Rx queue
 *	@us: the hold-off time in us, or 0 to disable timer
 *	@cnt: the hold-off packet count, or 0 to disable counter
 *
 *	Sets an Rx queue's interrupt hold-off time and packet count.  At least
 *	one of the two needs to be enabled for the queue to generate interrupts.
 */
static int set_rspq_intr_params(struct sge_rspq *q,
				unsigned int us, unsigned int cnt)
{
	struct adapter *adap = q->adap;

	if ((us | cnt) == 0)
		cnt = 1;

	if (cnt) {
		int err;
		u32 v, new_idx;

		new_idx = closest_thres(&adap->sge, cnt);
		if (q->desc && q->pktcnt_idx != new_idx) {
			/* the queue has already been created, update it */
			v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
			    FW_PARAMS_PARAM_X_V(
					FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
			    FW_PARAMS_PARAM_YZ_V(q->cntxt_id);
			err = t4_set_params(adap, adap->fn, adap->fn, 0, 1, &v,
					    &new_idx);
			if (err)
				return err;
		}
		q->pktcnt_idx = new_idx;
	}

	us = us == 0 ? 6 : closest_timer(&adap->sge, us);
	q->intr_params = QINTR_TIMER_IDX(us) | (cnt > 0 ? QINTR_CNT_EN : 0);
	return 0;
}

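/* For reference (illustrative): intr_params packs the timer index into the
 * upper bits with the counter-enable flag in bit 0, so timer index 2 with
 * the counter enabled encodes as (2 << 1) | 1 = 5; qtimer_val() above
 * recovers the index with intr_params >> 1.
 */
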
/**
 * set_rx_intr_params - set a net device's RX interrupt holdoff parameters
 * @dev: the network device
 * @us: the hold-off time in us, or 0 to disable timer
 * @cnt: the hold-off packet count, or 0 to disable counter
 *
 * Set the RX interrupt hold-off parameters for a network device.
 */
static int set_rx_intr_params(struct net_device *dev,
			      unsigned int us, unsigned int cnt)
{
	int i, err;
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adap = pi->adapter;
	struct sge_eth_rxq *q = &adap->sge.ethrxq[pi->first_qset];

	for (i = 0; i < pi->nqsets; i++, q++) {
		err = set_rspq_intr_params(&q->rspq, us, cnt);
		if (err)
			return err;
	}
	return 0;
}

static int set_adaptive_rx_setting(struct net_device *dev, int adaptive_rx)
{
	int i;
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adap = pi->adapter;
	struct sge_eth_rxq *q = &adap->sge.ethrxq[pi->first_qset];

	for (i = 0; i < pi->nqsets; i++, q++)
		q->rspq.adaptive_rx = adaptive_rx;

	return 0;
}

static int get_adaptive_rx_setting(struct net_device *dev)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adap = pi->adapter;
	struct sge_eth_rxq *q = &adap->sge.ethrxq[pi->first_qset];

	return q->rspq.adaptive_rx;
}

static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
{
	set_adaptive_rx_setting(dev, c->use_adaptive_rx_coalesce);
	return set_rx_intr_params(dev, c->rx_coalesce_usecs,
				  c->rx_max_coalesced_frames);
}

static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
{
	const struct port_info *pi = netdev_priv(dev);
	const struct adapter *adap = pi->adapter;
	const struct sge_rspq *rq = &adap->sge.ethrxq[pi->first_qset].rspq;

	c->rx_coalesce_usecs = qtimer_val(adap, rq);
	c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN) ?
		adap->sge.counter_val[rq->pktcnt_idx] : 0;
	c->use_adaptive_rx_coalesce = get_adaptive_rx_setting(dev);
	return 0;
}

/**
 *	eeprom_ptov - translate a physical EEPROM address to virtual
 *	@phys_addr: the physical EEPROM address
 *	@fn: the PCI function number
 *	@sz: size of function-specific area
 *
 *	Translate a physical EEPROM address to virtual.  The first 1K is
 *	accessed through virtual addresses starting at 31K, the rest is
 *	accessed through virtual addresses starting at 0.
 *
 *	The mapping is as follows:
 *	[0..1K) -> [31K..32K)
 *	[1K..1K+A) -> [31K-A..31K)
 *	[1K+A..ES) -> [0..ES-A-1K)
 *
 *	where A = @fn * @sz, and ES = EEPROM size.
 */

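/* Worked example (illustrative, taking @sz = 1K so that A = 1K for @fn = 1):
 * physical 0 maps to virtual 31K; physical 1K, the start of the
 * function-specific area, maps to 31K - 1K = 30K; and physical 2K, the first
 * byte past that area, maps to virtual 0.
 */
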
static int eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz)
{
	fn *= sz;
	if (phys_addr < 1024)
		return phys_addr + (31 << 10);
	if (phys_addr < 1024 + fn)
		return 31744 - fn + phys_addr - 1024;
	if (phys_addr < EEPROMSIZE)
		return phys_addr - 1024 - fn;
	return -EINVAL;
}

/*
 * The next two routines implement eeprom read/write from physical addresses.
 */
static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v)
{
	int vaddr = eeprom_ptov(phys_addr, adap->fn, EEPROMPFSIZE);

	if (vaddr >= 0)
		vaddr = pci_read_vpd(adap->pdev, vaddr, sizeof(u32), v);
	return vaddr < 0 ? vaddr : 0;
}

static int eeprom_wr_phys(struct adapter *adap, unsigned int phys_addr, u32 v)
{
	int vaddr = eeprom_ptov(phys_addr, adap->fn, EEPROMPFSIZE);

	if (vaddr >= 0)
		vaddr = pci_write_vpd(adap->pdev, vaddr, sizeof(u32), &v);
	return vaddr < 0 ? vaddr : 0;
}

#define EEPROM_MAGIC 0x38E2F10C

static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e,
		      u8 *data)
{
	int i, err = 0;
	struct adapter *adapter = netdev2adap(dev);

	u8 *buf = kmalloc(EEPROMSIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	e->magic = EEPROM_MAGIC;
	for (i = e->offset & ~3; !err && i < e->offset + e->len; i += 4)
		err = eeprom_rd_phys(adapter, i, (u32 *)&buf[i]);

	if (!err)
		memcpy(data, buf + e->offset, e->len);
	kfree(buf);
	return err;
}

static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
		      u8 *data)
{
	u8 *buf;
	int err = 0;
	u32 aligned_offset, aligned_len, *p;
	struct adapter *adapter = netdev2adap(dev);

	if (eeprom->magic != EEPROM_MAGIC)
		return -EINVAL;

	aligned_offset = eeprom->offset & ~3;
	aligned_len = (eeprom->len + (eeprom->offset & 3) + 3) & ~3;

	if (adapter->fn > 0) {
		u32 start = 1024 + adapter->fn * EEPROMPFSIZE;

		if (aligned_offset < start ||
		    aligned_offset + aligned_len > start + EEPROMPFSIZE)
			return -EPERM;
	}

	if (aligned_offset != eeprom->offset || aligned_len != eeprom->len) {
		/*
		 * RMW possibly needed for first or last words.
		 */
		buf = kmalloc(aligned_len, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;
		err = eeprom_rd_phys(adapter, aligned_offset, (u32 *)buf);
		if (!err && aligned_len > 4)
			err = eeprom_rd_phys(adapter,
					     aligned_offset + aligned_len - 4,
					     (u32 *)&buf[aligned_len - 4]);
		if (err)
			goto out;
		memcpy(buf + (eeprom->offset & 3), data, eeprom->len);
	} else
		buf = data;

	err = t4_seeprom_wp(adapter, false);
	if (err)
		goto out;

	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
		err = eeprom_wr_phys(adapter, aligned_offset, *p);
		aligned_offset += 4;
	}

	if (!err)
		err = t4_seeprom_wp(adapter, true);
out:
	if (buf != data)
		kfree(buf);
	return err;
}

static int set_flash(struct net_device *netdev, struct ethtool_flash *ef)
{
	int ret;
	const struct firmware *fw;
	struct adapter *adap = netdev2adap(netdev);
	unsigned int mbox = PCIE_FW_MASTER_M + 1;

	ef->data[sizeof(ef->data) - 1] = '\0';
	ret = request_firmware(&fw, ef->data, adap->pdev_dev);
	if (ret < 0)
		return ret;

	/* If the adapter has been fully initialized then we'll go ahead and
	 * try to get the firmware's cooperation in upgrading to the new
	 * firmware image otherwise we'll try to do the entire job from the
	 * host ... and we always "force" the operation in this path.
	 */
	if (adap->flags & FULL_INIT_DONE)
		mbox = adap->mbox;

	ret = t4_fw_upgrade(adap, mbox, fw->data, fw->size, 1);
	release_firmware(fw);
	if (!ret)
		dev_info(adap->pdev_dev, "loaded firmware %s,"
			 " reload cxgb4 driver\n", ef->data);
	return ret;
}

#define WOL_SUPPORTED (WAKE_BCAST | WAKE_MAGIC)
#define BCAST_CRC 0xa0ccc1a6

static void get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
	wol->supported = WAKE_BCAST | WAKE_MAGIC;
	wol->wolopts = netdev2adap(dev)->wol;
	memset(&wol->sopass, 0, sizeof(wol->sopass));
}

static int set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
	int err = 0;
	struct port_info *pi = netdev_priv(dev);

	if (wol->wolopts & ~WOL_SUPPORTED)
		return -EINVAL;
	t4_wol_magic_enable(pi->adapter, pi->tx_chan,
			    (wol->wolopts & WAKE_MAGIC) ? dev->dev_addr : NULL);
	if (wol->wolopts & WAKE_BCAST) {
		err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0xfe, ~0ULL,
					~0ULL, 0, false);
		if (!err)
			err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 1,
						~6ULL, ~0ULL, BCAST_CRC, true);
	} else
		t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0, 0, 0, 0, false);
	return err;
}

2851 static int cxgb_set_features(struct net_device *dev, netdev_features_t features)
2853 const struct port_info *pi = netdev_priv(dev);
2854 netdev_features_t changed = dev->features ^ features;
2857 if (!(changed & NETIF_F_HW_VLAN_CTAG_RX))
2860 err = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, -1,
2862 !!(features & NETIF_F_HW_VLAN_CTAG_RX), true);
2864 dev->features = features ^ NETIF_F_HW_VLAN_CTAG_RX;
2868 static u32 get_rss_table_size(struct net_device *dev)
2870 const struct port_info *pi = netdev_priv(dev);
2872 return pi->rss_size;
2875 static int get_rss_table(struct net_device *dev, u32 *p, u8 *key, u8 *hfunc)
2877 const struct port_info *pi = netdev_priv(dev);
2878 unsigned int n = pi->rss_size;
2881 *hfunc = ETH_RSS_HASH_TOP;
2889 static int set_rss_table(struct net_device *dev, const u32 *p, const u8 *key,
2893 struct port_info *pi = netdev_priv(dev);
2895 /* We require at least one supported parameter to be changed and no
2896 * change in any of the unsupported parameters
2899 (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
2904 for (i = 0; i < pi->rss_size; i++)
2906 if (pi->adapter->flags & FULL_INIT_DONE)
2907 return write_rss(pi, pi->rss);
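/* Illustrative sketch (not part of the driver): the ethtool .set_rxfh
 * hook ultimately hands us an indirection table like the one below, which
 * spreads RSS buckets round-robin across a port's Rx queues; the table
 * size of 128 and the loop are assumed example values only.
 *
 *	u32 table[128];				// rss_size of 128 assumed
 *	for (i = 0; i < 128; i++)
 *		table[i] = i % pi->nqsets;	// bucket -> queue index
 */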
2911 static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
2914 const struct port_info *pi = netdev_priv(dev);
2916 switch (info->cmd) {
2917 case ETHTOOL_GRXFH: {
2918 unsigned int v = pi->rss_mode;
2921 switch (info->flow_type) {
2923 if (v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN_F)
2924 info->data = RXH_IP_SRC | RXH_IP_DST |
2925 RXH_L4_B_0_1 | RXH_L4_B_2_3;
2926 else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN_F)
2927 info->data = RXH_IP_SRC | RXH_IP_DST;
2930 if ((v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN_F) &&
2931 (v & FW_RSS_VI_CONFIG_CMD_UDPEN_F))
2932 info->data = RXH_IP_SRC | RXH_IP_DST |
2933 RXH_L4_B_0_1 | RXH_L4_B_2_3;
2934 else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN_F)
2935 info->data = RXH_IP_SRC | RXH_IP_DST;
2938 case AH_ESP_V4_FLOW:
2940 if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN_F)
2941 info->data = RXH_IP_SRC | RXH_IP_DST;
2944 if (v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN_F)
2945 info->data = RXH_IP_SRC | RXH_IP_DST |
2946 RXH_L4_B_0_1 | RXH_L4_B_2_3;
2947 else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN_F)
2948 info->data = RXH_IP_SRC | RXH_IP_DST;
2951 if ((v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN_F) &&
2952 (v & FW_RSS_VI_CONFIG_CMD_UDPEN_F))
2953 info->data = RXH_IP_SRC | RXH_IP_DST |
2954 RXH_L4_B_0_1 | RXH_L4_B_2_3;
2955 else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN_F)
2956 info->data = RXH_IP_SRC | RXH_IP_DST;
2959 case AH_ESP_V6_FLOW:
2961 if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN_F)
2962 info->data = RXH_IP_SRC | RXH_IP_DST;
2967 case ETHTOOL_GRXRINGS:
2968 info->data = pi->nqsets;
2974 static const struct ethtool_ops cxgb_ethtool_ops = {
2975 .get_settings = get_settings,
2976 .set_settings = set_settings,
2977 .get_drvinfo = get_drvinfo,
2978 .get_msglevel = get_msglevel,
2979 .set_msglevel = set_msglevel,
2980 .get_ringparam = get_sge_param,
2981 .set_ringparam = set_sge_param,
2982 .get_coalesce = get_coalesce,
2983 .set_coalesce = set_coalesce,
2984 .get_eeprom_len = get_eeprom_len,
2985 .get_eeprom = get_eeprom,
2986 .set_eeprom = set_eeprom,
2987 .get_pauseparam = get_pauseparam,
2988 .set_pauseparam = set_pauseparam,
2989 .get_link = ethtool_op_get_link,
2990 .get_strings = get_strings,
2991 .set_phys_id = identify_port,
2992 .nway_reset = restart_autoneg,
2993 .get_sset_count = get_sset_count,
2994 .get_ethtool_stats = get_stats,
2995 .get_regs_len = get_regs_len,
2996 .get_regs = get_regs,
2999 .get_rxnfc = get_rxnfc,
3000 .get_rxfh_indir_size = get_rss_table_size,
3001 .get_rxfh = get_rss_table,
3002 .set_rxfh = set_rss_table,
3003 .flash_device = set_flash,
3006 static int setup_debugfs(struct adapter *adap)
3008 if (IS_ERR_OR_NULL(adap->debugfs_root))
3011 #ifdef CONFIG_DEBUG_FS
3012 t4_setup_debugfs(adap);
3018 * upper-layer driver support
3022 * Allocate an active-open TID and set it to the supplied value.
3024 int cxgb4_alloc_atid(struct tid_info *t, void *data)
3028 spin_lock_bh(&t->atid_lock);
3030 union aopen_entry *p = t->afree;
3032 atid = (p - t->atid_tab) + t->atid_base;
3037 spin_unlock_bh(&t->atid_lock);
3040 EXPORT_SYMBOL(cxgb4_alloc_atid);
3043 * Release an active-open TID.
3045 void cxgb4_free_atid(struct tid_info *t, unsigned int atid)
3047 union aopen_entry *p = &t->atid_tab[atid - t->atid_base];
3049 spin_lock_bh(&t->atid_lock);
3053 spin_unlock_bh(&t->atid_lock);
3055 EXPORT_SYMBOL(cxgb4_free_atid);
3058 * Allocate a server TID and set it to the supplied value.
3060 int cxgb4_alloc_stid(struct tid_info *t, int family, void *data)
3064 spin_lock_bh(&t->stid_lock);
3065 if (family == PF_INET) {
3066 stid = find_first_zero_bit(t->stid_bmap, t->nstids);
3067 if (stid < t->nstids)
3068 __set_bit(stid, t->stid_bmap);
3072 stid = bitmap_find_free_region(t->stid_bmap, t->nstids, 2);
3077 t->stid_tab[stid].data = data;
3078 stid += t->stid_base;
3079 /* IPv6 requires max of 520 bits or 16 cells in TCAM
3080 * This is equivalent to 4 TIDs. With CLIP enabled it goes down to 2 TIDs.
3083 if (family == PF_INET)
3086 t->stids_in_use += 4;
3088 spin_unlock_bh(&t->stid_lock);
3091 EXPORT_SYMBOL(cxgb4_alloc_stid);
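/* Note on the allocation above (illustrative): for PF_INET a single bit is
 * taken from stid_bmap, while the IPv6 path uses
 * bitmap_find_free_region(..., 2), i.e. an order-2 region of four
 * consecutive, naturally aligned bits. If any bit in the group 4..7 is
 * busy, an IPv6 request skips to the next aligned group starting at 8,
 * which matches the 4-TID accounting applied to stids_in_use.
 */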
3093 /* Allocate a server filter TID and set it to the supplied value.
3095 int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data)
3099 spin_lock_bh(&t->stid_lock);
3100 if (family == PF_INET) {
3101 stid = find_next_zero_bit(t->stid_bmap,
3102 t->nstids + t->nsftids, t->nstids);
3103 if (stid < (t->nstids + t->nsftids))
3104 __set_bit(stid, t->stid_bmap);
3111 t->stid_tab[stid].data = data;
3113 stid += t->sftid_base;
3116 spin_unlock_bh(&t->stid_lock);
3119 EXPORT_SYMBOL(cxgb4_alloc_sftid);
3121 /* Release a server TID.
3123 void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family)
3125 /* Is it a server filter TID? */
3126 if (t->nsftids && (stid >= t->sftid_base)) {
3127 stid -= t->sftid_base;
3130 stid -= t->stid_base;
3133 spin_lock_bh(&t->stid_lock);
3134 if (family == PF_INET)
3135 __clear_bit(stid, t->stid_bmap);
3137 bitmap_release_region(t->stid_bmap, stid, 2);
3138 t->stid_tab[stid].data = NULL;
3139 if (family == PF_INET)
3142 t->stids_in_use -= 4;
3143 spin_unlock_bh(&t->stid_lock);
3145 EXPORT_SYMBOL(cxgb4_free_stid);
3148 * Populate a TID_RELEASE WR. Caller must properly size the skb.
3150 static void mk_tid_release(struct sk_buff *skb, unsigned int chan,
3153 struct cpl_tid_release *req;
3155 set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
3156 req = (struct cpl_tid_release *)__skb_put(skb, sizeof(*req));
3157 INIT_TP_WR(req, tid);
3158 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
3162 * Queue a TID release request and, if necessary, schedule a work queue to process it.
3165 static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan,
3168 void **p = &t->tid_tab[tid];
3169 struct adapter *adap = container_of(t, struct adapter, tids);
3171 spin_lock_bh(&adap->tid_release_lock);
3172 *p = adap->tid_release_head;
3173 /* Low 2 bits encode the Tx channel number */
3174 adap->tid_release_head = (void **)((uintptr_t)p | chan);
3175 if (!adap->tid_release_task_busy) {
3176 adap->tid_release_task_busy = true;
3177 queue_work(adap->workq, &adap->tid_release_task);
3179 spin_unlock_bh(&adap->tid_release_lock);
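/* Sketch of the pointer tagging used above (illustrative): tid_tab entries
 * are at least 4-byte aligned, so the low two bits of their addresses are
 * guaranteed zero and are free to carry the Tx channel:
 *
 *	head = (void **)((uintptr_t)p | chan);	// encode, chan in 0..3
 *	chan = (uintptr_t)head & 3;		// decode the channel
 *	p    = (void *)((uintptr_t)head & ~3);	// recover the pointer
 *
 * process_tid_release_list() below performs exactly this decode.
 */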
3183 * Process the list of pending TID release requests.
3185 static void process_tid_release_list(struct work_struct *work)
3187 struct sk_buff *skb;
3188 struct adapter *adap;
3190 adap = container_of(work, struct adapter, tid_release_task);
3192 spin_lock_bh(&adap->tid_release_lock);
3193 while (adap->tid_release_head) {
3194 void **p = adap->tid_release_head;
3195 unsigned int chan = (uintptr_t)p & 3;
3196 p = (void *)p - chan;
3198 adap->tid_release_head = *p;
3200 spin_unlock_bh(&adap->tid_release_lock);
3202 while (!(skb = alloc_skb(sizeof(struct cpl_tid_release),
3204 schedule_timeout_uninterruptible(1);
3206 mk_tid_release(skb, chan, p - adap->tids.tid_tab);
3207 t4_ofld_send(adap, skb);
3208 spin_lock_bh(&adap->tid_release_lock);
3210 adap->tid_release_task_busy = false;
3211 spin_unlock_bh(&adap->tid_release_lock);
3215 * Release a TID and inform HW. If we are unable to allocate the release
3216 * message, we defer it to a work queue.
3218 void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid)
3221 struct sk_buff *skb;
3222 struct adapter *adap = container_of(t, struct adapter, tids);
3224 old = t->tid_tab[tid];
3225 skb = alloc_skb(sizeof(struct cpl_tid_release), GFP_ATOMIC);
3227 t->tid_tab[tid] = NULL;
3228 mk_tid_release(skb, chan, tid);
3229 t4_ofld_send(adap, skb);
3231 cxgb4_queue_tid_release(t, chan, tid);
3233 atomic_dec(&t->tids_in_use);
3235 EXPORT_SYMBOL(cxgb4_remove_tid);
3238 * Allocate and initialize the TID tables. Returns 0 on success.
3240 static int tid_init(struct tid_info *t)
3243 unsigned int stid_bmap_size;
3244 unsigned int natids = t->natids;
3245 struct adapter *adap = container_of(t, struct adapter, tids);
3247 stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids);
3248 size = t->ntids * sizeof(*t->tid_tab) +
3249 natids * sizeof(*t->atid_tab) +
3250 t->nstids * sizeof(*t->stid_tab) +
3251 t->nsftids * sizeof(*t->stid_tab) +
3252 stid_bmap_size * sizeof(long) +
3253 t->nftids * sizeof(*t->ftid_tab) +
3254 t->nsftids * sizeof(*t->ftid_tab);
3256 t->tid_tab = t4_alloc_mem(size);
3260 t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
3261 t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
3262 t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids];
3263 t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
3264 spin_lock_init(&t->stid_lock);
3265 spin_lock_init(&t->atid_lock);
3267 t->stids_in_use = 0;
3269 t->atids_in_use = 0;
3270 atomic_set(&t->tids_in_use, 0);
3272 /* Setup the free list for atid_tab and clear the stid bitmap. */
3275 t->atid_tab[natids - 1].next = &t->atid_tab[natids];
3276 t->afree = t->atid_tab;
3278 bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
3279 /* Reserve stid 0 for T4/T5 adapters */
3280 if (!t->stid_base &&
3281 (is_t4(adap->params.chip) || is_t5(adap->params.chip)))
3282 __set_bit(0, t->stid_bmap);
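/* Layout note (illustrative): the single t4_alloc_mem() above is carved
 * into consecutive tables, mirroring the size computation:
 *
 *	tid_tab[ntids] | atid_tab[natids] | stid_tab[nstids + nsftids] |
 *	stid_bmap[BITS_TO_LONGS(nstids + nsftids)] | ftid_tab[nftids + nsftids]
 *
 * so one allocation (and one failure path) covers all TID bookkeeping.
 */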
3288 * cxgb4_create_server - create an IP server
3290 * @stid: the server TID
3291 * @sip: local IP address to bind server to
3292 * @sport: the server's TCP port
3293 * @queue: queue to direct messages from this server to
3295 * Create an IP server for the given port and address.
3296 * Returns <0 on error and one of the %NET_XMIT_* values on success.
3298 int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
3299 __be32 sip, __be16 sport, __be16 vlan,
3303 struct sk_buff *skb;
3304 struct adapter *adap;
3305 struct cpl_pass_open_req *req;
3308 skb = alloc_skb(sizeof(*req), GFP_KERNEL);
3312 adap = netdev2adap(dev);
3313 req = (struct cpl_pass_open_req *)__skb_put(skb, sizeof(*req));
3315 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
3316 req->local_port = sport;
3317 req->peer_port = htons(0);
3318 req->local_ip = sip;
3319 req->peer_ip = htonl(0);
3320 chan = rxq_to_chan(&adap->sge, queue);
3321 req->opt0 = cpu_to_be64(TX_CHAN_V(chan));
3322 req->opt1 = cpu_to_be64(CONN_POLICY_V(CPL_CONN_POLICY_ASK) |
3323 SYN_RSS_ENABLE_F | SYN_RSS_QUEUE_V(queue));
3324 ret = t4_mgmt_tx(adap, skb);
3325 return net_xmit_eval(ret);
3327 EXPORT_SYMBOL(cxgb4_create_server);
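/* Usage sketch (hypothetical, not from the driver): an upper-layer driver
 * would typically pair the stid allocator with this routine; "my_ctx", the
 * port number and "rxq_id" below are assumed example values.
 *
 *	stid = cxgb4_alloc_stid(&adap->tids, PF_INET, my_ctx);
 *	if (stid >= 0)
 *		ret = cxgb4_create_server(netdev, stid, sip,
 *					  htons(8000), 0, rxq_id);
 */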
3329 /* cxgb4_create_server6 - create an IPv6 server
3331 * @stid: the server TID
3332 * @sip: local IPv6 address to bind server to
3333 * @sport: the server's TCP port
3334 * @queue: queue to direct messages from this server to
3336 * Create an IPv6 server for the given port and address.
3337 * Returns <0 on error and one of the %NET_XMIT_* values on success.
3339 int cxgb4_create_server6(const struct net_device *dev, unsigned int stid,
3340 const struct in6_addr *sip, __be16 sport,
3344 struct sk_buff *skb;
3345 struct adapter *adap;
3346 struct cpl_pass_open_req6 *req;
3349 skb = alloc_skb(sizeof(*req), GFP_KERNEL);
3353 adap = netdev2adap(dev);
3354 req = (struct cpl_pass_open_req6 *)__skb_put(skb, sizeof(*req));
3356 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, stid));
3357 req->local_port = sport;
3358 req->peer_port = htons(0);
3359 req->local_ip_hi = *(__be64 *)(sip->s6_addr);
3360 req->local_ip_lo = *(__be64 *)(sip->s6_addr + 8);
3361 req->peer_ip_hi = cpu_to_be64(0);
3362 req->peer_ip_lo = cpu_to_be64(0);
3363 chan = rxq_to_chan(&adap->sge, queue);
3364 req->opt0 = cpu_to_be64(TX_CHAN_V(chan));
3365 req->opt1 = cpu_to_be64(CONN_POLICY_V(CPL_CONN_POLICY_ASK) |
3366 SYN_RSS_ENABLE_F | SYN_RSS_QUEUE_V(queue));
3367 ret = t4_mgmt_tx(adap, skb);
3368 return net_xmit_eval(ret);
3370 EXPORT_SYMBOL(cxgb4_create_server6);
3372 int cxgb4_remove_server(const struct net_device *dev, unsigned int stid,
3373 unsigned int queue, bool ipv6)
3375 struct sk_buff *skb;
3376 struct adapter *adap;
3377 struct cpl_close_listsvr_req *req;
3380 adap = netdev2adap(dev);
3382 skb = alloc_skb(sizeof(*req), GFP_KERNEL);
3386 req = (struct cpl_close_listsvr_req *)__skb_put(skb, sizeof(*req));
3388 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, stid));
3389 req->reply_ctrl = htons(NO_REPLY_V(0) | (ipv6 ? LISTSVR_IPV6_V(1) :
3390 LISTSVR_IPV6_V(0)) | QUEUENO_V(queue));
3391 ret = t4_mgmt_tx(adap, skb);
3392 return net_xmit_eval(ret);
3394 EXPORT_SYMBOL(cxgb4_remove_server);
3397 * cxgb4_best_mtu - find the entry in the MTU table closest to an MTU
3398 * @mtus: the HW MTU table
3399 * @mtu: the target MTU
3400 * @idx: index of selected entry in the MTU table
3402 * Returns the index and the value in the HW MTU table that is closest to
3403 * but does not exceed @mtu, unless @mtu is smaller than any value in the
3404 * table, in which case that smallest available value is selected.
3406 unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
3411 while (i < NMTUS - 1 && mtus[i + 1] <= mtu)
3417 EXPORT_SYMBOL(cxgb4_best_mtu);
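/* Worked example (table values assumed; the real table is programmed at
 * init time): with an MTU table containing ... 1280, 1488, 1500, 2002 ...,
 * cxgb4_best_mtu(mtus, 1500, &idx) returns 1500, while a request for 1499
 * returns 1488; a request smaller than every entry returns mtus[0].
 */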
3420 * cxgb4_best_aligned_mtu - find best MTU, [hopefully] data size aligned
3421 * @mtus: the HW MTU table
3422 * @header_size: Header Size
3423 * @data_size_max: maximum Data Segment Size
3424 * @data_size_align: desired Data Segment Size Alignment (2^N)
3425 * @mtu_idxp: HW MTU Table Index return value pointer (possibly NULL)
3427 * Similar to cxgb4_best_mtu() but instead of searching the Hardware
3428 * MTU Table based solely on a Maximum MTU parameter, we break that
3429 * parameter up into a Header Size and Maximum Data Segment Size, and
3430 * provide a desired Data Segment Size Alignment. If we find an MTU in
3431 * the Hardware MTU Table which will result in a Data Segment Size with
3432 * the requested alignment _and_ that MTU isn't "too far" from the
3433 * closest MTU, then we'll return that rather than the closest MTU.
3435 unsigned int cxgb4_best_aligned_mtu(const unsigned short *mtus,
3436 unsigned short header_size,
3437 unsigned short data_size_max,
3438 unsigned short data_size_align,
3439 unsigned int *mtu_idxp)
3441 unsigned short max_mtu = header_size + data_size_max;
3442 unsigned short data_size_align_mask = data_size_align - 1;
3443 int mtu_idx, aligned_mtu_idx;
3445 /* Scan the MTU Table till we find an MTU which is larger than our
3446 * Maximum MTU or we reach the end of the table. Along the way,
3447 * record the last MTU found, if any, which will result in a Data
3448 * Segment Length matching the requested alignment.
3450 for (mtu_idx = 0, aligned_mtu_idx = -1; mtu_idx < NMTUS; mtu_idx++) {
3451 unsigned short data_size = mtus[mtu_idx] - header_size;
3453 /* If this MTU minus the Header Size would result in a
3454 * Data Segment Size of the desired alignment, remember it.
3456 if ((data_size & data_size_align_mask) == 0)
3457 aligned_mtu_idx = mtu_idx;
3459 /* If we're not at the end of the Hardware MTU Table and the
3460 * next element is larger than our Maximum MTU, drop out of
3463 if (mtu_idx+1 < NMTUS && mtus[mtu_idx+1] > max_mtu)
3467 /* If we fell out of the loop because we ran to the end of the table,
3468 * then we just have to use the last [largest] entry.
3470 if (mtu_idx == NMTUS)
3473 /* If we found an MTU which resulted in the requested Data Segment
3474 * Length alignment and that's "not far" from the largest MTU which is
3475 * less than or equal to the maximum MTU, then use that.
3477 if (aligned_mtu_idx >= 0 &&
3478 mtu_idx - aligned_mtu_idx <= 1)
3479 mtu_idx = aligned_mtu_idx;
3481 /* If the caller has passed in an MTU Index pointer, pass the
3482 * MTU Index back. Return the MTU value.
3485 *mtu_idxp = mtu_idx;
3486 return mtus[mtu_idx];
3488 EXPORT_SYMBOL(cxgb4_best_aligned_mtu);
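/* Worked example (values assumed): with header_size = 52, data_size_align
 * = 1024 and table entries ... 4096, 5172 ..., the 5172 entry yields a
 * data segment of 5120 = 5 * 1024, which is aligned; because it is within
 * one table slot of the largest entry not exceeding max_mtu, it is
 * preferred over the merely-closest MTU.
 */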
3491 * cxgb4_port_chan - get the HW channel of a port
3492 * @dev: the net device for the port
3494 * Return the HW Tx channel of the given port.
3496 unsigned int cxgb4_port_chan(const struct net_device *dev)
3498 return netdev2pinfo(dev)->tx_chan;
3500 EXPORT_SYMBOL(cxgb4_port_chan);
3502 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo)
3504 struct adapter *adap = netdev2adap(dev);
3505 u32 v1, v2, lp_count, hp_count;
3507 v1 = t4_read_reg(adap, SGE_DBFIFO_STATUS_A);
3508 v2 = t4_read_reg(adap, SGE_DBFIFO_STATUS2_A);
3509 if (is_t4(adap->params.chip)) {
3510 lp_count = LP_COUNT_G(v1);
3511 hp_count = HP_COUNT_G(v1);
3513 lp_count = LP_COUNT_T5_G(v1);
3514 hp_count = HP_COUNT_T5_G(v2);
3516 return lpfifo ? lp_count : hp_count;
3518 EXPORT_SYMBOL(cxgb4_dbfifo_count);
3521 * cxgb4_port_viid - get the VI id of a port
3522 * @dev: the net device for the port
3524 * Return the VI id of the given port.
3526 unsigned int cxgb4_port_viid(const struct net_device *dev)
3528 return netdev2pinfo(dev)->viid;
3530 EXPORT_SYMBOL(cxgb4_port_viid);
3533 * cxgb4_port_idx - get the index of a port
3534 * @dev: the net device for the port
3536 * Return the index of the given port.
3538 unsigned int cxgb4_port_idx(const struct net_device *dev)
3540 return netdev2pinfo(dev)->port_id;
3542 EXPORT_SYMBOL(cxgb4_port_idx);
3544 void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4,
3545 struct tp_tcp_stats *v6)
3547 struct adapter *adap = pci_get_drvdata(pdev);
3549 spin_lock(&adap->stats_lock);
3550 t4_tp_get_tcp_stats(adap, v4, v6);
3551 spin_unlock(&adap->stats_lock);
3553 EXPORT_SYMBOL(cxgb4_get_tcp_stats);
3555 void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask,
3556 const unsigned int *pgsz_order)
3558 struct adapter *adap = netdev2adap(dev);
3560 t4_write_reg(adap, ULP_RX_ISCSI_TAGMASK_A, tag_mask);
3561 t4_write_reg(adap, ULP_RX_ISCSI_PSZ_A, HPZ0_V(pgsz_order[0]) |
3562 HPZ1_V(pgsz_order[1]) | HPZ2_V(pgsz_order[2]) |
3563 HPZ3_V(pgsz_order[3]));
3565 EXPORT_SYMBOL(cxgb4_iscsi_init);
3567 int cxgb4_flush_eq_cache(struct net_device *dev)
3569 struct adapter *adap = netdev2adap(dev);
3572 ret = t4_fwaddrspace_write(adap, adap->mbox,
3573 0xe1000000 + SGE_CTXT_CMD_A, 0x20000000);
3576 EXPORT_SYMBOL(cxgb4_flush_eq_cache);
3578 static int read_eq_indices(struct adapter *adap, u16 qid, u16 *pidx, u16 *cidx)
3580 u32 addr = t4_read_reg(adap, SGE_DBQ_CTXT_BADDR_A) + 24 * qid + 8;
3584 spin_lock(&adap->win0_lock);
3585 ret = t4_memory_rw(adap, 0, MEM_EDC0, addr,
3586 sizeof(indices), (__be32 *)&indices,
3588 spin_unlock(&adap->win0_lock);
3590 *cidx = (be64_to_cpu(indices) >> 25) & 0xffff;
3591 *pidx = (be64_to_cpu(indices) >> 9) & 0xffff;
3596 int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx,
3599 struct adapter *adap = netdev2adap(dev);
3600 u16 hw_pidx, hw_cidx;
3603 ret = read_eq_indices(adap, qid, &hw_pidx, &hw_cidx);
3607 if (pidx != hw_pidx) {
3611 if (pidx >= hw_pidx)
3612 delta = pidx - hw_pidx;
3614 delta = size - hw_pidx + pidx;
3616 if (is_t4(adap->params.chip))
3617 val = PIDX_V(delta);
3619 val = PIDX_T5_V(delta);
3621 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
3627 EXPORT_SYMBOL(cxgb4_sync_txq_pidx);
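/* Worked example of the delta computation above (illustrative): for a
 * queue of size 512 with hw_pidx = 500 and a driver pidx of 10, the
 * producer index has wrapped, so delta = 512 - 500 + 10 = 22 descriptor
 * slots are replayed through the kernel doorbell register.
 */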
3629 void cxgb4_disable_db_coalescing(struct net_device *dev)
3631 struct adapter *adap;
3633 adap = netdev2adap(dev);
3634 t4_set_reg_field(adap, SGE_DOORBELL_CONTROL_A, NOCOALESCE_F,
3637 EXPORT_SYMBOL(cxgb4_disable_db_coalescing);
3639 void cxgb4_enable_db_coalescing(struct net_device *dev)
3641 struct adapter *adap;
3643 adap = netdev2adap(dev);
3644 t4_set_reg_field(adap, SGE_DOORBELL_CONTROL_A, NOCOALESCE_F, 0);
3646 EXPORT_SYMBOL(cxgb4_enable_db_coalescing);
3648 int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
3650 struct adapter *adap;
3651 u32 offset, memtype, memaddr;
3652 u32 edc0_size, edc1_size, mc0_size, mc1_size, size;
3653 u32 edc0_end, edc1_end, mc0_end, mc1_end;
3656 adap = netdev2adap(dev);
3658 offset = ((stag >> 8) * 32) + adap->vres.stag.start;
3660 /* Figure out where the offset lands in the Memory Type/Address scheme.
3661 * This code assumes that the memory is laid out starting at offset 0
3662 * with no breaks as: EDC0, EDC1, MC0, MC1. All cards have both EDC0
3663 * and EDC1. Some cards will have neither MC0 nor MC1, most cards have
3664 * MC0, and some have both MC0 and MC1.
3666 size = t4_read_reg(adap, MA_EDRAM0_BAR_A);
3667 edc0_size = EDRAM0_SIZE_G(size) << 20;
3668 size = t4_read_reg(adap, MA_EDRAM1_BAR_A);
3669 edc1_size = EDRAM1_SIZE_G(size) << 20;
3670 size = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A);
3671 mc0_size = EXT_MEM0_SIZE_G(size) << 20;
3673 edc0_end = edc0_size;
3674 edc1_end = edc0_end + edc1_size;
3675 mc0_end = edc1_end + mc0_size;
3677 if (offset < edc0_end) {
3680 } else if (offset < edc1_end) {
3682 memaddr = offset - edc0_end;
3684 if (offset < mc0_end) {
3686 memaddr = offset - edc1_end;
3687 } else if (is_t4(adap->params.chip)) {
3688 /* T4 only has a single memory channel */
3691 size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
3692 mc1_size = EXT_MEM1_SIZE_G(size) << 20;
3693 mc1_end = mc0_end + mc1_size;
3694 if (offset < mc1_end) {
3696 memaddr = offset - mc0_end;
3698 /* offset beyond the end of any memory */
3704 spin_lock(&adap->win0_lock);
3705 ret = t4_memory_rw(adap, 0, memtype, memaddr, 32, tpte, T4_MEMORY_READ);
3706 spin_unlock(&adap->win0_lock);
3710 dev_err(adap->pdev_dev, "stag %#x, offset %#x out of range\n",
3714 EXPORT_SYMBOL(cxgb4_read_tpte);
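/* Worked example of the decode above (sizes assumed): with 256MB of EDC0
 * and 256MB of EDC1, a stag offset of 300MB falls past edc0_end (256MB)
 * but below edc1_end (512MB), so the TPTE is read from MEM_EDC1 at
 * memaddr = 300MB - 256MB = 44MB within that channel.
 */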
3716 u64 cxgb4_read_sge_timestamp(struct net_device *dev)
3719 struct adapter *adap;
3721 adap = netdev2adap(dev);
3722 lo = t4_read_reg(adap, SGE_TIMESTAMP_LO_A);
3723 hi = TSVAL_G(t4_read_reg(adap, SGE_TIMESTAMP_HI_A));
3725 return ((u64)hi << 32) | (u64)lo;
3727 EXPORT_SYMBOL(cxgb4_read_sge_timestamp);
3729 int cxgb4_bar2_sge_qregs(struct net_device *dev,
3731 enum cxgb4_bar2_qtype qtype,
3733 unsigned int *pbar2_qid)
3735 return cxgb4_t4_bar2_sge_qregs(netdev2adap(dev),
3737 (qtype == CXGB4_BAR2_QTYPE_EGRESS
3738 ? T4_BAR2_QTYPE_EGRESS
3739 : T4_BAR2_QTYPE_INGRESS),
3743 EXPORT_SYMBOL(cxgb4_bar2_sge_qregs);
3745 static struct pci_driver cxgb4_driver;
3747 static void check_neigh_update(struct neighbour *neigh)
3749 const struct device *parent;
3750 const struct net_device *netdev = neigh->dev;
3752 if (netdev->priv_flags & IFF_802_1Q_VLAN)
3753 netdev = vlan_dev_real_dev(netdev);
3754 parent = netdev->dev.parent;
3755 if (parent && parent->driver == &cxgb4_driver.driver)
3756 t4_l2t_update(dev_get_drvdata(parent), neigh);
3759 static int netevent_cb(struct notifier_block *nb, unsigned long event,
3763 case NETEVENT_NEIGH_UPDATE:
3764 check_neigh_update(data);
3766 case NETEVENT_REDIRECT:
3773 static bool netevent_registered;
3774 static struct notifier_block cxgb4_netevent_nb = {
3775 .notifier_call = netevent_cb
3778 static void drain_db_fifo(struct adapter *adap, int usecs)
3780 u32 v1, v2, lp_count, hp_count;
3783 v1 = t4_read_reg(adap, SGE_DBFIFO_STATUS_A);
3784 v2 = t4_read_reg(adap, SGE_DBFIFO_STATUS2_A);
3785 if (is_t4(adap->params.chip)) {
3786 lp_count = LP_COUNT_G(v1);
3787 hp_count = HP_COUNT_G(v1);
3789 lp_count = LP_COUNT_T5_G(v1);
3790 hp_count = HP_COUNT_T5_G(v2);
3793 if (lp_count == 0 && hp_count == 0)
3795 set_current_state(TASK_UNINTERRUPTIBLE);
3796 schedule_timeout(usecs_to_jiffies(usecs));
3800 static void disable_txq_db(struct sge_txq *q)
3802 unsigned long flags;
3804 spin_lock_irqsave(&q->db_lock, flags);
3806 spin_unlock_irqrestore(&q->db_lock, flags);
3809 static void enable_txq_db(struct adapter *adap, struct sge_txq *q)
3811 spin_lock_irq(&q->db_lock);
3812 if (q->db_pidx_inc) {
3813 /* Make sure that all writes to the TX descriptors
3814 * are committed before we tell HW about them.
3817 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
3818 QID_V(q->cntxt_id) | PIDX_V(q->db_pidx_inc));
3822 spin_unlock_irq(&q->db_lock);
3825 static void disable_dbs(struct adapter *adap)
3829 for_each_ethrxq(&adap->sge, i)
3830 disable_txq_db(&adap->sge.ethtxq[i].q);
3831 for_each_ofldrxq(&adap->sge, i)
3832 disable_txq_db(&adap->sge.ofldtxq[i].q);
3833 for_each_port(adap, i)
3834 disable_txq_db(&adap->sge.ctrlq[i].q);
3837 static void enable_dbs(struct adapter *adap)
3841 for_each_ethrxq(&adap->sge, i)
3842 enable_txq_db(adap, &adap->sge.ethtxq[i].q);
3843 for_each_ofldrxq(&adap->sge, i)
3844 enable_txq_db(adap, &adap->sge.ofldtxq[i].q);
3845 for_each_port(adap, i)
3846 enable_txq_db(adap, &adap->sge.ctrlq[i].q);
3849 static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
3851 if (adap->uld_handle[CXGB4_ULD_RDMA])
3852 ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA],
3856 static void process_db_full(struct work_struct *work)
3858 struct adapter *adap;
3860 adap = container_of(work, struct adapter, db_full_task);
3862 drain_db_fifo(adap, dbfifo_drain_delay);
3864 notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY);
3865 t4_set_reg_field(adap, SGE_INT_ENABLE3_A,
3866 DBFIFO_HP_INT_F | DBFIFO_LP_INT_F,
3867 DBFIFO_HP_INT_F | DBFIFO_LP_INT_F);
3870 static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q)
3872 u16 hw_pidx, hw_cidx;
3875 spin_lock_irq(&q->db_lock);
3876 ret = read_eq_indices(adap, (u16)q->cntxt_id, &hw_pidx, &hw_cidx);
3879 if (q->db_pidx != hw_pidx) {
3883 if (q->db_pidx >= hw_pidx)
3884 delta = q->db_pidx - hw_pidx;
3886 delta = q->size - hw_pidx + q->db_pidx;
3888 if (is_t4(adap->params.chip))
3889 val = PIDX_V(delta);
3891 val = PIDX_T5_V(delta);
3893 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
3894 QID_V(q->cntxt_id) | val);
3899 spin_unlock_irq(&q->db_lock);
3901 CH_WARN(adap, "DB drop recovery failed.\n");
3903 static void recover_all_queues(struct adapter *adap)
3907 for_each_ethrxq(&adap->sge, i)
3908 sync_txq_pidx(adap, &adap->sge.ethtxq[i].q);
3909 for_each_ofldrxq(&adap->sge, i)
3910 sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q);
3911 for_each_port(adap, i)
3912 sync_txq_pidx(adap, &adap->sge.ctrlq[i].q);
3915 static void process_db_drop(struct work_struct *work)
3917 struct adapter *adap;
3919 adap = container_of(work, struct adapter, db_drop_task);
3921 if (is_t4(adap->params.chip)) {
3922 drain_db_fifo(adap, dbfifo_drain_delay);
3923 notify_rdma_uld(adap, CXGB4_CONTROL_DB_DROP);
3924 drain_db_fifo(adap, dbfifo_drain_delay);
3925 recover_all_queues(adap);
3926 drain_db_fifo(adap, dbfifo_drain_delay);
3928 notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY);
3930 u32 dropped_db = t4_read_reg(adap, 0x010ac);
3931 u16 qid = (dropped_db >> 15) & 0x1ffff;
3932 u16 pidx_inc = dropped_db & 0x1fff;
3934 unsigned int bar2_qid;
3937 ret = cxgb4_t4_bar2_sge_qregs(adap, qid, T4_BAR2_QTYPE_EGRESS,
3938 &bar2_qoffset, &bar2_qid);
3940 dev_err(adap->pdev_dev, "doorbell drop recovery: qid=%d, pidx_inc=%d\n",
3941 qid, pidx_inc);
3943 writel(PIDX_T5_V(pidx_inc) | QID_V(bar2_qid),
3944 adap->bar2 + bar2_qoffset + SGE_UDB_KDOORBELL);
3946 /* Re-enable BAR2 WC */
3947 t4_set_reg_field(adap, 0x10b0, 1<<15, 1<<15);
3950 t4_set_reg_field(adap, SGE_DOORBELL_CONTROL_A, DROPPED_DB_F, 0);
3953 void t4_db_full(struct adapter *adap)
3955 if (is_t4(adap->params.chip)) {
3957 notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL);
3958 t4_set_reg_field(adap, SGE_INT_ENABLE3_A,
3959 DBFIFO_HP_INT_F | DBFIFO_LP_INT_F, 0);
3960 queue_work(adap->workq, &adap->db_full_task);
3964 void t4_db_dropped(struct adapter *adap)
3966 if (is_t4(adap->params.chip)) {
3968 notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL);
3970 queue_work(adap->workq, &adap->db_drop_task);
3973 static void uld_attach(struct adapter *adap, unsigned int uld)
3976 struct cxgb4_lld_info lli;
3979 lli.pdev = adap->pdev;
3981 lli.l2t = adap->l2t;
3982 lli.tids = &adap->tids;
3983 lli.ports = adap->port;
3984 lli.vr = &adap->vres;
3985 lli.mtus = adap->params.mtus;
3986 if (uld == CXGB4_ULD_RDMA) {
3987 lli.rxq_ids = adap->sge.rdma_rxq;
3988 lli.ciq_ids = adap->sge.rdma_ciq;
3989 lli.nrxq = adap->sge.rdmaqs;
3990 lli.nciq = adap->sge.rdmaciqs;
3991 } else if (uld == CXGB4_ULD_ISCSI) {
3992 lli.rxq_ids = adap->sge.ofld_rxq;
3993 lli.nrxq = adap->sge.ofldqsets;
3995 lli.ntxq = adap->sge.ofldqsets;
3996 lli.nchan = adap->params.nports;
3997 lli.nports = adap->params.nports;
3998 lli.wr_cred = adap->params.ofldq_wr_cred;
3999 lli.adapter_type = adap->params.chip;
4000 lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
4001 lli.cclk_ps = 1000000000 / adap->params.vpd.cclk;
4002 lli.udb_density = 1 << adap->params.sge.eq_qpp;
4003 lli.ucq_density = 1 << adap->params.sge.iq_qpp;
4004 lli.filt_mode = adap->params.tp.vlan_pri_map;
4005 /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */
4006 for (i = 0; i < NCHAN; i++)
4008 lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A);
4009 lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A);
4010 lli.fw_vers = adap->params.fw_vers;
4011 lli.dbfifo_int_thresh = dbfifo_int_thresh;
4012 lli.sge_ingpadboundary = adap->sge.fl_align;
4013 lli.sge_egrstatuspagesize = adap->sge.stat_len;
4014 lli.sge_pktshift = adap->sge.pktshift;
4015 lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN;
4016 lli.max_ordird_qp = adap->params.max_ordird_qp;
4017 lli.max_ird_adapter = adap->params.max_ird_adapter;
4018 lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
4020 handle = ulds[uld].add(&lli);
4021 if (IS_ERR(handle)) {
4022 dev_warn(adap->pdev_dev,
4023 "could not attach to the %s driver, error %ld\n",
4024 uld_str[uld], PTR_ERR(handle));
4028 adap->uld_handle[uld] = handle;
4030 if (!netevent_registered) {
4031 register_netevent_notifier(&cxgb4_netevent_nb);
4032 netevent_registered = true;
4035 if (adap->flags & FULL_INIT_DONE)
4036 ulds[uld].state_change(handle, CXGB4_STATE_UP);
4039 static void attach_ulds(struct adapter *adap)
4043 spin_lock(&adap_rcu_lock);
4044 list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list);
4045 spin_unlock(&adap_rcu_lock);
4047 mutex_lock(&uld_mutex);
4048 list_add_tail(&adap->list_node, &adapter_list);
4049 for (i = 0; i < CXGB4_ULD_MAX; i++)
4051 uld_attach(adap, i);
4052 mutex_unlock(&uld_mutex);
4055 static void detach_ulds(struct adapter *adap)
4059 mutex_lock(&uld_mutex);
4060 list_del(&adap->list_node);
4061 for (i = 0; i < CXGB4_ULD_MAX; i++)
4062 if (adap->uld_handle[i]) {
4063 ulds[i].state_change(adap->uld_handle[i],
4064 CXGB4_STATE_DETACH);
4065 adap->uld_handle[i] = NULL;
4067 if (netevent_registered && list_empty(&adapter_list)) {
4068 unregister_netevent_notifier(&cxgb4_netevent_nb);
4069 netevent_registered = false;
4071 mutex_unlock(&uld_mutex);
4073 spin_lock(&adap_rcu_lock);
4074 list_del_rcu(&adap->rcu_node);
4075 spin_unlock(&adap_rcu_lock);
4078 static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
4082 mutex_lock(&uld_mutex);
4083 for (i = 0; i < CXGB4_ULD_MAX; i++)
4084 if (adap->uld_handle[i])
4085 ulds[i].state_change(adap->uld_handle[i], new_state);
4086 mutex_unlock(&uld_mutex);
4090 * cxgb4_register_uld - register an upper-layer driver
4091 * @type: the ULD type
4092 * @p: the ULD methods
4094 * Registers an upper-layer driver with this driver and notifies the ULD
4095 * about any presently available devices that support its type. Returns
4096 * %-EBUSY if a ULD of the same type is already registered.
4098 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p)
4101 struct adapter *adap;
4103 if (type >= CXGB4_ULD_MAX)
4105 mutex_lock(&uld_mutex);
4106 if (ulds[type].add) {
4111 list_for_each_entry(adap, &adapter_list, list_node)
4112 uld_attach(adap, type);
4113 out: mutex_unlock(&uld_mutex);
4116 EXPORT_SYMBOL(cxgb4_register_uld);
4119 * cxgb4_unregister_uld - unregister an upper-layer driver
4120 * @type: the ULD type
4122 * Unregisters an existing upper-layer driver.
4124 int cxgb4_unregister_uld(enum cxgb4_uld type)
4126 struct adapter *adap;
4128 if (type >= CXGB4_ULD_MAX)
4130 mutex_lock(&uld_mutex);
4131 list_for_each_entry(adap, &adapter_list, list_node)
4132 adap->uld_handle[type] = NULL;
4133 ulds[type].add = NULL;
4134 mutex_unlock(&uld_mutex);
4137 EXPORT_SYMBOL(cxgb4_unregister_uld);
4139 #if IS_ENABLED(CONFIG_IPV6)
4140 static int cxgb4_inet6addr_handler(struct notifier_block *this,
4141 unsigned long event, void *data)
4143 struct inet6_ifaddr *ifa = data;
4144 struct net_device *event_dev = ifa->idev->dev;
4145 const struct device *parent = NULL;
4146 #if IS_ENABLED(CONFIG_BONDING)
4147 struct adapter *adap;
4149 if (event_dev->priv_flags & IFF_802_1Q_VLAN)
4150 event_dev = vlan_dev_real_dev(event_dev);
4151 #if IS_ENABLED(CONFIG_BONDING)
4152 if (event_dev->flags & IFF_MASTER) {
4153 list_for_each_entry(adap, &adapter_list, list_node) {
4156 cxgb4_clip_get(adap->port[0],
4157 (const u32 *)ifa, 1);
4160 cxgb4_clip_release(adap->port[0],
4161 (const u32 *)ifa, 1);
4172 parent = event_dev->dev.parent;
4174 if (parent && parent->driver == &cxgb4_driver.driver) {
4177 cxgb4_clip_get(event_dev, (const u32 *)ifa, 1);
4180 cxgb4_clip_release(event_dev, (const u32 *)ifa, 1);
4189 static bool inet6addr_registered;
4190 static struct notifier_block cxgb4_inet6addr_notifier = {
4191 .notifier_call = cxgb4_inet6addr_handler
4194 static void update_clip(const struct adapter *adap)
4197 struct net_device *dev;
4202 for (i = 0; i < MAX_NPORTS; i++) {
4203 dev = adap->port[i];
4207 ret = cxgb4_update_root_dev_clip(dev);
4214 #endif /* IS_ENABLED(CONFIG_IPV6) */
4217 * cxgb_up - enable the adapter
4218 * @adap: adapter being enabled
4220 * Called when the first port is enabled, this function performs the
4221 * actions necessary to make an adapter operational, such as completing
4222 * the initialization of HW modules, and enabling interrupts.
4224 * Must be called with the rtnl lock held.
4226 static int cxgb_up(struct adapter *adap)
4230 err = setup_sge_queues(adap);
4233 err = setup_rss(adap);
4237 if (adap->flags & USING_MSIX) {
4238 name_msix_vecs(adap);
4239 err = request_irq(adap->msix_info[0].vec, t4_nondata_intr, 0,
4240 adap->msix_info[0].desc, adap);
4244 err = request_msix_queue_irqs(adap);
4246 free_irq(adap->msix_info[0].vec, adap);
4250 err = request_irq(adap->pdev->irq, t4_intr_handler(adap),
4251 (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
4252 adap->port[0]->name, adap);
4258 t4_intr_enable(adap);
4259 adap->flags |= FULL_INIT_DONE;
4260 notify_ulds(adap, CXGB4_STATE_UP);
4261 #if IS_ENABLED(CONFIG_IPV6)
4267 dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
4269 t4_free_sge_resources(adap);
4273 static void cxgb_down(struct adapter *adapter)
4275 cancel_work_sync(&adapter->tid_release_task);
4276 cancel_work_sync(&adapter->db_full_task);
4277 cancel_work_sync(&adapter->db_drop_task);
4278 adapter->tid_release_task_busy = false;
4279 adapter->tid_release_head = NULL;
4281 t4_sge_stop(adapter);
4282 t4_free_sge_resources(adapter);
4283 adapter->flags &= ~FULL_INIT_DONE;
4287 * net_device operations
4289 static int cxgb_open(struct net_device *dev)
4292 struct port_info *pi = netdev_priv(dev);
4293 struct adapter *adapter = pi->adapter;
4295 netif_carrier_off(dev);
4297 if (!(adapter->flags & FULL_INIT_DONE)) {
4298 err = cxgb_up(adapter);
4303 err = link_start(dev);
4305 netif_tx_start_all_queues(dev);
4309 static int cxgb_close(struct net_device *dev)
4311 struct port_info *pi = netdev_priv(dev);
4312 struct adapter *adapter = pi->adapter;
4314 netif_tx_stop_all_queues(dev);
4315 netif_carrier_off(dev);
4316 return t4_enable_vi(adapter, adapter->fn, pi->viid, false, false);
4319 /* Return an error number if the indicated filter isn't writable ...
4321 static int writable_filter(struct filter_entry *f)
4331 /* Delete the filter at the specified index (if valid), performing the
4332 * checks for the common problems with doing this, like the filter being
4333 * locked or currently pending in another operation.
4335 static int delete_filter(struct adapter *adapter, unsigned int fidx)
4337 struct filter_entry *f;
4340 if (fidx >= adapter->tids.nftids + adapter->tids.nsftids)
4343 f = &adapter->tids.ftid_tab[fidx];
4344 ret = writable_filter(f);
4348 return del_filter_wr(adapter, fidx);
4353 int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
4354 __be32 sip, __be16 sport, __be16 vlan,
4355 unsigned int queue, unsigned char port, unsigned char mask)
4358 struct filter_entry *f;
4359 struct adapter *adap;
4363 adap = netdev2adap(dev);
4365 /* Adjust stid to correct filter index */
4366 stid -= adap->tids.sftid_base;
4367 stid += adap->tids.nftids;
4369 /* Check to make sure the filter requested is writable ...
4371 f = &adap->tids.ftid_tab[stid];
4372 ret = writable_filter(f);
4376 /* Clear out any old resources being used by the filter before
4377 * we start constructing the new filter.
4380 clear_filter(adap, f);
4382 /* Clear out filter specifications */
4383 memset(&f->fs, 0, sizeof(struct ch_filter_specification));
4384 f->fs.val.lport = cpu_to_be16(sport);
4385 f->fs.mask.lport = ~0;
4387 if ((val[0] | val[1] | val[2] | val[3]) != 0) {
4388 for (i = 0; i < 4; i++) {
4389 f->fs.val.lip[i] = val[i];
4390 f->fs.mask.lip[i] = ~0;
4392 if (adap->params.tp.vlan_pri_map & PORT_F) {
4393 f->fs.val.iport = port;
4394 f->fs.mask.iport = mask;
4398 if (adap->params.tp.vlan_pri_map & PROTOCOL_F) {
4399 f->fs.val.proto = IPPROTO_TCP;
4400 f->fs.mask.proto = ~0;
4405 /* Mark filter as locked */
4409 ret = set_filter_wr(adap, stid);
4411 clear_filter(adap, f);
4417 EXPORT_SYMBOL(cxgb4_create_server_filter);
4419 int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
4420 unsigned int queue, bool ipv6)
4423 struct filter_entry *f;
4424 struct adapter *adap;
4426 adap = netdev2adap(dev);
4428 /* Adjust stid to correct filter index */
4429 stid -= adap->tids.sftid_base;
4430 stid += adap->tids.nftids;
4432 f = &adap->tids.ftid_tab[stid];
4433 /* Unlock the filter */
4436 ret = delete_filter(adap, stid);
4442 EXPORT_SYMBOL(cxgb4_remove_server_filter);
4444 static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev,
4445 struct rtnl_link_stats64 *ns)
4447 struct port_stats stats;
4448 struct port_info *p = netdev_priv(dev);
4449 struct adapter *adapter = p->adapter;
4451 /* Block retrieving statistics during EEH error
4452 * recovery. Otherwise, the recovery might fail
4453 * and the PCI device will be removed permanently
4455 spin_lock(&adapter->stats_lock);
4456 if (!netif_device_present(dev)) {
4457 spin_unlock(&adapter->stats_lock);
4460 t4_get_port_stats(adapter, p->tx_chan, &stats);
4461 spin_unlock(&adapter->stats_lock);
4463 ns->tx_bytes = stats.tx_octets;
4464 ns->tx_packets = stats.tx_frames;
4465 ns->rx_bytes = stats.rx_octets;
4466 ns->rx_packets = stats.rx_frames;
4467 ns->multicast = stats.rx_mcast_frames;
4469 /* detailed rx_errors */
4470 ns->rx_length_errors = stats.rx_jabber + stats.rx_too_long +
4472 ns->rx_over_errors = 0;
4473 ns->rx_crc_errors = stats.rx_fcs_err;
4474 ns->rx_frame_errors = stats.rx_symbol_err;
4475 ns->rx_fifo_errors = stats.rx_ovflow0 + stats.rx_ovflow1 +
4476 stats.rx_ovflow2 + stats.rx_ovflow3 +
4477 stats.rx_trunc0 + stats.rx_trunc1 +
4478 stats.rx_trunc2 + stats.rx_trunc3;
4479 ns->rx_missed_errors = 0;
4481 /* detailed tx_errors */
4482 ns->tx_aborted_errors = 0;
4483 ns->tx_carrier_errors = 0;
4484 ns->tx_fifo_errors = 0;
4485 ns->tx_heartbeat_errors = 0;
4486 ns->tx_window_errors = 0;
4488 ns->tx_errors = stats.tx_error_frames;
4489 ns->rx_errors = stats.rx_symbol_err + stats.rx_fcs_err +
4490 ns->rx_length_errors + stats.rx_len_err + ns->rx_fifo_errors;
4494 static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
4497 int ret = 0, prtad, devad;
4498 struct port_info *pi = netdev_priv(dev);
4499 struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data;
4503 if (pi->mdio_addr < 0)
4505 data->phy_id = pi->mdio_addr;
4509 if (mdio_phy_id_is_c45(data->phy_id)) {
4510 prtad = mdio_phy_id_prtad(data->phy_id);
4511 devad = mdio_phy_id_devad(data->phy_id);
4512 } else if (data->phy_id < 32) {
4513 prtad = data->phy_id;
4515 data->reg_num &= 0x1f;
4519 mbox = pi->adapter->fn;
4520 if (cmd == SIOCGMIIREG)
4521 ret = t4_mdio_rd(pi->adapter, mbox, prtad, devad,
4522 data->reg_num, &data->val_out);
4524 ret = t4_mdio_wr(pi->adapter, mbox, prtad, devad,
4525 data->reg_num, data->val_in);
4533 static void cxgb_set_rxmode(struct net_device *dev)
4535 /* unfortunately we can't return errors to the stack */
4536 set_rxmode(dev, -1, false);
4539 static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
4542 struct port_info *pi = netdev_priv(dev);
4544 if (new_mtu < 81 || new_mtu > MAX_MTU) /* accommodate SACK */
4546 ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, new_mtu, -1,
4553 static int cxgb_set_mac_addr(struct net_device *dev, void *p)
4556 struct sockaddr *addr = p;
4557 struct port_info *pi = netdev_priv(dev);
4559 if (!is_valid_ether_addr(addr->sa_data))
4560 return -EADDRNOTAVAIL;
4562 ret = t4_change_mac(pi->adapter, pi->adapter->fn, pi->viid,
4563 pi->xact_addr_filt, addr->sa_data, true, true);
4567 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
4568 pi->xact_addr_filt = ret;
4572 #ifdef CONFIG_NET_POLL_CONTROLLER
4573 static void cxgb_netpoll(struct net_device *dev)
4575 struct port_info *pi = netdev_priv(dev);
4576 struct adapter *adap = pi->adapter;
4578 if (adap->flags & USING_MSIX) {
4580 struct sge_eth_rxq *rx = &adap->sge.ethrxq[pi->first_qset];
4582 for (i = pi->nqsets; i; i--, rx++)
4583 t4_sge_intr_msix(0, &rx->rspq);
4585 t4_intr_handler(adap)(0, adap);
4589 static const struct net_device_ops cxgb4_netdev_ops = {
4590 .ndo_open = cxgb_open,
4591 .ndo_stop = cxgb_close,
4592 .ndo_start_xmit = t4_eth_xmit,
4593 .ndo_select_queue = cxgb_select_queue,
4594 .ndo_get_stats64 = cxgb_get_stats,
4595 .ndo_set_rx_mode = cxgb_set_rxmode,
4596 .ndo_set_mac_address = cxgb_set_mac_addr,
4597 .ndo_set_features = cxgb_set_features,
4598 .ndo_validate_addr = eth_validate_addr,
4599 .ndo_do_ioctl = cxgb_ioctl,
4600 .ndo_change_mtu = cxgb_change_mtu,
4601 #ifdef CONFIG_NET_POLL_CONTROLLER
4602 .ndo_poll_controller = cxgb_netpoll,
4604 #ifdef CONFIG_NET_RX_BUSY_POLL
4605 .ndo_busy_poll = cxgb_busy_poll,
4610 void t4_fatal_err(struct adapter *adap)
4612 t4_set_reg_field(adap, SGE_CONTROL_A, GLOBALENABLE_F, 0);
4613 t4_intr_disable(adap);
4614 dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n");
4617 /* Return the specified PCI-E Configuration Space register from our Physical
4618 * Function. We try first via a Firmware LDST Command since we prefer to let
4619 * the firmware own all of these registers, but if that fails we go for it
4620 * directly ourselves.
4622 static u32 t4_read_pcie_cfg4(struct adapter *adap, int reg)
4624 struct fw_ldst_cmd ldst_cmd;
4628 /* Construct and send the Firmware LDST Command to retrieve the
4629 * specified PCI-E Configuration Space register.
4631 memset(&ldst_cmd, 0, sizeof(ldst_cmd));
4632 ldst_cmd.op_to_addrspace =
4633 htonl(FW_CMD_OP_V(FW_LDST_CMD) |
4636 FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_FUNC_PCIE));
4637 ldst_cmd.cycles_to_len16 = htonl(FW_LEN16(ldst_cmd));
4638 ldst_cmd.u.pcie.select_naccess = FW_LDST_CMD_NACCESS_V(1);
4639 ldst_cmd.u.pcie.ctrl_to_fn =
4640 (FW_LDST_CMD_LC_F | FW_LDST_CMD_FN_V(adap->fn));
4641 ldst_cmd.u.pcie.r = reg;
4642 ret = t4_wr_mbox(adap, adap->mbox, &ldst_cmd, sizeof(ldst_cmd),
4645 /* If the LDST Command succeeded, extract the returned register
4646 * value. Otherwise read it directly ourselves.
4649 val = ntohl(ldst_cmd.u.pcie.data[0]);
4651 t4_hw_pci_read_cfg4(adap, reg, &val);
4656 static void setup_memwin(struct adapter *adap)
4658 u32 mem_win0_base, mem_win1_base, mem_win2_base, mem_win2_aperture;
4660 if (is_t4(adap->params.chip)) {
4663 /* Truncation intentional: we only read the bottom 32-bits of
4664 * the 64-bit BAR0/BAR1 ... We use the hardware backdoor
4665 * mechanism to read BAR0 instead of using
4666 * pci_resource_start() because we could be operating from
4667 * within a Virtual Machine which is trapping our accesses to
4668 * our Configuration Space and we need to set up the PCI-E
4669 * Memory Window decoders with the actual addresses which will
4670 * be coming across the PCI-E link.
4672 bar0 = t4_read_pcie_cfg4(adap, PCI_BASE_ADDRESS_0);
4673 bar0 &= PCI_BASE_ADDRESS_MEM_MASK;
4674 adap->t4_bar0 = bar0;
4676 mem_win0_base = bar0 + MEMWIN0_BASE;
4677 mem_win1_base = bar0 + MEMWIN1_BASE;
4678 mem_win2_base = bar0 + MEMWIN2_BASE;
4679 mem_win2_aperture = MEMWIN2_APERTURE;
4681 /* For T5, only relative offset inside the PCIe BAR is passed */
4682 mem_win0_base = MEMWIN0_BASE;
4683 mem_win1_base = MEMWIN1_BASE;
4684 mem_win2_base = MEMWIN2_BASE_T5;
4685 mem_win2_aperture = MEMWIN2_APERTURE_T5;
4687 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, 0),
4688 mem_win0_base | BIR_V(0) |
4689 WINDOW_V(ilog2(MEMWIN0_APERTURE) - 10));
4690 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, 1),
4691 mem_win1_base | BIR_V(0) |
4692 WINDOW_V(ilog2(MEMWIN1_APERTURE) - 10));
4693 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, 2),
4694 mem_win2_base | BIR_V(0) |
4695 WINDOW_V(ilog2(mem_win2_aperture) - 10));
4696 t4_read_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, 2));
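/* Note on the WINDOW_V() encoding above (illustrative): the hardware
 * expresses an aperture as 2^(10 + N) bytes, hence the ilog2(size) - 10.
 * For an assumed 64KB aperture, ilog2(65536) - 10 = 6 is programmed, and
 * a 128KB aperture would be encoded as 7.
 */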
4699 static void setup_memwin_rdma(struct adapter *adap)
4701 if (adap->vres.ocq.size) {
4705 start = t4_read_pcie_cfg4(adap, PCI_BASE_ADDRESS_2);
4706 start &= PCI_BASE_ADDRESS_MEM_MASK;
4707 start += OCQ_WIN_OFFSET(adap->pdev, &adap->vres);
4708 sz_kb = roundup_pow_of_two(adap->vres.ocq.size) >> 10;
4710 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, 3),
4711 start | BIR_V(1) | WINDOW_V(ilog2(sz_kb)));
4713 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, 3),
4714 adap->vres.ocq.start);
4716 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, 3));
4720 static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
4725 /* get device capabilities */
4726 memset(c, 0, sizeof(*c));
4727 c->op_to_write = htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
4728 FW_CMD_REQUEST_F | FW_CMD_READ_F);
4729 c->cfvalid_to_len16 = htonl(FW_LEN16(*c));
4730 ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), c);
4734 /* select capabilities we'll be using */
4735 if (c->niccaps & htons(FW_CAPS_CONFIG_NIC_VM)) {
4737 c->niccaps ^= htons(FW_CAPS_CONFIG_NIC_VM);
4739 c->niccaps = htons(FW_CAPS_CONFIG_NIC_VM);
4740 } else if (vf_acls) {
4741 dev_err(adap->pdev_dev, "virtualization ACLs not supported");
4744 c->op_to_write = htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
4745 FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
4746 ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), NULL);
4750 ret = t4_config_glbl_rss(adap, adap->fn,
4751 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL,
4752 FW_RSS_GLB_CONFIG_CMD_TNLMAPEN_F |
4753 FW_RSS_GLB_CONFIG_CMD_TNLALLLKP_F);
4757 ret = t4_cfg_pfvf(adap, adap->fn, adap->fn, 0, adap->sge.egr_sz, 64,
4758 MAX_INGQ, 0, 0, 4, 0xf, 0xf, 16, FW_CMD_CAP_PF,
4765 /* tweak some settings */
4766 t4_write_reg(adap, TP_SHIFT_CNT_A, 0x64f8849);
4767 t4_write_reg(adap, ULP_RX_TDDP_PSZ_A, HPZ0_V(PAGE_SHIFT - 12));
4768 t4_write_reg(adap, TP_PIO_ADDR_A, TP_INGRESS_CONFIG_A);
4769 v = t4_read_reg(adap, TP_PIO_DATA_A);
4770 t4_write_reg(adap, TP_PIO_DATA_A, v & ~CSUM_HAS_PSEUDO_HDR_F);
4772 /* first 4 Tx modulation queues point to consecutive Tx channels */
4773 adap->params.tp.tx_modq_map = 0xE4;
4774 t4_write_reg(adap, TP_TX_MOD_QUEUE_REQ_MAP_A,
4775 TX_MOD_QUEUE_REQ_MAP_V(adap->params.tp.tx_modq_map));
4777 /* associate each Tx modulation queue with consecutive Tx channels */
4779 t4_write_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A,
4780 &v, 1, TP_TX_SCHED_HDR_A);
4781 t4_write_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A,
4782 &v, 1, TP_TX_SCHED_FIFO_A);
4783 t4_write_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A,
4784 &v, 1, TP_TX_SCHED_PCMD_A);
4786 #define T4_TX_MODQ_10G_WEIGHT_DEFAULT 16 /* in KB units */
4787 if (is_offload(adap)) {
4788 t4_write_reg(adap, TP_TX_MOD_QUEUE_WEIGHT0_A,
4789 TX_MODQ_WEIGHT0_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
4790 TX_MODQ_WEIGHT1_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
4791 TX_MODQ_WEIGHT2_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
4792 TX_MODQ_WEIGHT3_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT));
4793 t4_write_reg(adap, TP_TX_MOD_CHANNEL_WEIGHT_A,
4794 TX_MODQ_WEIGHT0_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
4795 TX_MODQ_WEIGHT1_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
4796 TX_MODQ_WEIGHT2_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
4797 TX_MODQ_WEIGHT3_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT));
4800 /* get basic stuff going */
4801 return t4_early_init(adap, adap->fn);
4805 * Max # of ATIDs. The absolute HW max is 16K but we keep it lower.
4807 #define MAX_ATIDS 8192U
4810 * Phase 0 of initialization: contact FW, obtain config, perform basic init.
4812 * If the firmware we're dealing with has Configuration File support, then
4813 * we use that to perform all configuration
4817 * Tweak configuration based on module parameters, etc. Most of these have
4818 * defaults assigned to them by Firmware Configuration Files (if we're using
4819 * them) but need to be explicitly set if we're using hard-coded
4820 * initialization. But even in the case of using Firmware Configuration
4821 * Files, we'd like to expose the ability to change these via module
4822 * parameters so these are essentially common tweaks/settings for
4823 * Configuration Files and hard-coded initialization ...
4825 static int adap_init0_tweaks(struct adapter *adapter)
4828 * Fix up various Host-Dependent Parameters like Page Size, Cache
4829 * Line Size, etc. The firmware default is for a 4KB Page Size and
4830 * 64B Cache Line Size ...
4832 t4_fixup_host_params(adapter, PAGE_SIZE, L1_CACHE_BYTES);
4835 * Process module parameters which affect early initialization.
4837 if (rx_dma_offset != 2 && rx_dma_offset != 0) {
4838 dev_err(&adapter->pdev->dev,
4839 "Ignoring illegal rx_dma_offset=%d, using 2\n",
4843 t4_set_reg_field(adapter, SGE_CONTROL_A,
4844 PKTSHIFT_V(PKTSHIFT_M),
4845 PKTSHIFT_V(rx_dma_offset));
4848 * Don't include the "IP Pseudo Header" in CPL_RX_PKT checksums: Linux
4849 * adds the pseudo header itself.
4851 t4_tp_wr_bits_indirect(adapter, TP_INGRESS_CONFIG_A,
4852 CSUM_HAS_PSEUDO_HDR_F, 0);
4858 * Attempt to initialize the adapter via a Firmware Configuration File.
4860 static int adap_init0_config(struct adapter *adapter, int reset)
4862 struct fw_caps_config_cmd caps_cmd;
4863 const struct firmware *cf;
4864 unsigned long mtype = 0, maddr = 0;
4865 u32 finiver, finicsum, cfcsum;
4867 int config_issued = 0;
4868 char *fw_config_file, fw_config_file_path[256];
4869 char *config_name = NULL;
4872 * Reset device if necessary.
4875 ret = t4_fw_reset(adapter, adapter->mbox,
4876 PIORSTMODE_F | PIORST_F);
4882 * If we have a T4 configuration file under /lib/firmware/cxgb4/,
4883 * then use that. Otherwise, use the configuration file stored
4884 * in the adapter flash ...
4886 switch (CHELSIO_CHIP_VERSION(adapter->params.chip)) {
4888 fw_config_file = FW4_CFNAME;
4891 fw_config_file = FW5_CFNAME;
4894 dev_err(adapter->pdev_dev, "Device %d is not supported\n",
4895 adapter->pdev->device);
4900 ret = request_firmware(&cf, fw_config_file, adapter->pdev_dev);
4902 config_name = "On FLASH";
4903 mtype = FW_MEMTYPE_CF_FLASH;
4904 maddr = t4_flash_cfg_addr(adapter);
4906 u32 params[7], val[7];
4908 sprintf(fw_config_file_path,
4909 "/lib/firmware/%s", fw_config_file);
4910 config_name = fw_config_file_path;
4912 if (cf->size >= FLASH_CFG_MAX_SIZE)
4915 params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
4916 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_CF));
4917 ret = t4_query_params(adapter, adapter->mbox,
4918 adapter->fn, 0, 1, params, val);
4921 * For t4_memory_rw() below addresses and
4922 * sizes have to be in terms of multiples of 4
4923 * bytes. So, if the Configuration File isn't
4924 * a multiple of 4 bytes in length we'll have
4925 * to write that out separately since we can't
4926 * guarantee that the bytes following the
4927 * residual byte in the buffer returned by
4928 * request_firmware() are zeroed out ...
4930 size_t resid = cf->size & 0x3;
4931 size_t size = cf->size & ~0x3;
4932 __be32 *data = (__be32 *)cf->data;
4934 mtype = FW_PARAMS_PARAM_Y_G(val[0]);
4935 maddr = FW_PARAMS_PARAM_Z_G(val[0]) << 16;
4937 spin_lock(&adapter->win0_lock);
4938 ret = t4_memory_rw(adapter, 0, mtype, maddr,
4939 size, data, T4_MEMORY_WRITE);
4940 if (ret == 0 && resid != 0) {
4947 last.word = data[size >> 2];
4948 for (i = resid; i < 4; i++)
4950 ret = t4_memory_rw(adapter, 0, mtype,
4955 spin_unlock(&adapter->win0_lock);
4959 release_firmware(cf);
4965 * Issue a Capability Configuration command to the firmware to get it
4966 * to parse the Configuration File. We don't use t4_fw_config_file()
4967 * because we want the ability to modify various features after we've
4968 * processed the configuration file ...
4970 memset(&caps_cmd, 0, sizeof(caps_cmd));
4971 caps_cmd.op_to_write =
4972 htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
4975 caps_cmd.cfvalid_to_len16 =
4976 htonl(FW_CAPS_CONFIG_CMD_CFVALID_F |
4977 FW_CAPS_CONFIG_CMD_MEMTYPE_CF_V(mtype) |
4978 FW_CAPS_CONFIG_CMD_MEMADDR64K_CF_V(maddr >> 16) |
4979 FW_LEN16(caps_cmd));
4980 ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
4983 /* If the CAPS_CONFIG failed with an ENOENT (for a Firmware
4984 * Configuration File in FLASH), our last gasp effort is to use the
4985 * Firmware Configuration File which is embedded in the firmware. A
4986 * very few early versions of the firmware didn't have one embedded
4987 * but we can ignore those.
4989 if (ret == -ENOENT) {
4990 memset(&caps_cmd, 0, sizeof(caps_cmd));
4991 caps_cmd.op_to_write =
4992 htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
4995 caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
4996 ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd,
4997 sizeof(caps_cmd), &caps_cmd);
4998 config_name = "Firmware Default";
5005 finiver = ntohl(caps_cmd.finiver);
5006 finicsum = ntohl(caps_cmd.finicsum);
5007 cfcsum = ntohl(caps_cmd.cfcsum);
5008 if (finicsum != cfcsum)
5009 dev_warn(adapter->pdev_dev, "Configuration File checksum "
5010 "mismatch: [fini] csum=%#x, computed csum=%#x\n",
5014 * And now tell the firmware to use the configuration we just loaded.
5016 caps_cmd.op_to_write =
5017 htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
5020 caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
5021 ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
5027 * Tweak configuration based on system architecture, module
5030 ret = adap_init0_tweaks(adapter);
5035 * And finally tell the firmware to initialize itself using the
5036 * parameters from the Configuration File.
5038 ret = t4_fw_initialize(adapter, adapter->mbox);
5042 /* Emit Firmware Configuration File information and return successfully.
5045 dev_info(adapter->pdev_dev, "Successfully configured using Firmware "
5046 "Configuration File \"%s\", version %#x, computed checksum %#x\n",
5047 config_name, finiver, cfcsum);
5051 * Something bad happened. Return the error ... (If the "error"
5052 * is that there's no Configuration File on the adapter we don't
5053 * want to issue a warning since this is fairly common.)
5056 if (config_issued && ret != -ENOENT)
5057 dev_warn(adapter->pdev_dev, "\"%s\" configuration file error %d\n",
static struct fw_info fw_info_array[] = {
	{
		.chip = CHELSIO_T4,
		.fs_name = FW4_CFNAME,
		.fw_mod_name = FW4_FNAME,
		.fw_hdr = {
			.chip = FW_HDR_CHIP_T4,
			.fw_ver = __cpu_to_be32(FW_VERSION(T4)),
			.intfver_nic = FW_INTFVER(T4, NIC),
			.intfver_vnic = FW_INTFVER(T4, VNIC),
			.intfver_ri = FW_INTFVER(T4, RI),
			.intfver_iscsi = FW_INTFVER(T4, ISCSI),
			.intfver_fcoe = FW_INTFVER(T4, FCOE),
		},
	}, {
		.chip = CHELSIO_T5,
		.fs_name = FW5_CFNAME,
		.fw_mod_name = FW5_FNAME,
		.fw_hdr = {
			.chip = FW_HDR_CHIP_T5,
			.fw_ver = __cpu_to_be32(FW_VERSION(T5)),
			.intfver_nic = FW_INTFVER(T5, NIC),
			.intfver_vnic = FW_INTFVER(T5, VNIC),
			.intfver_ri = FW_INTFVER(T5, RI),
			.intfver_iscsi = FW_INTFVER(T5, ISCSI),
			.intfver_fcoe = FW_INTFVER(T5, FCOE),
		},
	}
};
static struct fw_info *find_fw_info(int chip)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(fw_info_array); i++) {
		if (fw_info_array[i].chip == chip)
			return &fw_info_array[i];
	}
	return NULL;
}
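/* find_fw_info() is used by adap_init0() below to pick the firmware header
 * the driver was compiled against for the chip revision actually present.
 */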
/*
 * Phase 0 of initialization: contact FW, obtain config, perform basic init.
 */
static int adap_init0(struct adapter *adap)
{
	int ret;
	u32 v, port_vec;
	enum dev_state state;
	u32 params[7], val[7];
	struct fw_caps_config_cmd caps_cmd;
	int reset = 1;
	/* Grab Firmware Device Log parameters as early as possible so we have
	 * access to it for debugging, etc.
	 */
	ret = t4_init_devlog_params(adap);
	if (ret < 0)
		return ret;

	/* Contact FW, advertising Master capability */
	ret = t4_fw_hello(adap, adap->mbox, adap->mbox, MASTER_MAY, &state);
	if (ret < 0) {
		dev_err(adap->pdev_dev, "could not connect to FW, error %d\n",
			ret);
		return ret;
	}
	if (ret == adap->mbox)
		adap->flags |= MASTER_PF;
	/*
	 * If we're the Master PF Driver and the device is uninitialized,
	 * then let's consider upgrading the firmware ...  (We always want
	 * to check the firmware version number in order to A. get it for
	 * later reporting and B. to warn if the currently loaded firmware
	 * is excessively mismatched relative to the driver.)
	 */
	t4_get_fw_version(adap, &adap->params.fw_vers);
	t4_get_tp_version(adap, &adap->params.tp_vers);
	if ((adap->flags & MASTER_PF) && state != DEV_STATE_INIT) {
		struct fw_info *fw_info;
		struct fw_hdr *card_fw;
		const struct firmware *fw;
		const u8 *fw_data = NULL;
		unsigned int fw_size = 0;

		/* This is the firmware whose headers the driver was compiled
		 * against.
		 */
		fw_info = find_fw_info(CHELSIO_CHIP_VERSION(adap->params.chip));
		if (fw_info == NULL) {
			dev_err(adap->pdev_dev,
				"unable to get firmware info for chip %d.\n",
				CHELSIO_CHIP_VERSION(adap->params.chip));
			return -EINVAL;
		}

		/* Allocate memory to read the header of the firmware on the
		 * card.
		 */
		card_fw = t4_alloc_mem(sizeof(*card_fw));

		/* Get FW from /lib/firmware/ */
		ret = request_firmware(&fw, fw_info->fw_mod_name,
				       adap->pdev_dev);
		if (ret < 0) {
			dev_err(adap->pdev_dev,
				"unable to load firmware image %s, error %d\n",
				fw_info->fw_mod_name, ret);
		} else {
			fw_data = fw->data;
			fw_size = fw->size;
		}

		/* upgrade FW logic */
		ret = t4_prep_fw(adap, fw_info, fw_data, fw_size, card_fw,
				 state, &reset);

		/* Cleaning up */
		release_firmware(fw);
		t4_free_mem(card_fw);

		if (ret < 0)
			goto bye;
	}
	/*
	 * Grab VPD parameters.  This should be done after we establish a
	 * connection to the firmware since some of the VPD parameters
	 * (notably the Core Clock frequency) are retrieved via requests to
	 * the firmware.  On the other hand, we need these fairly early on
	 * so we do this right after getting a hold of the firmware.
	 */
	ret = get_vpd_params(adap, &adap->params.vpd);
	if (ret < 0)
		goto bye;
	/*
	 * Find out what ports are available to us.  Note that we need to do
	 * this early, before the rest of initialization, since it needs
	 * nports and portvec ...
	 */
	v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
	    FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_PORTVEC);
	ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 1, &v, &port_vec);
	if (ret < 0)
		goto bye;

	adap->params.nports = hweight32(port_vec);
	adap->params.portvec = port_vec;
	/* If the firmware is initialized already, emit a simple note to that
	 * effect. Otherwise, it's time to try initializing the adapter.
	 */
	if (state == DEV_STATE_INIT) {
		dev_info(adap->pdev_dev, "Coming up as %s: "
			 "Adapter already initialized\n",
			 adap->flags & MASTER_PF ? "MASTER" : "SLAVE");
	} else {
		dev_info(adap->pdev_dev, "Coming up as MASTER: "
			 "Initializing adapter\n");

		/* Find out whether we're dealing with a version of the
		 * firmware which has configuration file support.
		 */
		params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
			     FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_CF));
		ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 1,
				      params, val);

		/* If the firmware doesn't support Configuration Files,
		 * return an error.
		 */
		if (ret < 0) {
			dev_err(adap->pdev_dev, "firmware doesn't support "
				"Firmware Configuration Files\n");
			goto bye;
		}

		/* The firmware provides us with a memory buffer where we can
		 * load a Configuration File from the host if we want to
		 * override the Configuration File in flash.
		 */
		ret = adap_init0_config(adap, reset);
		if (ret == -ENOENT) {
			dev_err(adap->pdev_dev, "no Configuration File "
				"present on adapter.\n");
			goto bye;
		}
		if (ret < 0) {
			dev_err(adap->pdev_dev, "could not initialize "
				"adapter, error %d\n", -ret);
			goto bye;
		}
	}
	/* Give the SGE code a chance to pull in anything that it needs ...
	 * Note that this must be called after we retrieve our VPD parameters
	 * in order to know how to convert core ticks to seconds, etc.
	 */
	ret = t4_sge_init(adap);
	if (ret < 0)
		goto bye;

	if (is_bypass_device(adap->pdev->device))
		adap->params.bypass = 1;
	/*
	 * Grab some of our basic fundamental operating parameters.
	 */
#define FW_PARAM_DEV(param) \
	(FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | \
	 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_##param))

#define FW_PARAM_PFVF(param) \
	FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \
	FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param) | \
	FW_PARAMS_PARAM_Y_V(0) | \
	FW_PARAMS_PARAM_Z_V(0)
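	/* As an illustration, FW_PARAM_PFVF(L2T_START) composes the 32-bit
	 * parameter identifier for the L2T_START query below: the PFVF
	 * mnemonic in the MNEM field, the L2T_START parameter index in
	 * PARAM_X, and zeroes in the (unused here) PARAM_Y/PARAM_Z fields.
	 */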
	params[0] = FW_PARAM_PFVF(EQ_START);
	params[1] = FW_PARAM_PFVF(L2T_START);
	params[2] = FW_PARAM_PFVF(L2T_END);
	params[3] = FW_PARAM_PFVF(FILTER_START);
	params[4] = FW_PARAM_PFVF(FILTER_END);
	params[5] = FW_PARAM_PFVF(IQFLINT_START);
	ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6, params, val);
	if (ret < 0)
		goto bye;
	adap->sge.egr_start = val[0];
	adap->l2t_start = val[1];
	adap->l2t_end = val[2];
	adap->tids.ftid_base = val[3];
	adap->tids.nftids = val[4] - val[3] + 1;
	adap->sge.ingr_start = val[5];
	/* qids (ingress/egress) returned from firmware can be anywhere
	 * in the range from EQ(IQFLINT)_START to EQ(IQFLINT)_END.
	 * Hence the driver needs to allocate memory for this range to
	 * store the queue info.  Get the highest IQFLINT/EQ index returned
	 * in FW_EQ_*_CMD.alloc command.
	 */
	params[0] = FW_PARAM_PFVF(EQ_END);
	params[1] = FW_PARAM_PFVF(IQFLINT_END);
	ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params, val);
	if (ret < 0)
		goto bye;
	adap->sge.egr_sz = val[0] - adap->sge.egr_start + 1;
	adap->sge.ingr_sz = val[1] - adap->sge.ingr_start + 1;
	adap->sge.egr_map = kcalloc(adap->sge.egr_sz,
				    sizeof(*adap->sge.egr_map), GFP_KERNEL);
	if (!adap->sge.egr_map) {
		ret = -ENOMEM;
		goto bye;
	}

	adap->sge.ingr_map = kcalloc(adap->sge.ingr_sz,
				     sizeof(*adap->sge.ingr_map), GFP_KERNEL);
	if (!adap->sge.ingr_map) {
		ret = -ENOMEM;
		goto bye;
	}

	/* Allocate the memory for the various egress queue bitmaps
	 * i.e. starving_fl and txq_maperr.
	 */
	adap->sge.starving_fl = kcalloc(BITS_TO_LONGS(adap->sge.egr_sz),
					sizeof(long), GFP_KERNEL);
	if (!adap->sge.starving_fl) {
		ret = -ENOMEM;
		goto bye;
	}

	adap->sge.txq_maperr = kcalloc(BITS_TO_LONGS(adap->sge.egr_sz),
				       sizeof(long), GFP_KERNEL);
	if (!adap->sge.txq_maperr) {
		ret = -ENOMEM;
		goto bye;
	}
	params[0] = FW_PARAM_PFVF(CLIP_START);
	params[1] = FW_PARAM_PFVF(CLIP_END);
	ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params, val);
	if (ret < 0)
		goto bye;
	adap->clipt_start = val[0];
	adap->clipt_end = val[1];
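	/* (The CLIP -- Compressed Local IP -- table region just queried holds
	 * the local IPv6 addresses which offloaded connections can use.)
	 */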
	/* query params related to active filter region */
	params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START);
	params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END);
	ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params, val);
	/* If the Active filter size is set we enable establishing
	 * offload connections through firmware work requests.
	 */
	if ((val[0] != val[1]) && (ret >= 0)) {
		adap->flags |= FW_OFLD_CONN;
		adap->tids.aftid_base = val[0];
		adap->tids.aftid_end = val[1];
	}
	/* If we're running on newer firmware, let it know that we're
	 * prepared to deal with encapsulated CPL messages.  Older
	 * firmware won't understand this and we'll just get
	 * unencapsulated messages ...
	 */
	params[0] = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
	val[0] = 1;
	(void)t4_set_params(adap, adap->mbox, adap->fn, 0, 1, params, val);
	/*
	 * Find out whether we're allowed to use the T5+ ULPTX MEMWRITE DSGL
	 * capability.  Earlier versions of the firmware didn't have the
	 * ULPTX_MEMWRITE_DSGL so we'll interpret a query failure as no
	 * permission to use ULPTX MEMWRITE DSGL.
	 */
	if (is_t4(adap->params.chip)) {
		adap->params.ulptx_memwrite_dsgl = false;
	} else {
		params[0] = FW_PARAM_DEV(ULPTX_MEMWRITE_DSGL);
		ret = t4_query_params(adap, adap->mbox, adap->fn, 0,
				      1, params, val);
		adap->params.ulptx_memwrite_dsgl = (ret == 0 && val[0] != 0);
	}
	/*
	 * Get device capabilities so we can determine what resources we need
	 * to manage.
	 */
	memset(&caps_cmd, 0, sizeof(caps_cmd));
	caps_cmd.op_to_write = htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
				     FW_CMD_REQUEST_F | FW_CMD_READ_F);
	caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
	ret = t4_wr_mbox(adap, adap->mbox, &caps_cmd, sizeof(caps_cmd),
			 &caps_cmd);
	if (ret < 0)
		goto bye;
	if (caps_cmd.ofldcaps) {
		/* query offload-related parameters */
		params[0] = FW_PARAM_DEV(NTID);
		params[1] = FW_PARAM_PFVF(SERVER_START);
		params[2] = FW_PARAM_PFVF(SERVER_END);
		params[3] = FW_PARAM_PFVF(TDDP_START);
		params[4] = FW_PARAM_PFVF(TDDP_END);
		params[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
		ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6,
				      params, val);
		if (ret < 0)
			goto bye;
		adap->tids.ntids = val[0];
		adap->tids.natids = min(adap->tids.ntids / 2, MAX_ATIDS);
		adap->tids.stid_base = val[1];
		adap->tids.nstids = val[2] - val[1] + 1;
		/*
		 * Setup server filter region.  Divide the available filter
		 * region into two parts.  Regular filters get 1/3rd and
		 * server filters get the 2/3rd part.  This is only enabled
		 * if the workaround path is enabled.
		 * 1. For regular filters.
		 * 2. Server filters: these are special filters which are
		 *    used to redirect SYN packets to the offload queue.
		 */
		if (adap->flags & FW_OFLD_CONN && !is_bypass(adap)) {
			adap->tids.sftid_base = adap->tids.ftid_base +
				DIV_ROUND_UP(adap->tids.nftids, 3);
			adap->tids.nsftids = adap->tids.nftids -
				DIV_ROUND_UP(adap->tids.nftids, 3);
			adap->tids.nftids = adap->tids.sftid_base -
				adap->tids.ftid_base;
		}
		adap->vres.ddp.start = val[3];
		adap->vres.ddp.size = val[4] - val[3] + 1;
		adap->params.ofldq_wr_cred = val[5];

		adap->params.offload = 1;
	}
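	/* Worked example of the 1/3:2/3 split above, with hypothetical
	 * numbers: for nftids == 496, DIV_ROUND_UP(496, 3) == 166, so the
	 * regular filter region keeps IDs [ftid_base, ftid_base + 165] and
	 * the 330 server filter IDs begin at sftid_base == ftid_base + 166.
	 */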
	if (caps_cmd.rdmacaps) {
		params[0] = FW_PARAM_PFVF(STAG_START);
		params[1] = FW_PARAM_PFVF(STAG_END);
		params[2] = FW_PARAM_PFVF(RQ_START);
		params[3] = FW_PARAM_PFVF(RQ_END);
		params[4] = FW_PARAM_PFVF(PBL_START);
		params[5] = FW_PARAM_PFVF(PBL_END);
		ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6,
				      params, val);
		if (ret < 0)
			goto bye;
		adap->vres.stag.start = val[0];
		adap->vres.stag.size = val[1] - val[0] + 1;
		adap->vres.rq.start = val[2];
		adap->vres.rq.size = val[3] - val[2] + 1;
		adap->vres.pbl.start = val[4];
		adap->vres.pbl.size = val[5] - val[4] + 1;

		params[0] = FW_PARAM_PFVF(SQRQ_START);
		params[1] = FW_PARAM_PFVF(SQRQ_END);
		params[2] = FW_PARAM_PFVF(CQ_START);
		params[3] = FW_PARAM_PFVF(CQ_END);
		params[4] = FW_PARAM_PFVF(OCQ_START);
		params[5] = FW_PARAM_PFVF(OCQ_END);
		ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6, params,
				      val);
		if (ret < 0)
			goto bye;
		adap->vres.qp.start = val[0];
		adap->vres.qp.size = val[1] - val[0] + 1;
		adap->vres.cq.start = val[2];
		adap->vres.cq.size = val[3] - val[2] + 1;
		adap->vres.ocq.start = val[4];
		adap->vres.ocq.size = val[5] - val[4] + 1;

		params[0] = FW_PARAM_DEV(MAXORDIRD_QP);
		params[1] = FW_PARAM_DEV(MAXIRD_ADAPTER);
		ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params,
				      val);
		if (ret < 0) {
			adap->params.max_ordird_qp = 8;
			adap->params.max_ird_adapter = 32 * adap->tids.ntids;
			ret = 0;
		} else {
			adap->params.max_ordird_qp = val[0];
			adap->params.max_ird_adapter = val[1];
		}
		dev_info(adap->pdev_dev,
			 "max_ordird_qp %d max_ird_adapter %d\n",
			 adap->params.max_ordird_qp,
			 adap->params.max_ird_adapter);
	}
	if (caps_cmd.iscsicaps) {
		params[0] = FW_PARAM_PFVF(ISCSI_START);
		params[1] = FW_PARAM_PFVF(ISCSI_END);
		ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2,
				      params, val);
		if (ret < 0)
			goto bye;
		adap->vres.iscsi.start = val[0];
		adap->vres.iscsi.size = val[1] - val[0] + 1;
	}
#undef FW_PARAM_PFVF
#undef FW_PARAM_DEV
	/* The MTU/MSS Table is initialized by now, so load its values.  If
	 * we're initializing the adapter, then we'll make any modifications
	 * we want to the MTU/MSS Table and also initialize the congestion
	 * tables.
	 */
	t4_read_mtu_tbl(adap, adap->params.mtus, NULL);
	if (state != DEV_STATE_INIT) {
		int i;

		/* The default MTU Table contains values 1492 and 1500.
		 * However, for TCP, it's better to have two values which are
		 * a multiple of 8 +/- 4 bytes apart near this popular MTU.
		 * This allows us to have a TCP Data Payload which is a
		 * multiple of 8 regardless of what combination of TCP Options
		 * are in use (always a multiple of 4 bytes) which is
		 * important for performance reasons.  For instance, if no
		 * options are in use, then we have a 20-byte IP header and a
		 * 20-byte TCP header.  In this case, a 1500-byte MTU would
		 * result in a TCP Data Payload of 1500 - 40 == 1460 bytes
		 * which is not a multiple of 8.  So using an MTU of 1488 in
		 * this case results in a TCP Data Payload of 1448 bytes which
		 * is a multiple of 8.  On the other hand, if 12-byte TCP Time
		 * Stamps have been negotiated, then an MTU of 1500 bytes
		 * results in a TCP Data Payload of 1448 bytes which, as
		 * above, is a multiple of 8 bytes ...
		 */
		for (i = 0; i < NMTUS; i++)
			if (adap->params.mtus[i] == 1492) {
				adap->params.mtus[i] = 1488;
				break;
			}

		t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
			     adap->params.b_wnd);
	}
	t4_init_sge_params(adap);
	t4_init_tp_params(adap);
	adap->flags |= FW_OK;
	return 0;
	/*
	 * Something bad happened.  If a command timed out or failed with EIO,
	 * the FW is not operating within its spec or something catastrophic
	 * happened to HW/FW; stop issuing commands.
	 */
bye:
	kfree(adap->sge.egr_map);
	kfree(adap->sge.ingr_map);
	kfree(adap->sge.starving_fl);
	kfree(adap->sge.txq_maperr);
	if (ret != -ETIMEDOUT && ret != -EIO)
		t4_fw_bye(adap, adap->mbox);
	return ret;
}
static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
					 pci_channel_state_t state)
{
	int i;
	struct adapter *adap = pci_get_drvdata(pdev);

	if (!adap)
		goto out;

	rtnl_lock();
	adap->flags &= ~FW_OK;
	notify_ulds(adap, CXGB4_STATE_START_RECOVERY);
	spin_lock(&adap->stats_lock);
	for_each_port(adap, i) {
		struct net_device *dev = adap->port[i];

		netif_device_detach(dev);
		netif_carrier_off(dev);
	}
	spin_unlock(&adap->stats_lock);
	disable_interrupts(adap);
	if (adap->flags & FULL_INIT_DONE)
		cxgb_down(adap);
	rtnl_unlock();
	if ((adap->flags & DEV_ENABLED)) {
		pci_disable_device(pdev);
		adap->flags &= ~DEV_ENABLED;
	}
out:	return state == pci_channel_io_perm_failure ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}
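/* The EEH core calls error_detected() first; returning
 * PCI_ERS_RESULT_NEED_RESET above asks it to reset the slot and then invoke
 * eeh_slot_reset() below, with eeh_resume() run once traffic may restart.
 */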
static pci_ers_result_t eeh_slot_reset(struct pci_dev *pdev)
{
	int i, ret;
	struct fw_caps_config_cmd c;
	struct adapter *adap = pci_get_drvdata(pdev);

	if (!adap) {
		pci_restore_state(pdev);
		pci_save_state(pdev);
		return PCI_ERS_RESULT_RECOVERED;
	}

	if (!(adap->flags & DEV_ENABLED)) {
		if (pci_enable_device(pdev)) {
			dev_err(&pdev->dev, "Cannot reenable PCI "
				"device after reset\n");
			return PCI_ERS_RESULT_DISCONNECT;
		}
		adap->flags |= DEV_ENABLED;
	}

	pci_set_master(pdev);
	pci_restore_state(pdev);
	pci_save_state(pdev);
	pci_cleanup_aer_uncorrect_error_status(pdev);

	if (t4_wait_dev_ready(adap->regs) < 0)
		return PCI_ERS_RESULT_DISCONNECT;
	if (t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, NULL) < 0)
		return PCI_ERS_RESULT_DISCONNECT;
	adap->flags |= FW_OK;
	if (adap_init1(adap, &c))
		return PCI_ERS_RESULT_DISCONNECT;

	for_each_port(adap, i) {
		struct port_info *p = adap2pinfo(adap, i);

		ret = t4_alloc_vi(adap, adap->fn, p->tx_chan, adap->fn, 0, 1,
				  NULL, NULL);
		if (ret < 0)
			return PCI_ERS_RESULT_DISCONNECT;
		p->viid = ret;
		p->xact_addr_filt = -1;
	}

	t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
		     adap->params.b_wnd);
	setup_memwin(adap);
	if (cxgb_up(adap))
		return PCI_ERS_RESULT_DISCONNECT;
	return PCI_ERS_RESULT_RECOVERED;
}
static void eeh_resume(struct pci_dev *pdev)
{
	int i;
	struct adapter *adap = pci_get_drvdata(pdev);

	if (!adap)
		return;

	rtnl_lock();
	for_each_port(adap, i) {
		struct net_device *dev = adap->port[i];

		if (netif_running(dev)) {
			link_start(dev);
			cxgb_set_rxmode(dev);
		}
		netif_device_attach(dev);
	}
	rtnl_unlock();
}

static const struct pci_error_handlers cxgb4_eeh = {
	.error_detected = eeh_err_detected,
	.slot_reset     = eeh_slot_reset,
	.resume         = eeh_resume,
};
static inline bool is_x_10g_port(const struct link_config *lc)
{
	return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
	       (lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
}

static inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
			     unsigned int us, unsigned int cnt,
			     unsigned int size, unsigned int iqe_size)
{
	q->adap = adap;
	set_rspq_intr_params(q, us, cnt);
	q->iqe_len = iqe_size;
	q->size = size;
}
/*
 * Perform default configuration of DMA queues depending on the number and type
 * of ports we found and the number of available CPUs.  Most settings can be
 * modified by the admin prior to actual use.
 */
static void cfg_queues(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	int i, n10g = 0, qidx = 0;
#ifndef CONFIG_CHELSIO_T4_DCB
	int q10g = 0;
#endif
	int ciq_size;

	for_each_port(adap, i)
		n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
#ifdef CONFIG_CHELSIO_T4_DCB
	/* For Data Center Bridging support we need to be able to support up
	 * to 8 Traffic Priorities; each of which will be assigned to its
	 * own TX Queue in order to prevent Head-Of-Line Blocking.
	 */
	if (adap->params.nports * 8 > MAX_ETH_QSETS) {
		dev_err(adap->pdev_dev, "MAX_ETH_QSETS=%d < %d!\n",
			MAX_ETH_QSETS, adap->params.nports * 8);
		BUG_ON(1);
	}

	for_each_port(adap, i) {
		struct port_info *pi = adap2pinfo(adap, i);

		pi->first_qset = qidx;
		pi->nqsets = 8;
		qidx += pi->nqsets;
	}
#else /* !CONFIG_CHELSIO_T4_DCB */
	/*
	 * We default to 1 queue per non-10G port and up to # of cores queues
	 * per 10G port.
	 */
	if (n10g)
		q10g = (MAX_ETH_QSETS - (adap->params.nports - n10g)) / n10g;
	if (q10g > netif_get_num_default_rss_queues())
		q10g = netif_get_num_default_rss_queues();

	for_each_port(adap, i) {
		struct port_info *pi = adap2pinfo(adap, i);

		pi->first_qset = qidx;
		pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
		qidx += pi->nqsets;
	}
#endif /* !CONFIG_CHELSIO_T4_DCB */
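	/* Example for the non-DCB path above, with hypothetical hardware: a
	 * 2-port card with both ports at 10G on an 8-core host gives
	 * n10g == 2, so q10g is (MAX_ETH_QSETS - 0) / 2 capped at
	 * netif_get_num_default_rss_queues(); each 10G port then receives
	 * q10g queue sets and qidx counts the total handed out.
	 */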
	s->ethqsets = qidx;
	s->max_ethqsets = qidx;   /* MSI-X may lower it later */

	if (is_offload(adap)) {
		/*
		 * For offload we use 1 queue/channel if all ports are up to 1G,
		 * otherwise we divide all available queues amongst the channels
		 * capped by the number of available cores.
		 */
		if (n10g) {
			i = min_t(int, ARRAY_SIZE(s->ofldrxq),
				  num_online_cpus());
			s->ofldqsets = roundup(i, adap->params.nports);
		} else {
			s->ofldqsets = adap->params.nports;
		}
		/* For RDMA one Rx queue per channel suffices */
		s->rdmaqs = adap->params.nports;
		s->rdmaciqs = adap->params.nports;
	}
	for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
		struct sge_eth_rxq *r = &s->ethrxq[i];

		init_rspq(adap, &r->rspq, 5, 10, 1024, 64);
		r->fl.size = 72;
	}

	for (i = 0; i < ARRAY_SIZE(s->ethtxq); i++)
		s->ethtxq[i].q.size = 1024;

	for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++)
		s->ctrlq[i].q.size = 512;

	for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
		s->ofldtxq[i].q.size = 1024;

	for (i = 0; i < ARRAY_SIZE(s->ofldrxq); i++) {
		struct sge_ofld_rxq *r = &s->ofldrxq[i];

		init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
		r->rspq.uld = CXGB4_ULD_ISCSI;
		r->fl.size = 72;
	}

	for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
		struct sge_ofld_rxq *r = &s->rdmarxq[i];

		init_rspq(adap, &r->rspq, 5, 1, 511, 64);
		r->rspq.uld = CXGB4_ULD_RDMA;
		r->fl.size = 72;
	}

	ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids;
	if (ciq_size > SGE_MAX_IQ_SIZE) {
		CH_WARN(adap, "CIQ size too small for available IQs\n");
		ciq_size = SGE_MAX_IQ_SIZE;
	}

	for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) {
		struct sge_ofld_rxq *r = &s->rdmaciq[i];

		init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64);
		r->rspq.uld = CXGB4_ULD_RDMA;
	}

	init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64);
	init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64);
}
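/* For reference, the init_rspq() arguments above are (adapter, queue,
 * holdoff timer in us, interrupt packet-count threshold, number of response
 * queue entries, entry size in bytes); e.g. the Ethernet Rx queues use a
 * 5us holdoff, a count threshold of 10, and 1024 64-byte entries.
 */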
/*
 * Reduce the number of Ethernet queues across all ports to at most n.
 * n provides at least one queue per port.
 */
static void reduce_ethqs(struct adapter *adap, int n)
{
	int i;
	struct port_info *pi;

	while (n < adap->sge.ethqsets)
		for_each_port(adap, i) {
			pi = adap2pinfo(adap, i);
			if (pi->nqsets > 1) {
				pi->nqsets--;
				adap->sge.ethqsets--;
				if (adap->sge.ethqsets <= n)
					break;
			}
		}

	n = 0;
	for_each_port(adap, i) {
		pi = adap2pinfo(adap, i);
		pi->first_qset = n;
		n += pi->nqsets;
	}
}
/* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
#define EXTRA_VECS 2

static int enable_msix(struct adapter *adap)
{
	int ofld_need = 0;
	int i, want, need;
	struct sge *s = &adap->sge;
	unsigned int nchan = adap->params.nports;
	struct msix_entry entries[MAX_INGQ + 1];

	for (i = 0; i < ARRAY_SIZE(entries); ++i)
		entries[i].entry = i;

	want = s->max_ethqsets + EXTRA_VECS;
	if (is_offload(adap)) {
		want += s->rdmaqs + s->rdmaciqs + s->ofldqsets;
		/* need nchan for each possible ULD */
		ofld_need = 3 * nchan;
	}
#ifdef CONFIG_CHELSIO_T4_DCB
	/* For Data Center Bridging we need 8 Ethernet TX Priority Queues for
	 * each port.
	 */
	need = 8 * adap->params.nports + EXTRA_VECS + ofld_need;
#else
	need = adap->params.nports + EXTRA_VECS + ofld_need;
#endif
	want = pci_enable_msix_range(adap->pdev, entries, need, want);
	if (want < 0)
		return want;

	/*
	 * Distribute available vectors to the various queue groups.
	 * Every group gets its minimum requirement and NIC gets top
	 * priority for leftovers.
	 */
	i = want - EXTRA_VECS - ofld_need;
	if (i < s->max_ethqsets) {
		s->max_ethqsets = i;
		if (i < s->ethqsets)
			reduce_ethqs(adap, i);
	}
	if (is_offload(adap)) {
		i = want - EXTRA_VECS - s->max_ethqsets;
		i -= ofld_need - nchan;
		s->ofldqsets = (i / nchan) * nchan;	/* round down */
	}
	for (i = 0; i < want; ++i)
		adap->msix_info[i].vec = entries[i].vector;

	return 0;
}
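/* Vector accounting sketch (hypothetical 2-port offload adapter): "want"
 * starts at max_ethqsets + EXTRA_VECS plus the RDMA/iSCSI queue counts,
 * while "need" is nports + EXTRA_VECS + 3 * nchan == 2 + 2 + 6; if the
 * platform grants fewer vectors than wanted, the Ethernet queue sets are
 * trimmed first via reduce_ethqs() above.
 */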
static int init_rss(struct adapter *adap)
{
	unsigned int i, j;

	for_each_port(adap, i) {
		struct port_info *pi = adap2pinfo(adap, i);

		pi->rss = kcalloc(pi->rss_size, sizeof(u16), GFP_KERNEL);
		if (!pi->rss)
			return -ENOMEM;
		for (j = 0; j < pi->rss_size; j++)
			pi->rss[j] = ethtool_rxfh_indir_default(j, pi->nqsets);
	}
	return 0;
}
static void print_port_info(const struct net_device *dev)
{
	char buf[80];
	char *bufp = buf;
	const char *spd = "";
	const struct port_info *pi = netdev_priv(dev);
	const struct adapter *adap = pi->adapter;

	if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_2_5GB)
		spd = " 2.5 GT/s";
	else if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_5_0GB)
		spd = " 5 GT/s";
	else if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_8_0GB)
		spd = " 8 GT/s";

	if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100M)
		bufp += sprintf(bufp, "100/");
	if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_1G)
		bufp += sprintf(bufp, "1000/");
	if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
		bufp += sprintf(bufp, "10G/");
	if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G)
		bufp += sprintf(bufp, "40G/");
	if (bufp != buf)
		--bufp;
	sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type));

	netdev_info(dev, "Chelsio %s rev %d %s %sNIC PCIe x%d%s%s\n",
		    adap->params.vpd.id,
		    CHELSIO_CHIP_RELEASE(adap->params.chip), buf,
		    is_offload(adap) ? "R" : "", adap->params.pci.width, spd,
		    (adap->flags & USING_MSIX) ? " MSI-X" :
		    (adap->flags & USING_MSI) ? " MSI" : "");
	netdev_info(dev, "S/N: %s, P/N: %s\n",
		    adap->params.vpd.sn, adap->params.vpd.pn);
}
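/* The first netdev_info() above produces a line along the lines of this
 * hypothetical example: "Chelsio T520-CR rev 1 1000/10GBASE-SFP RNIC
 * PCIe x8 8 GT/s MSI-X", where "R" marks an offload-capable adapter.
 */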
static void enable_pcie_relaxed_ordering(struct pci_dev *dev)
{
	pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN);
}
/*
 * Free the following resources:
 * - memory used for tables
 * - MSI/MSI-X
 * - net devices
 * - resources FW is holding for us
 */
static void free_some_resources(struct adapter *adapter)
{
	unsigned int i;

	t4_free_mem(adapter->l2t);
	t4_free_mem(adapter->tids.tid_tab);
	kfree(adapter->sge.egr_map);
	kfree(adapter->sge.ingr_map);
	kfree(adapter->sge.starving_fl);
	kfree(adapter->sge.txq_maperr);
	disable_msi(adapter);

	for_each_port(adapter, i)
		if (adapter->port[i]) {
			kfree(adap2pinfo(adapter, i)->rss);
			free_netdev(adapter->port[i]);
		}
	if (adapter->flags & FW_OK)
		t4_fw_bye(adapter, adapter->fn);
}
#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
#define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
		   NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
#define SEGMENT_SIZE 128

static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int func, i, err, s_qpp, qpp, num_seg;
	struct port_info *pi;
	bool highdma = false;
	struct adapter *adapter = NULL;
	void __iomem *regs;

	printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
	err = pci_request_regions(pdev, KBUILD_MODNAME);
	if (err) {
		/* Just info, some other driver may have claimed the device. */
		dev_info(&pdev->dev, "cannot obtain PCI resources\n");
		return err;
	}

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(&pdev->dev, "cannot enable PCI device\n");
		goto out_release_regions;
	}

	regs = pci_ioremap_bar(pdev, 0);
	if (!regs) {
		dev_err(&pdev->dev, "cannot map device registers\n");
		err = -ENOMEM;
		goto out_disable_device;
	}

	err = t4_wait_dev_ready(regs);
	if (err < 0)
		goto out_unmap_bar0;
	/* We control everything through one PF */
	func = SOURCEPF_G(readl(regs + PL_WHOAMI_A));
	if (func != ent->driver_data) {
		iounmap(regs);
		pci_disable_device(pdev);
		pci_save_state(pdev);	/* to restore SR-IOV later */
		goto sriov;
	}
	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
		highdma = true;
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (err) {
			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
				"coherent allocations\n");
			goto out_unmap_bar0;
		}
	} else {
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "no usable DMA configuration\n");
			goto out_unmap_bar0;
		}
	}

	pci_enable_pcie_error_reporting(pdev);
	enable_pcie_relaxed_ordering(pdev);
	pci_set_master(pdev);
	pci_save_state(pdev);
	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
	if (!adapter) {
		err = -ENOMEM;
		goto out_unmap_bar0;
	}

	adapter->workq = create_singlethread_workqueue("cxgb4");
	if (!adapter->workq) {
		err = -ENOMEM;
		goto out_free_adapter;
	}
	/* PCI device has been enabled */
	adapter->flags |= DEV_ENABLED;

	adapter->regs = regs;
	adapter->pdev = pdev;
	adapter->pdev_dev = &pdev->dev;
	adapter->mbox = func;
	adapter->fn = func;
	adapter->msg_enable = dflt_msg_enable;
	memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));

	spin_lock_init(&adapter->stats_lock);
	spin_lock_init(&adapter->tid_release_lock);
	spin_lock_init(&adapter->win0_lock);

	INIT_WORK(&adapter->tid_release_task, process_tid_release_list);
	INIT_WORK(&adapter->db_full_task, process_db_full);
	INIT_WORK(&adapter->db_drop_task, process_db_drop);

	err = t4_prep_adapter(adapter);
	if (err)
		goto out_free_adapter;
	if (!is_t4(adapter->params.chip)) {
		s_qpp = (QUEUESPERPAGEPF0_S +
			(QUEUESPERPAGEPF1_S - QUEUESPERPAGEPF0_S) *
			adapter->fn);
		qpp = 1 << QUEUESPERPAGEPF0_G(t4_read_reg(adapter,
		      SGE_EGRESS_QUEUES_PER_PAGE_PF_A) >> s_qpp);
		num_seg = PAGE_SIZE / SEGMENT_SIZE;

		/* Each segment size is 128B. Write coalescing is enabled only
		 * when the SGE_EGRESS_QUEUES_PER_PAGE_PF register value for
		 * the queue is less than the number of segments that can be
		 * accommodated in a page.
		 */
		if (qpp > num_seg) {
			dev_err(&pdev->dev,
				"Incorrect number of egress queues per page\n");
			err = -EINVAL;
			goto out_free_adapter;
		}
		adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
					   pci_resource_len(pdev, 2));
		if (!adapter->bar2) {
			dev_err(&pdev->dev, "cannot map device bar2 region\n");
			err = -ENOMEM;
			goto out_free_adapter;
		}
	}
	setup_memwin(adapter);
	err = adap_init0(adapter);
	setup_memwin_rdma(adapter);
	if (err)
		goto out_unmap_bar;
	for_each_port(adapter, i) {
		struct net_device *netdev;

		netdev = alloc_etherdev_mq(sizeof(struct port_info),
					   MAX_ETH_QSETS);
		if (!netdev) {
			err = -ENOMEM;
			goto out_free_dev;
		}

		SET_NETDEV_DEV(netdev, &pdev->dev);

		adapter->port[i] = netdev;
		pi = netdev_priv(netdev);
		pi->adapter = adapter;
		pi->xact_addr_filt = -1;
		pi->port_id = i;
		netdev->irq = pdev->irq;

		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
			NETIF_F_RXCSUM | NETIF_F_RXHASH |
			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
		if (highdma)
			netdev->hw_features |= NETIF_F_HIGHDMA;
		netdev->features |= netdev->hw_features;
		netdev->vlan_features = netdev->features & VLAN_FEAT;

		netdev->priv_flags |= IFF_UNICAST_FLT;

		netdev->netdev_ops = &cxgb4_netdev_ops;
#ifdef CONFIG_CHELSIO_T4_DCB
		netdev->dcbnl_ops = &cxgb4_dcb_ops;
		cxgb4_dcb_state_init(netdev);
#endif
		netdev->ethtool_ops = &cxgb_ethtool_ops;
	}
	pci_set_drvdata(pdev, adapter);

	if (adapter->flags & FW_OK) {
		err = t4_port_init(adapter, func, func, 0);
		if (err)
			goto out_free_dev;
	}
	/*
	 * Configure queues and allocate tables now, they can be needed as
	 * soon as the first register_netdev completes.
	 */
	cfg_queues(adapter);

	adapter->l2t = t4_init_l2t();
	if (!adapter->l2t) {
		/* We tolerate a lack of L2T, giving up some functionality */
		dev_warn(&pdev->dev, "could not allocate L2T, continuing\n");
		adapter->params.offload = 0;
	}
#if IS_ENABLED(CONFIG_IPV6)
	adapter->clipt = t4_init_clip_tbl(adapter->clipt_start,
					  adapter->clipt_end);
	if (!adapter->clipt) {
		/* We tolerate a lack of clip_table, giving up
		 * some functionality
		 */
		dev_warn(&pdev->dev,
			 "could not allocate Clip table, continuing\n");
		adapter->params.offload = 0;
	}
#endif
	if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
		dev_warn(&pdev->dev, "could not allocate TID table, "
			 "continuing\n");
		adapter->params.offload = 0;
	}
	/* See what interrupts we'll be using */
	if (msi > 1 && enable_msix(adapter) == 0)
		adapter->flags |= USING_MSIX;
	else if (msi > 0 && pci_enable_msi(pdev) == 0)
		adapter->flags |= USING_MSI;

	err = init_rss(adapter);
	if (err)
		goto out_free_dev;
	/*
	 * The card is now ready to go.  If any errors occur during device
	 * registration we do not fail the whole card but rather proceed only
	 * with the ports we manage to register successfully.  However we must
	 * register at least one net device.
	 */
	for_each_port(adapter, i) {
		pi = adap2pinfo(adapter, i);
		netif_set_real_num_tx_queues(adapter->port[i], pi->nqsets);
		netif_set_real_num_rx_queues(adapter->port[i], pi->nqsets);

		err = register_netdev(adapter->port[i]);
		if (err)
			break;
		adapter->chan_map[pi->tx_chan] = i;
		print_port_info(adapter->port[i]);
	}
	if (i == 0) {
		dev_err(&pdev->dev, "could not register any net devices\n");
		goto out_free_dev;
	}
	if (err) {
		dev_warn(&pdev->dev, "only %d net devices registered\n", i);
		err = 0;
	}
	if (cxgb4_debugfs_root) {
		adapter->debugfs_root = debugfs_create_dir(pci_name(pdev),
							   cxgb4_debugfs_root);
		setup_debugfs(adapter);
	}
	/* PCIe EEH recovery on powerpc platforms needs fundamental reset */
	pdev->needs_freset = 1;

	if (is_offload(adapter))
		attach_ulds(adapter);

sriov:
#ifdef CONFIG_PCI_IOV
	if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0)
		if (pci_enable_sriov(pdev, num_vf[func]) == 0)
			dev_info(&pdev->dev,
				 "instantiated %u virtual functions\n",
				 num_vf[func]);
#endif
	return 0;
out_free_dev:
	free_some_resources(adapter);
out_unmap_bar:
	if (!is_t4(adapter->params.chip))
		iounmap(adapter->bar2);
out_free_adapter:
	if (adapter->workq)
		destroy_workqueue(adapter->workq);
	kfree(adapter);
out_unmap_bar0:
	iounmap(regs);
out_disable_device:
	pci_disable_pcie_error_reporting(pdev);
	pci_disable_device(pdev);
out_release_regions:
	pci_release_regions(pdev);
	return err;
}
static void remove_one(struct pci_dev *pdev)
{
	struct adapter *adapter = pci_get_drvdata(pdev);

#ifdef CONFIG_PCI_IOV
	pci_disable_sriov(pdev);
#endif

	if (adapter) {
		int i;

		/* Tear down per-adapter Work Queue first since it can contain
		 * references to our adapter data structure.
		 */
		destroy_workqueue(adapter->workq);

		if (is_offload(adapter))
			detach_ulds(adapter);

		disable_interrupts(adapter);

		for_each_port(adapter, i)
			if (adapter->port[i]->reg_state == NETREG_REGISTERED)
				unregister_netdev(adapter->port[i]);

		debugfs_remove_recursive(adapter->debugfs_root);

		/* If we allocated filters, free up state associated with any
		 * valid filters ...
		 */
		if (adapter->tids.ftid_tab) {
			struct filter_entry *f = &adapter->tids.ftid_tab[0];

			for (i = 0; i < (adapter->tids.nftids +
					 adapter->tids.nsftids); i++, f++)
				if (f->valid)
					clear_filter(adapter, f);
		}

		if (adapter->flags & FULL_INIT_DONE)
			cxgb_down(adapter);

		free_some_resources(adapter);
#if IS_ENABLED(CONFIG_IPV6)
		t4_cleanup_clip_tbl(adapter);
#endif
		iounmap(adapter->regs);
		if (!is_t4(adapter->params.chip))
			iounmap(adapter->bar2);
		pci_disable_pcie_error_reporting(pdev);
		if ((adapter->flags & DEV_ENABLED)) {
			pci_disable_device(pdev);
			adapter->flags &= ~DEV_ENABLED;
		}
		pci_release_regions(pdev);
		synchronize_rcu();
		kfree(adapter);
	} else {
		pci_release_regions(pdev);
	}
}
static struct pci_driver cxgb4_driver = {
	.name     = KBUILD_MODNAME,
	.id_table = cxgb4_pci_tbl,
	.probe    = init_one,
	.remove   = remove_one,
	.shutdown = remove_one,
	.err_handler = &cxgb4_eeh,
};
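/* Note that .shutdown reuses remove_one, so a system shutdown performs the
 * same full teardown as unbinding the driver.
 */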
static int __init cxgb4_init_module(void)
{
	int ret;

	/* Debugfs support is optional, just warn if this fails */
	cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
	if (!cxgb4_debugfs_root)
		pr_warn("could not create debugfs entry, continuing\n");

	ret = pci_register_driver(&cxgb4_driver);
	if (ret < 0)
		debugfs_remove(cxgb4_debugfs_root);

#if IS_ENABLED(CONFIG_IPV6)
	if (!inet6addr_registered) {
		register_inet6addr_notifier(&cxgb4_inet6addr_notifier);
		inet6addr_registered = true;
	}
#endif

	return ret;
}
static void __exit cxgb4_cleanup_module(void)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (inet6addr_registered) {
		unregister_inet6addr_notifier(&cxgb4_inet6addr_notifier);
		inet6addr_registered = false;
	}
#endif
	pci_unregister_driver(&cxgb4_driver);
	debugfs_remove(cxgb4_debugfs_root);	/* NULL ok */
}

module_init(cxgb4_init_module);
module_exit(cxgb4_cleanup_module);