Merge branches 'core', 'cxgb4', 'ip-roce', 'iser', 'misc', 'mlx4', 'nes', 'ocrdma...
author Roland Dreier <roland@purestorage.com>
Thu, 3 Apr 2014 15:30:17 +0000 (08:30 -0700)
committer Roland Dreier <roland@purestorage.com>
Thu, 3 Apr 2014 15:30:17 +0000 (08:30 -0700)
65 files changed:
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/mad.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/mem.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/ehca/ehca_cq.c
drivers/infiniband/hw/ehca/ehca_mrmw.c
drivers/infiniband/hw/ipath/ipath_diag.c
drivers/infiniband/hw/ipath/ipath_dma.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/nes/nes_cm.h
drivers/infiniband/hw/nes/nes_user.h
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/hw/nes/nes_verbs.h
drivers/infiniband/hw/ocrdma/Makefile
drivers/infiniband/hw/ocrdma/ocrdma.h
drivers/infiniband/hw/ocrdma/ocrdma_abi.h
drivers/infiniband/hw/ocrdma/ocrdma_ah.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.h
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/ocrdma/ocrdma_sli.h
drivers/infiniband/hw/ocrdma/ocrdma_stats.c [new file with mode: 0644]
drivers/infiniband/hw/ocrdma/ocrdma_stats.h [new file with mode: 0644]
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/qib/qib.h
drivers/infiniband/hw/qib/qib_diag.c
drivers/infiniband/hw/qib/qib_dma.c
drivers/infiniband/hw/qib/qib_file_ops.c
drivers/infiniband/hw/qib/qib_fs.c
drivers/infiniband/hw/qib/qib_iba6120.c
drivers/infiniband/hw/qib/qib_iba7220.c
drivers/infiniband/hw/qib/qib_iba7322.c
drivers/infiniband/hw/qib/qib_init.c
drivers/infiniband/hw/qib/qib_mad.c
drivers/infiniband/hw/qib/qib_rc.c
drivers/infiniband/hw/qib/qib_ruc.c
drivers/infiniband/hw/qib/qib_ud.c
drivers/infiniband/hw/qib/qib_user_sdma.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/hw/qib/qib_verbs.h
drivers/infiniband/hw/usnic/usnic_uiom.c
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/iser/iser_initiator.c
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/infiniband/ulp/srp/ib_srp.c
drivers/infiniband/ulp/srp/ib_srp.h
drivers/net/ethernet/emulex/benet/be_roce.c
drivers/net/ethernet/emulex/benet/be_roce.h
drivers/scsi/libiscsi.c
drivers/scsi/scsi_transport_srp.c
include/rdma/ib_cm.h
include/rdma/ib_verbs.h
include/scsi/libiscsi.h
include/scsi/scsi_transport_iscsi.h
include/scsi/scsi_transport_srp.h

index 0601b9daf8407ae31c4ccc6c99ee64372a5a896f..c3239170d8b789e98233b3cb0e4ead65673de125 100644 (file)
@@ -349,23 +349,6 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
                           grh, &av->ah_attr);
 }
 
-int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac)
-{
-       struct cm_id_private *cm_id_priv;
-
-       cm_id_priv = container_of(id, struct cm_id_private, id);
-
-       if (smac != NULL)
-               memcpy(cm_id_priv->av.smac, smac, sizeof(cm_id_priv->av.smac));
-
-       if (alt_smac != NULL)
-               memcpy(cm_id_priv->alt_av.smac, alt_smac,
-                      sizeof(cm_id_priv->alt_av.smac));
-
-       return 0;
-}
-EXPORT_SYMBOL(ib_update_cm_av);
-
 static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 {
        struct cm_device *cm_dev;
index 199958d9ddc8088d7576904ab96c838fb193990c..42c3058e6e9cdbaa406cf682c4c6bb2b064591bd 100644 (file)
@@ -1284,15 +1284,6 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
        struct rdma_id_private *listen_id, *conn_id;
        struct rdma_cm_event event;
        int offset, ret;
-       u8 smac[ETH_ALEN];
-       u8 alt_smac[ETH_ALEN];
-       u8 *psmac = smac;
-       u8 *palt_smac = alt_smac;
-       int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) ==
-                       RDMA_TRANSPORT_IB) &&
-                      (rdma_port_get_link_layer(cm_id->device,
-                       ib_event->param.req_rcvd.port) ==
-                       IB_LINK_LAYER_ETHERNET));
 
        listen_id = cm_id->context;
        if (!cma_check_req_qp_type(&listen_id->id, ib_event))
@@ -1336,28 +1327,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
        ret = conn_id->id.event_handler(&conn_id->id, &event);
        if (ret)
                goto err3;
-
-       if (is_iboe) {
-               if (ib_event->param.req_rcvd.primary_path != NULL)
-                       rdma_addr_find_smac_by_sgid(
-                               &ib_event->param.req_rcvd.primary_path->sgid,
-                               psmac, NULL);
-               else
-                       psmac = NULL;
-               if (ib_event->param.req_rcvd.alternate_path != NULL)
-                       rdma_addr_find_smac_by_sgid(
-                               &ib_event->param.req_rcvd.alternate_path->sgid,
-                               palt_smac, NULL);
-               else
-                       palt_smac = NULL;
-       }
        /*
         * Acquire mutex to prevent user executing rdma_destroy_id()
         * while we're accessing the cm_id.
         */
        mutex_lock(&lock);
-       if (is_iboe)
-               ib_update_cm_av(cm_id, psmac, palt_smac);
        if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
            (conn_id->id.qp_type != IB_QPT_UD))
                ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
index 4c837e66516b894f88f689d93c9a84c501fa6b47..ab31f136d04b00a322a5e31f5c715deae9f09b4b 100644 (file)
@@ -1022,12 +1022,21 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
                                        mad_send_wr->send_buf.mad,
                                        sge[0].length,
                                        DMA_TO_DEVICE);
+       if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
+               return -ENOMEM;
+
        mad_send_wr->header_mapping = sge[0].addr;
 
        sge[1].addr = ib_dma_map_single(mad_agent->device,
                                        ib_get_payload(mad_send_wr),
                                        sge[1].length,
                                        DMA_TO_DEVICE);
+       if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
+               ib_dma_unmap_single(mad_agent->device,
+                                   mad_send_wr->header_mapping,
+                                   sge[0].length, DMA_TO_DEVICE);
+               return -ENOMEM;
+       }
        mad_send_wr->payload_mapping = sge[1].addr;
 
        spin_lock_irqsave(&qp_info->send_queue.lock, flags);
@@ -2590,6 +2599,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
                                                 sizeof *mad_priv -
                                                   sizeof mad_priv->header,
                                                 DMA_FROM_DEVICE);
+               if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
+                                                 sg_list.addr))) {
+                       ret = -ENOMEM;
+                       break;
+               }
                mad_priv->header.mapping = sg_list.addr;
                recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
                mad_priv->header.mad_list.mad_queue = recv_queue;
index d286bdebe2ab90fc613c7696d41690909247da52..26046c23334c4372a5668e363298db58d400950d 100644 (file)
@@ -98,9 +98,9 @@ int c4iw_debug;
 module_param(c4iw_debug, int, 0644);
 MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");
 
-static int peer2peer;
+static int peer2peer = 1;
 module_param(peer2peer, int, 0644);
-MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
+MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
 
 static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
 module_param(p2p_type, int, 0644);
@@ -400,7 +400,8 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
        n = dst_neigh_lookup(&rt->dst, &peer_ip);
        if (!n)
                return NULL;
-       if (!our_interface(dev, n->dev)) {
+       if (!our_interface(dev, n->dev) &&
+           !(n->dev->flags & IFF_LOOPBACK)) {
                dst_release(&rt->dst);
                return NULL;
        }
@@ -759,8 +760,9 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
        ep->mpa_skb = skb;
        c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
        start_ep_timer(ep);
-       state_set(&ep->com, MPA_REQ_SENT);
+       __state_set(&ep->com, MPA_REQ_SENT);
        ep->mpa_attr.initiator = 1;
+       ep->snd_seq += mpalen;
        return;
 }
 
@@ -840,6 +842,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
        t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
        BUG_ON(ep->mpa_skb);
        ep->mpa_skb = skb;
+       ep->snd_seq += mpalen;
        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
@@ -923,7 +926,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
        skb_get(skb);
        t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
        ep->mpa_skb = skb;
-       state_set(&ep->com, MPA_REP_SENT);
+       __state_set(&ep->com, MPA_REP_SENT);
+       ep->snd_seq += mpalen;
        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
@@ -940,6 +944,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
        PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
             be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
 
+       mutex_lock(&ep->com.mutex);
        dst_confirm(ep->dst);
 
        /* setup the hwtid for this connection */
@@ -963,17 +968,18 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
                send_mpa_req(ep, skb, 1);
        else
                send_mpa_req(ep, skb, mpa_rev);
-
+       mutex_unlock(&ep->com.mutex);
        return 0;
 }
 
-static void close_complete_upcall(struct c4iw_ep *ep)
+static void close_complete_upcall(struct c4iw_ep *ep, int status)
 {
        struct iw_cm_event event;
 
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CLOSE;
+       event.status = status;
        if (ep->com.cm_id) {
                PDBG("close complete delivered ep %p cm_id %p tid %u\n",
                     ep, ep->com.cm_id, ep->hwtid);
@@ -987,7 +993,6 @@ static void close_complete_upcall(struct c4iw_ep *ep)
 static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
 {
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
-       close_complete_upcall(ep);
        state_set(&ep->com, ABORTING);
        set_bit(ABORT_CONN, &ep->com.history);
        return send_abort(ep, skb, gfp);
@@ -1066,9 +1071,10 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
        }
 }
 
-static void connect_request_upcall(struct c4iw_ep *ep)
+static int connect_request_upcall(struct c4iw_ep *ep)
 {
        struct iw_cm_event event;
+       int ret;
 
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
        memset(&event, 0, sizeof(event));
@@ -1093,15 +1099,14 @@ static void connect_request_upcall(struct c4iw_ep *ep)
                event.private_data_len = ep->plen;
                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
        }
-       if (state_read(&ep->parent_ep->com) != DEAD) {
-               c4iw_get_ep(&ep->com);
-               ep->parent_ep->com.cm_id->event_handler(
-                                               ep->parent_ep->com.cm_id,
-                                               &event);
-       }
+       c4iw_get_ep(&ep->com);
+       ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
+                                                     &event);
+       if (ret)
+               c4iw_put_ep(&ep->com);
        set_bit(CONNREQ_UPCALL, &ep->com.history);
        c4iw_put_ep(&ep->parent_ep->com);
-       ep->parent_ep = NULL;
+       return ret;
 }
 
 static void established_upcall(struct c4iw_ep *ep)
@@ -1165,7 +1170,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
         * the connection.
         */
        stop_ep_timer(ep);
-       if (state_read(&ep->com) != MPA_REQ_SENT)
+       if (ep->com.state != MPA_REQ_SENT)
                return;
 
        /*
@@ -1240,7 +1245,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
         * start reply message including private data. And
         * the MPA header is valid.
         */
-       state_set(&ep->com, FPDU_MODE);
+       __state_set(&ep->com, FPDU_MODE);
        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
        ep->mpa_attr.recv_marker_enabled = markers_enabled;
        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -1355,7 +1360,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
        }
        goto out;
 err:
-       state_set(&ep->com, ABORTING);
+       __state_set(&ep->com, ABORTING);
        send_abort(ep, skb, GFP_KERNEL);
 out:
        connect_reply_upcall(ep, err);
@@ -1370,7 +1375,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
 
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 
-       if (state_read(&ep->com) != MPA_REQ_WAIT)
+       if (ep->com.state != MPA_REQ_WAIT)
                return;
 
        /*
@@ -1400,7 +1405,6 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
                return;
 
        PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
-       stop_ep_timer(ep);
        mpa = (struct mpa_message *) ep->mpa_pkt;
 
        /*
@@ -1492,10 +1496,18 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
             ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
             ep->mpa_attr.p2p_type);
 
-       state_set(&ep->com, MPA_REQ_RCVD);
+       __state_set(&ep->com, MPA_REQ_RCVD);
+       stop_ep_timer(ep);
 
        /* drive upcall */
-       connect_request_upcall(ep);
+       mutex_lock(&ep->parent_ep->com.mutex);
+       if (ep->parent_ep->com.state != DEAD) {
+               if (connect_request_upcall(ep))
+                       abort_connection(ep, skb, GFP_KERNEL);
+       } else {
+               abort_connection(ep, skb, GFP_KERNEL);
+       }
+       mutex_unlock(&ep->parent_ep->com.mutex);
        return;
 }
 
@@ -1509,14 +1521,17 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
        __u8 status = hdr->status;
 
        ep = lookup_tid(t, tid);
+       if (!ep)
+               return 0;
        PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
        skb_pull(skb, sizeof(*hdr));
        skb_trim(skb, dlen);
+       mutex_lock(&ep->com.mutex);
 
        /* update RX credits */
        update_rx_credits(ep, dlen);
 
-       switch (state_read(&ep->com)) {
+       switch (ep->com.state) {
        case MPA_REQ_SENT:
                ep->rcv_seq += dlen;
                process_mpa_reply(ep, skb);
@@ -1532,7 +1547,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
                        pr_err("%s Unexpected streaming data." \
                               " qpid %u ep %p state %d tid %u status %d\n",
                               __func__, ep->com.qp->wq.sq.qid, ep,
-                              state_read(&ep->com), ep->hwtid, status);
+                              ep->com.state, ep->hwtid, status);
                attrs.next_state = C4IW_QP_STATE_TERMINATE;
                c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
                               C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
@@ -1541,6 +1556,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
        default:
                break;
        }
+       mutex_unlock(&ep->com.mutex);
        return 0;
 }
 
@@ -2246,7 +2262,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
                        c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
                }
-               close_complete_upcall(ep);
+               close_complete_upcall(ep, 0);
                __state_set(&ep->com, DEAD);
                release = 1;
                disconnect = 0;
@@ -2425,7 +2441,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
                                             C4IW_QP_ATTR_NEXT_STATE,
                                             &attrs, 1);
                }
-               close_complete_upcall(ep);
+               close_complete_upcall(ep, 0);
                __state_set(&ep->com, DEAD);
                release = 1;
                break;
@@ -2500,22 +2516,28 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
 
 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
 {
-       int err;
+       int err = 0;
+       int disconnect = 0;
        struct c4iw_ep *ep = to_ep(cm_id);
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 
-       if (state_read(&ep->com) == DEAD) {
+       mutex_lock(&ep->com.mutex);
+       if (ep->com.state == DEAD) {
+               mutex_unlock(&ep->com.mutex);
                c4iw_put_ep(&ep->com);
                return -ECONNRESET;
        }
        set_bit(ULP_REJECT, &ep->com.history);
-       BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+       BUG_ON(ep->com.state != MPA_REQ_RCVD);
        if (mpa_rev == 0)
                abort_connection(ep, NULL, GFP_KERNEL);
        else {
                err = send_mpa_reject(ep, pdata, pdata_len);
-               err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+               disconnect = 1;
        }
+       mutex_unlock(&ep->com.mutex);
+       if (disconnect)
+               err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
        c4iw_put_ep(&ep->com);
        return 0;
 }
@@ -2530,12 +2552,14 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
 
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
-       if (state_read(&ep->com) == DEAD) {
+
+       mutex_lock(&ep->com.mutex);
+       if (ep->com.state == DEAD) {
                err = -ECONNRESET;
                goto err;
        }
 
-       BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+       BUG_ON(ep->com.state != MPA_REQ_RCVD);
        BUG_ON(!qp);
 
        set_bit(ULP_ACCEPT, &ep->com.history);
@@ -2604,14 +2628,16 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        if (err)
                goto err1;
 
-       state_set(&ep->com, FPDU_MODE);
+       __state_set(&ep->com, FPDU_MODE);
        established_upcall(ep);
+       mutex_unlock(&ep->com.mutex);
        c4iw_put_ep(&ep->com);
        return 0;
 err1:
        ep->com.cm_id = NULL;
        cm_id->rem_ref(cm_id);
 err:
+       mutex_unlock(&ep->com.mutex);
        c4iw_put_ep(&ep->com);
        return err;
 }
@@ -2980,7 +3006,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
        rdev = &ep->com.dev->rdev;
        if (c4iw_fatal_error(rdev)) {
                fatal = 1;
-               close_complete_upcall(ep);
+               close_complete_upcall(ep, -EIO);
                ep->com.state = DEAD;
        }
        switch (ep->com.state) {
@@ -3022,7 +3048,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
        if (close) {
                if (abrupt) {
                        set_bit(EP_DISC_ABORT, &ep->com.history);
-                       close_complete_upcall(ep);
+                       close_complete_upcall(ep, -ECONNRESET);
                        ret = send_abort(ep, NULL, gfp);
                } else {
                        set_bit(EP_DISC_CLOSE, &ep->com.history);
@@ -3203,6 +3229,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
        struct sk_buff *req_skb;
        struct fw_ofld_connection_wr *req;
        struct cpl_pass_accept_req *cpl = cplhdr(skb);
+       int ret;
 
        req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
        req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
@@ -3239,7 +3266,13 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
        req->cookie = (unsigned long)skb;
 
        set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
-       cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
+       ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
+       if (ret < 0) {
+               pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__,
+                      ret);
+               kfree_skb(skb);
+               kfree_skb(req_skb);
+       }
 }
 
 /*
@@ -3346,13 +3379,13 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
                pi = (struct port_info *)netdev_priv(pdev);
                tx_chan = cxgb4_port_chan(pdev);
        }
+       neigh_release(neigh);
        if (!e) {
                pr_err("%s - failed to allocate l2t entry!\n",
                       __func__);
                goto free_dst;
        }
 
-       neigh_release(neigh);
        step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
        rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step];
        window = (__force u16) htons((__force u16)tcph->window);
@@ -3427,6 +3460,7 @@ static void process_timeout(struct c4iw_ep *ep)
                                     &attrs, 1);
                }
                __state_set(&ep->com, ABORTING);
+               close_complete_upcall(ep, -ETIMEDOUT);
                break;
        default:
                WARN(1, "%s unexpected state ep %p tid %u state %u\n",
index 88de3aa9c5b0205952299a7823ad0bfc071d3a5d..ce468e54242881096ce1c2ce8691b44e43b0bab9 100644 (file)
@@ -365,8 +365,14 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
 
                if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {
 
-                       /*
-                        * drop peer2peer RTR reads.
+                       /* If we have reached here because of async
+                        * event or other error, and have egress error
+                        * then drop
+                        */
+                       if (CQE_TYPE(hw_cqe) == 1)
+                               goto next_cqe;
+
+                       /* drop peer2peer RTR reads.
                         */
                        if (CQE_WRID_STAG(hw_cqe) == 1)
                                goto next_cqe;
@@ -511,8 +517,18 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
         */
        if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {
 
-               /*
-                * If this is an unsolicited read response, then the read
+               /* If we have reached here because of async
+                * event or other error, and have egress error
+                * then drop
+                */
+               if (CQE_TYPE(hw_cqe) == 1) {
+                       if (CQE_STATUS(hw_cqe))
+                               t4_set_wq_in_error(wq);
+                       ret = -EAGAIN;
+                       goto skip_cqe;
+               }
+
+               /* If this is an unsolicited read response, then the read
                 * was generated by the kernel driver as part of peer-2-peer
                 * connection setup.  So ignore the completion.
                 */
@@ -603,7 +619,7 @@ proc_cqe:
         */
        if (SQ_TYPE(hw_cqe)) {
                int idx = CQE_WRID_SQ_IDX(hw_cqe);
-               BUG_ON(idx > wq->sq.size);
+               BUG_ON(idx >= wq->sq.size);
 
                /*
                * Account for any unsignaled completions completed by
@@ -617,7 +633,7 @@ proc_cqe:
                        wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
                else
                        wq->sq.in_use -= idx - wq->sq.cidx;
-               BUG_ON(wq->sq.in_use < 0 && wq->sq.in_use < wq->sq.size);
+               BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size);
 
                wq->sq.cidx = (uint16_t)idx;
                PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
@@ -881,7 +897,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
        /*
         * Make actual HW queue 2x to avoid cdix_inc overflows.
         */
-       hwentries = entries * 2;
+       hwentries = min(entries * 2, T4_MAX_IQ_SIZE);
 
        /*
         * Make HW queue at least 64 entries so GTS updates aren't too
@@ -930,6 +946,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
                if (!mm2)
                        goto err4;
 
+               memset(&uresp, 0, sizeof(uresp));
                uresp.qid_mask = rhp->rdev.cqmask;
                uresp.cqid = chp->cq.cqid;
                uresp.size = chp->cq.size;
index 4a033853312e52c6ff026d8e7e7892f010826255..982f81586f90d95924c41b9de21a373a685568c6 100644 (file)
@@ -897,11 +897,13 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
        }
 
        opcode = *(u8 *)rsp;
-       if (c4iw_handlers[opcode])
+       if (c4iw_handlers[opcode]) {
                c4iw_handlers[opcode](dev, skb);
-       else
+       } else {
                pr_info("%s no handler opcode 0x%x...\n", __func__,
                       opcode);
+               kfree_skb(skb);
+       }
 
        return 0;
 nomem:
index 23eaeabab93b50d483e279de2adb7175c7c29dfc..a1e8f1333b793f648fc40140909f812e30bb25f4 100644 (file)
@@ -369,6 +369,7 @@ struct c4iw_fr_page_list {
        DEFINE_DMA_UNMAP_ADDR(mapping);
        dma_addr_t dma_addr;
        struct c4iw_dev *dev;
+       int pll_len;
 };
 
 static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list(
@@ -441,6 +442,7 @@ struct c4iw_qp {
        atomic_t refcnt;
        wait_queue_head_t wait;
        struct timer_list timer;
+       int sq_sig_all;
 };
 
 static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
index 392d422b00cb705ccd1a5b095b06a12b831d24af..f9ca072a99ed2dbbf2f401dc7b798fd9aa0900d1 100644 (file)
@@ -37,9 +37,9 @@
 
 #include "iw_cxgb4.h"
 
-int use_dsgl = 1;
+int use_dsgl = 0;
 module_param(use_dsgl, int, 0644);
-MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1)");
+MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)");
 
 #define T4_ULPTX_MIN_IO 32
 #define C4IW_MAX_INLINE_SIZE 96
@@ -898,7 +898,11 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device,
        dma_unmap_addr_set(c4pl, mapping, dma_addr);
        c4pl->dma_addr = dma_addr;
        c4pl->dev = dev;
-       c4pl->ibpl.max_page_list_len = pll_len;
+       c4pl->pll_len = pll_len;
+
+       PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
+            __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
+            &c4pl->dma_addr);
 
        return &c4pl->ibpl;
 }
@@ -907,8 +911,12 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl)
 {
        struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl);
 
+       PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
+            __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
+            &c4pl->dma_addr);
+
        dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev,
-                         c4pl->ibpl.max_page_list_len,
+                         c4pl->pll_len,
                          c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping));
        kfree(c4pl);
 }
index 582936708e6e492dfca46b88b5db98f16159c0db..723ad290bd9d0739d1256b3b9bc9d91a1fa9cf20 100644 (file)
@@ -675,7 +675,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                fw_flags = 0;
                if (wr->send_flags & IB_SEND_SOLICITED)
                        fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
-               if (wr->send_flags & IB_SEND_SIGNALED)
+               if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all)
                        fw_flags |= FW_RI_COMPLETION_FLAG;
                swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
                switch (wr->opcode) {
@@ -736,7 +736,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                }
                swsqe->idx = qhp->wq.sq.pidx;
                swsqe->complete = 0;
-               swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED);
+               swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
+                                 qhp->sq_sig_all;
                swsqe->flushed = 0;
                swsqe->wr_id = wr->wr_id;
 
@@ -1533,7 +1534,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
        struct c4iw_cq *schp;
        struct c4iw_cq *rchp;
        struct c4iw_create_qp_resp uresp;
-       int sqsize, rqsize;
+       unsigned int sqsize, rqsize;
        struct c4iw_ucontext *ucontext;
        int ret;
        struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL;
@@ -1605,6 +1606,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
        qhp->attr.enable_bind = 1;
        qhp->attr.max_ord = 1;
        qhp->attr.max_ird = 1;
+       qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
        spin_lock_init(&qhp->lock);
        mutex_init(&qhp->mutex);
        init_waitqueue_head(&qhp->wait);
index 212150c25ea08ade17d2713b882db69bb502ca03..8cc837537768f97f99041ec7ab69d2e111dba0c0 100644 (file)
@@ -283,6 +283,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
                        (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
                if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
                        ehca_err(device, "Copy to udata failed.");
+                       cq = ERR_PTR(-EFAULT);
                        goto create_cq_exit4;
                }
        }
index 7168f594d45761107ee6f6fa6555c24bffd389a1..3488e8c9fcb44b89211e0650c47a87a2fd469e65 100644 (file)
@@ -2534,16 +2534,6 @@ static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
        /* This is only a stub; nothing to be done here */
 }
 
-static u64 ehca_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
-       return sg->dma_address;
-}
-
-static unsigned int ehca_dma_len(struct ib_device *dev, struct scatterlist *sg)
-{
-       return sg->length;
-}
-
 static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
                                         size_t size,
                                         enum dma_data_direction dir)
@@ -2596,8 +2586,6 @@ struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
        .unmap_page             = ehca_dma_unmap_page,
        .map_sg                 = ehca_dma_map_sg,
        .unmap_sg               = ehca_dma_unmap_sg,
-       .dma_address            = ehca_dma_address,
-       .dma_len                = ehca_dma_len,
        .sync_single_for_cpu    = ehca_dma_sync_single_for_cpu,
        .sync_single_for_device = ehca_dma_sync_single_for_device,
        .alloc_coherent         = ehca_dma_alloc_coherent,
index 714293b78518598c7fe262315de6cbfaafe9263b..e2f9a51f4a38697aa6cccb261e7ee1c4ab346ad6 100644 (file)
@@ -326,7 +326,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
                                   size_t count, loff_t *off)
 {
        u32 __iomem *piobuf;
-       u32 plen, clen, pbufn;
+       u32 plen, pbufn, maxlen_reserve;
        struct ipath_diag_pkt odp;
        struct ipath_diag_xpkt dp;
        u32 *tmpbuf = NULL;
@@ -335,51 +335,29 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
        u64 val;
        u32 l_state, lt_state; /* LinkState, LinkTrainingState */
 
-       if (count < sizeof(odp)) {
-               ret = -EINVAL;
-               goto bail;
-       }
 
        if (count == sizeof(dp)) {
                if (copy_from_user(&dp, data, sizeof(dp))) {
                        ret = -EFAULT;
                        goto bail;
                }
-       } else if (copy_from_user(&odp, data, sizeof(odp))) {
-               ret = -EFAULT;
+       } else if (count == sizeof(odp)) {
+               if (copy_from_user(&odp, data, sizeof(odp))) {
+                       ret = -EFAULT;
+                       goto bail;
+               }
+       } else {
+               ret = -EINVAL;
                goto bail;
        }
 
-       /*
-        * Due to padding/alignment issues (lessened with new struct)
-        * the old and new structs are the same length. We need to
-        * disambiguate them, which we can do because odp.len has never
-        * been less than the total of LRH+BTH+DETH so far, while
-        * dp.unit (same offset) unit is unlikely to get that high.
-        * Similarly, dp.data, the pointer to user at the same offset
-        * as odp.unit, is almost certainly at least one (512byte)page
-        * "above" NULL. The if-block below can be omitted if compatibility
-        * between a new driver and older diagnostic code is unimportant.
-        * compatibility the other direction (new diags, old driver) is
-        * handled in the diagnostic code, with a warning.
-        */
-       if (dp.unit >= 20 && dp.data < 512) {
-               /* very probable version mismatch. Fix it up */
-               memcpy(&odp, &dp, sizeof(odp));
-               /* We got a legacy dp, copy elements to dp */
-               dp.unit = odp.unit;
-               dp.data = odp.data;
-               dp.len = odp.len;
-               dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */
-       }
-
        /* send count must be an exact number of dwords */
        if (dp.len & 3) {
                ret = -EINVAL;
                goto bail;
        }
 
-       clen = dp.len >> 2;
+       plen = dp.len >> 2;
 
        dd = ipath_lookup(dp.unit);
        if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
@@ -422,16 +400,22 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
                goto bail;
        }
 
-       /* need total length before first word written */
-       /* +1 word is for the qword padding */
-       plen = sizeof(u32) + dp.len;
-
-       if ((plen + 4) > dd->ipath_ibmaxlen) {
+       /*
+        * need total length before first word written, plus 2 Dwords. One Dword
+        * is for padding so we get the full user data when not aligned on
+        * a word boundary. The other Dword is to make sure we have room for the
+        * ICRC which gets tacked on later.
+        */
+       maxlen_reserve = 2 * sizeof(u32);
+       if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) {
                ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
-                         plen - 4, dd->ipath_ibmaxlen);
+                         dp.len, dd->ipath_ibmaxlen);
                ret = -EINVAL;
-               goto bail;      /* before writing pbc */
+               goto bail;
        }
+
+       plen = sizeof(u32) + dp.len;
+
        tmpbuf = vmalloc(plen);
        if (!tmpbuf) {
                dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
@@ -473,11 +457,11 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
         */
        if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
                ipath_flush_wc();
-               __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+               __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1);
                ipath_flush_wc();
-               __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+               __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
        } else
-               __iowrite32_copy(piobuf + 2, tmpbuf, clen);
+               __iowrite32_copy(piobuf + 2, tmpbuf, plen);
 
        ipath_flush_wc();
 
index 644c2c74e054dce8a50fa30ac64ecfd2d59654e5..123a8c053539665610cf25c9d015129eb1131e12 100644 (file)
@@ -115,6 +115,10 @@ static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
                        ret = 0;
                        break;
                }
+               sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+               sg->dma_length = sg->length;
+#endif
        }
        return ret;
 }
@@ -126,21 +130,6 @@ static void ipath_unmap_sg(struct ib_device *dev,
        BUG_ON(!valid_dma_direction(direction));
 }
 
-static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
-       u64 addr = (u64) page_address(sg_page(sg));
-
-       if (addr)
-               addr += sg->offset;
-       return addr;
-}
-
-static unsigned int ipath_sg_dma_len(struct ib_device *dev,
-                                    struct scatterlist *sg)
-{
-       return sg->length;
-}
-
 static void ipath_sync_single_for_cpu(struct ib_device *dev,
                                      u64 addr,
                                      size_t size,
@@ -176,17 +165,15 @@ static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
 }
 
 struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
-       ipath_mapping_error,
-       ipath_dma_map_single,
-       ipath_dma_unmap_single,
-       ipath_dma_map_page,
-       ipath_dma_unmap_page,
-       ipath_map_sg,
-       ipath_unmap_sg,
-       ipath_sg_dma_address,
-       ipath_sg_dma_len,
-       ipath_sync_single_for_cpu,
-       ipath_sync_single_for_device,
-       ipath_dma_alloc_coherent,
-       ipath_dma_free_coherent
+       .mapping_error = ipath_mapping_error,
+       .map_single = ipath_dma_map_single,
+       .unmap_single = ipath_dma_unmap_single,
+       .map_page = ipath_dma_map_page,
+       .unmap_page = ipath_dma_unmap_page,
+       .map_sg = ipath_map_sg,
+       .unmap_sg = ipath_unmap_sg,
+       .sync_single_for_cpu = ipath_sync_single_for_cpu,
+       .sync_single_for_device = ipath_sync_single_for_device,
+       .alloc_coherent = ipath_dma_alloc_coherent,
+       .free_coherent = ipath_dma_free_coherent
 };
index e81c5547e6479a87683dba621c169529f4209d5a..20b4d7a2d3d748170b18a08aacbee8e8016c3a1c 100644 (file)
@@ -1803,7 +1803,7 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev)
 
 static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 {
-       char name[32];
+       char name[80];
        int eq_per_port = 0;
        int added_eqs = 0;
        int total_eqs = 0;
@@ -1833,8 +1833,8 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
        eq = 0;
        mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
                for (j = 0; j < eq_per_port; j++) {
-                       sprintf(name, "mlx4-ib-%d-%d@%s",
-                               i, j, dev->pdev->bus->name);
+                       snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s",
+                                i, j, dev->pdev->bus->name);
                        /* Set IRQ for specific name (per ring) */
                        if (mlx4_assign_eq(dev, name, NULL,
                                           &ibdev->eq_table[eq])) {
@@ -2056,8 +2056,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                        err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]);
                        if (err)
                                ibdev->counters[i] = -1;
-               } else
-                               ibdev->counters[i] = -1;
+               } else {
+                       ibdev->counters[i] = -1;
+               }
        }
 
        mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
index d8f4d1fe849430ceb40eab47470d5af033884694..74993250523e7f81c810cedb15436db8dd0ff0b8 100644 (file)
@@ -1882,7 +1882,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
                                return err;
                }
 
-               if (ah->av.eth.vlan != 0xffff) {
+               if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
                        vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
                        is_vlan = 1;
                }
index 64408000f1c7f8a85e1652954bfd3ea6deea48ea..415f8e1a54dbc82cf4ab81bf5ad98bd9034d0733 100644 (file)
@@ -695,6 +695,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
 
        if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
                mthca_free_cq(to_mdev(ibdev), cq);
+               err = -EFAULT;
                goto err_free;
        }
 
index 9c9f2f57e960e1803b831aaf1a94b87011822844..dfa9df484505e6ecdc5362f758ade1627b98d0c7 100644 (file)
@@ -128,6 +128,7 @@ static void build_mpa_v1(struct nes_cm_node *, void *, u8);
 static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **);
 
 static void print_core(struct nes_cm_core *core);
+static void record_ird_ord(struct nes_cm_node *, u16, u16);
 
 /* External CM API Interface */
 /* instance of function pointers for client API */
@@ -317,7 +318,6 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
                }
        }
 
-
        if (priv_data_len + mpa_hdr_len != len) {
                nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
                        " complete (%x + %x != %x)\n",
@@ -356,25 +356,57 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
                        /* send reset */
                        return -EINVAL;
                }
+               if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD)
+                       cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
 
-               if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+               if (cm_node->mpav2_ird_ord != IETF_NO_IRD_ORD) {
                        /* responder */
-                       if (cm_node->ord_size > ird_size)
-                               cm_node->ord_size = ird_size;
-               } else {
-                       /* initiator */
-                       if (cm_node->ord_size > ird_size)
-                               cm_node->ord_size = ird_size;
-
-                       if (cm_node->ird_size < ord_size) {
-                               /* no resources available */
-                               /* send terminate message */
-                               return -EINVAL;
+                       if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+                               /* we are still negotiating */
+                               if (ord_size > NES_MAX_IRD) {
+                                       cm_node->ird_size = NES_MAX_IRD;
+                               } else {
+                                       cm_node->ird_size = ord_size;
+                                       if (ord_size == 0 &&
+                                       (rtr_ctrl_ord & IETF_RDMA0_READ)) {
+                                               cm_node->ird_size = 1;
+                                               nes_debug(NES_DBG_CM,
+                                               "%s: Remote peer doesn't support RDMA0_READ (ord=%u)\n",
+                                                       __func__, ord_size);
+                                       }
+                               }
+                               if (ird_size > NES_MAX_ORD)
+                                       cm_node->ord_size = NES_MAX_ORD;
+                               else
+                                       cm_node->ord_size = ird_size;
+                       } else { /* initiator */
+                               if (ord_size > NES_MAX_IRD) {
+                                       nes_debug(NES_DBG_CM,
+                                       "%s: Unable to support the requested (ord =%u)\n",
+                                                       __func__, ord_size);
+                                       return -EINVAL;
+                               }
+                               cm_node->ird_size = ord_size;
+
+                               if (ird_size > NES_MAX_ORD) {
+                                       cm_node->ord_size = NES_MAX_ORD;
+                               } else {
+                                       if (ird_size == 0 &&
+                                       (rtr_ctrl_ord & IETF_RDMA0_READ)) {
+                                               nes_debug(NES_DBG_CM,
+                                               "%s: Remote peer doesn't support RDMA0_READ (ird=%u)\n",
+                                                       __func__, ird_size);
+                                               return -EINVAL;
+                                       } else {
+                                               cm_node->ord_size = ird_size;
+                                       }
+                               }
                        }
                }
 
                if (rtr_ctrl_ord & IETF_RDMA0_READ) {
                        cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+
                } else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) {
                        cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
                } else {        /* Not supported RDMA0 operation */
@@ -514,6 +546,19 @@ static void print_core(struct nes_cm_core *core)
        nes_debug(NES_DBG_CM, "-------------- end core ---------------\n");
 }
 
+static void record_ird_ord(struct nes_cm_node *cm_node,
+                                       u16 conn_ird, u16 conn_ord)
+{
+       /*
+        * Store the requested IRD/ORD on the connection node, capping each
+        * value at its NES_MAX_IRD / NES_MAX_ORD limit.
+        */
+       cm_node->ird_size = (conn_ird > NES_MAX_IRD) ? NES_MAX_IRD : conn_ird;
+       cm_node->ord_size = (conn_ord > NES_MAX_ORD) ? NES_MAX_ORD : conn_ord;
+}
+
 /**
  * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame
  */
@@ -557,11 +602,13 @@ static void build_mpa_v2(struct nes_cm_node *cm_node,
        mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
 
        /* initialize RTR msg */
-       ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
-                           IETF_NO_IRD_ORD : cm_node->ird_size;
-       ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
-                           IETF_NO_IRD_ORD : cm_node->ord_size;
-
+       if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+               ctrl_ird = IETF_NO_IRD_ORD;
+               ctrl_ord = IETF_NO_IRD_ORD;
+       } else {
+               ctrl_ird = cm_node->ird_size & IETF_NO_IRD_ORD;
+               ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD;
+       }
        ctrl_ird |= IETF_PEER_TO_PEER;
        ctrl_ird |= IETF_FLPDU_ZERO_LEN;
 
@@ -610,7 +657,7 @@ static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_a
        struct nes_qp *nesqp = *nesqp_addr;
        struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0];
 
-       u64temp = (unsigned long)nesqp;
+       u64temp = (unsigned long)nesqp->nesuqp_addr;
        u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
        set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
 
@@ -1409,8 +1456,9 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
 
        cm_node->mpa_frame_rev = mpa_version;
        cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
-       cm_node->ird_size = IETF_NO_IRD_ORD;
-       cm_node->ord_size = IETF_NO_IRD_ORD;
+       cm_node->mpav2_ird_ord = 0;
+       cm_node->ird_size = 0;
+       cm_node->ord_size = 0;
 
        nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n",
                  &cm_node->loc_addr, cm_node->loc_port,
@@ -3027,11 +3075,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                rem_ref_cm_node(cm_node->cm_core, cm_node);
                return -ECONNRESET;
        }
-
        /* associate the node with the QP */
        nesqp->cm_node = (void *)cm_node;
        cm_node->nesqp = nesqp;
 
+
        nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n",
                nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener);
        atomic_inc(&cm_accepts);
@@ -3054,6 +3102,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        if (cm_node->mpa_frame_rev == IETF_MPA_V1)
                mpa_frame_offset = 4;
 
+       if (cm_node->mpa_frame_rev == IETF_MPA_V1 ||
+                       cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+               record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
+       }
+
        memcpy(mpa_v2_frame->priv_data, conn_param->private_data,
               conn_param->private_data_len);
 
@@ -3117,7 +3170,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        }
        nesqp->skip_lsmm = 1;
 
-
        /* Cache the cm_id in the qp */
        nesqp->cm_id = cm_id;
        cm_node->cm_id = cm_id;
@@ -3154,7 +3206,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(
                ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT));
        nesqp->nesqp_context->ird_ord_sizes |=
-               cpu_to_le32((u32)conn_param->ord);
+               cpu_to_le32((u32)cm_node->ord_size);
 
        memset(&nes_quad, 0, sizeof(nes_quad));
        nes_quad.DstIpAdrIndex =
@@ -3194,6 +3246,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        cm_event.remote_addr = cm_id->remote_addr;
        cm_event.private_data = NULL;
        cm_event.private_data_len = 0;
+       cm_event.ird = cm_node->ird_size;
+       cm_event.ord = cm_node->ord_size;
+
        ret = cm_id->event_handler(cm_id, &cm_event);
        attr.qp_state = IB_QPS_RTS;
        nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
@@ -3290,14 +3345,8 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
        /* cache the cm_id in the qp */
        nesqp->cm_id = cm_id;
-
        cm_id->provider_data = nesqp;
-
        nesqp->private_data_len = conn_param->private_data_len;
-       nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
-       /* space for rdma0 read msg */
-       if (conn_param->ord == 0)
-               nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(1);
 
        nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
        nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
@@ -3334,6 +3383,11 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                return -ENOMEM;
        }
 
+       record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
+       if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
+                               cm_node->ord_size == 0)
+               cm_node->ord_size = 1;
+
        cm_node->apbvt_set = apbvt_set;
        nesqp->cm_node = cm_node;
        cm_node->nesqp = nesqp;
@@ -3530,6 +3584,8 @@ static void cm_event_connected(struct nes_cm_event *event)
        nesqp->nesqp_context->ird_ord_sizes |=
                        cpu_to_le32((u32)1 <<
                        NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
+       nesqp->nesqp_context->ird_ord_sizes |=
+                       cpu_to_le32((u32)cm_node->ord_size);
 
        /* Adjust tail for not having a LSMM */
        /*nesqp->hwqp.sq_tail = 1;*/
@@ -3742,8 +3798,13 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
        cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
        cm_event.private_data = cm_node->mpa_frame_buf;
        cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
+       if (cm_node->mpa_frame_rev == IETF_MPA_V1) {
+               cm_event.ird = NES_MAX_IRD;
+               cm_event.ord = NES_MAX_ORD;
+       } else {
        cm_event.ird = cm_node->ird_size;
        cm_event.ord = cm_node->ord_size;
+       }
 
        ret = cm_id->event_handler(cm_id, &cm_event);
        if (ret)
index 4646e66660874c4a6b2a5b9a4290c0132096e9f7..522c99cd07c4339a89f6eafb3073fcc0f1577bb6 100644 (file)
@@ -58,6 +58,8 @@
 #define IETF_RDMA0_WRITE        0x8000
 #define IETF_RDMA0_READ         0x4000
 #define IETF_NO_IRD_ORD         0x3FFF
+#define NES_MAX_IRD             0x40
+#define NES_MAX_ORD             0x7F
 
 enum ietf_mpa_flags {
        IETF_MPA_FLAGS_MARKERS = 0x80,  /* receive Markers */
@@ -333,6 +335,7 @@ struct nes_cm_node {
        enum mpa_frame_version    mpa_frame_rev;
        u16                       ird_size;
        u16                       ord_size;
+       u16                       mpav2_ird_ord;
 
        u16                       mpa_frame_size;
        struct iw_cm_id           *cm_id;
index 4926de744488e71b4ec6e618825ed43218b2f519..529c421bb15caf74e5d7c33072e1b2a18e80b96b 100644 (file)
@@ -39,8 +39,8 @@
 
 #include <linux/types.h>
 
-#define NES_ABI_USERSPACE_VER 1
-#define NES_ABI_KERNEL_VER    1
+#define NES_ABI_USERSPACE_VER 2
+#define NES_ABI_KERNEL_VER    2
 
 /*
  * Make sure that all structs defined in this file remain laid out so
@@ -78,6 +78,7 @@ struct nes_create_cq_req {
 
 struct nes_create_qp_req {
        __u64 user_wqe_buffers;
+       __u64 user_qp_buffer;
 };
 
 enum iwnes_memreg_type {
index 32d3682daaf53f37302762035400ae1782c7fe7d..218dd35742851f14f5381dc734b245a599ad86e4 100644 (file)
@@ -1186,11 +1186,13 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
                                        nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
                                        kfree(nesqp->allocated_buffer);
                                        nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n");
-                                       return NULL;
+                                       return ERR_PTR(-EFAULT);
                                }
                                if (req.user_wqe_buffers) {
                                        virt_wqs = 1;
                                }
+                               if (req.user_qp_buffer)
+                                       nesqp->nesuqp_addr = req.user_qp_buffer;
                                if ((ibpd->uobject) && (ibpd->uobject->context)) {
                                        nesqp->user_mode = 1;
                                        nes_ucontext = to_nesucontext(ibpd->uobject->context);
@@ -3135,9 +3137,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                                " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
                                nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
                                original_last_aeq, nesqp->last_aeq);
-               if ((!ret) ||
-                               ((original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) &&
-                               (ret))) {
+               if (!ret || original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
                        if (dont_wait) {
                                if (nesqp->cm_id && nesqp->hw_tcp_state != 0) {
                                        nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d),"
index 0eff7c44d76b00bbcb1bbe06602dd54606d390ee..309b31c31ae1ac5e9b7bcd44701f22d65e0a1630 100644 (file)
@@ -184,5 +184,6 @@ struct nes_qp {
        u8                    pau_busy;
        u8                    pau_pending;
        u8                    pau_state;
+       __u64                 nesuqp_addr;
 };
 #endif                 /* NES_VERBS_H */
index 06a5bed12e433084000f2163d33a3f737cf9333a..d1bfd4f4cdde4301f80d4fd75cb218fafd797b76 100644 (file)
@@ -2,4 +2,4 @@ ccflags-y := -Idrivers/net/ethernet/emulex/benet
 
 obj-$(CONFIG_INFINIBAND_OCRDMA)        += ocrdma.o
 
-ocrdma-y :=    ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o
+ocrdma-y :=    ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o ocrdma_stats.o
index 7c001b97b23fd6adc081f7ee9fc1ea62f4df01c0..19011dbb930fb38d899c6f365bd981ce94a70174 100644 (file)
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
 
 #include <be_roce.h>
 #include "ocrdma_sli.h"
 
-#define OCRDMA_ROCE_DEV_VERSION "1.0.0"
+#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u"
+
+#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
 #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
 
+#define OC_NAME_SH     OCRDMA_NODE_DESC "(Skyhawk)"
+#define OC_NAME_UNKNOWN OCRDMA_NODE_DESC "(Unknown)"
+
+#define OC_SKH_DEVICE_PF 0x720
+#define OC_SKH_DEVICE_VF 0x728
 #define OCRDMA_MAX_AH 512
 
 #define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
 
+/*
+ * Combine two 32-bit halves into one u64: 'hi' lands in bits 63..32 and
+ * 'lo' in bits 31..0.  Each argument is parenthesized so that expressions
+ * containing lower-precedence operators (?:, |, &) expand correctly.
+ */
+#define convert_to_64bit(lo, hi) (((u64)(hi) << 32) | (u64)(lo))
+
 struct ocrdma_dev_attr {
        u8 fw_ver[32];
        u32 vendor_id;
@@ -65,6 +75,7 @@ struct ocrdma_dev_attr {
        int max_mr;
        u64 max_mr_size;
        u32 max_num_mr_pbl;
+       int max_mw;
        int max_fmr;
        int max_map_per_fmr;
        int max_pages_per_frmr;
@@ -83,6 +94,12 @@ struct ocrdma_dev_attr {
        u8 num_ird_pages;
 };
 
+/*
+ * Describes one memory region by pointer (va), dma_addr_t handle (pa) and
+ * size.  NOTE(review): size is presumably in bytes -- confirm against users.
+ */
+struct ocrdma_dma_mem {
+       void *va;
+       dma_addr_t pa;
+       u32 size;
+};
+
 struct ocrdma_pbl {
        void *va;
        dma_addr_t pa;
@@ -148,6 +165,26 @@ struct ocrdma_mr {
        struct ocrdma_hw_mr hwmr;
 };
 
+/*
+ * Per-category statistics handle.  struct ocrdma_dev embeds one of these
+ * for each category (rsrc_stats, rx_stats, wqe_stats, tx_stats, ...).
+ * NOTE(review): 'type' presumably selects the category and 'dev' points
+ * back at the owning device -- confirm in ocrdma_stats.c.
+ */
+struct ocrdma_stats {
+       u8 type;
+       struct ocrdma_dev *dev;
+};
+
+/*
+ * Statistics state embedded in struct ocrdma_dev as 'stats_mem': a
+ * mailbox queue entry plus a va/pa/size buffer description and a separate
+ * char buffer.  NOTE(review): debugfs_mem is presumably the text buffer
+ * handed to debugfs readers -- confirm in ocrdma_stats.c.
+ */
+struct stats_mem {
+       struct ocrdma_mqe mqe;
+       void *va;
+       dma_addr_t pa;
+       u32 size;
+       char *debugfs_mem;
+};
+
+/*
+ * PHY description kept in struct ocrdma_dev as 'phy'.  The two
+ * *_speeds_supported fields are bit masks: port_speed_string() ORs them
+ * together and tests the OCRDMA_PHY_SPEED_* bits in the result.
+ */
+struct phy_info {
+       u16 auto_speeds_supported;
+       u16 fixed_speeds_supported;
+       u16 phy_type;
+       u16 interface_type;
+};
+
 struct ocrdma_dev {
        struct ib_device ibdev;
        struct ocrdma_dev_attr attr;
@@ -191,12 +228,30 @@ struct ocrdma_dev {
        struct mqe_ctx mqe_ctx;
 
        struct be_dev_info nic_info;
+       struct phy_info phy;
+       char model_number[32];
+       u32 hba_port_num;
 
        struct list_head entry;
        struct rcu_head rcu;
        int id;
-       struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG];
+       u64 stag_arr[OCRDMA_MAX_STAG];
        u16 pvid;
+       u32 asic_id;
+
+       ulong last_stats_time;
+       struct mutex stats_lock; /* provide synch for debugfs operations */
+       struct stats_mem stats_mem;
+       struct ocrdma_stats rsrc_stats;
+       struct ocrdma_stats rx_stats;
+       struct ocrdma_stats wqe_stats;
+       struct ocrdma_stats tx_stats;
+       struct ocrdma_stats db_err_stats;
+       struct ocrdma_stats tx_qp_err_stats;
+       struct ocrdma_stats rx_qp_err_stats;
+       struct ocrdma_stats tx_dbg_stats;
+       struct ocrdma_stats rx_dbg_stats;
+       struct dentry *dir;
 };
 
 struct ocrdma_cq {
@@ -209,8 +264,8 @@ struct ocrdma_cq {
                         */
        u32 max_hw_cqe;
        bool phase_change;
-       bool armed, solicited;
-       bool arm_needed;
+       bool deferred_arm, deferred_sol;
+       bool first_arm;
 
        spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization
                                                   * to cq polling
@@ -223,6 +278,7 @@ struct ocrdma_cq {
        struct ocrdma_ucontext *ucontext;
        dma_addr_t pa;
        u32 len;
+       u32 cqe_cnt;
 
        /* head of all qp's sq and rq for which cqes need to be flushed
         * by the software.
@@ -232,7 +288,6 @@ struct ocrdma_cq {
 
 struct ocrdma_pd {
        struct ib_pd ibpd;
-       struct ocrdma_dev *dev;
        struct ocrdma_ucontext *uctx;
        u32 id;
        int num_dpp_qp;
@@ -317,10 +372,8 @@ struct ocrdma_qp {
        bool dpp_enabled;
        u8 *ird_q_va;
        bool signaled;
-       u16 db_cache;
 };
 
-
 struct ocrdma_ucontext {
        struct ib_ucontext ibucontext;
 
@@ -385,13 +438,6 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq)
        return container_of(ibsrq, struct ocrdma_srq, ibsrq);
 }
 
-
-static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp)
-{
-       return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY &&
-                qp->id < 128) ? 24 : 16);
-}
-
 static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe)
 {
        int cqe_valid;
@@ -436,4 +482,40 @@ static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev,
        return 0;
 }
 
+static inline char *hca_name(struct ocrdma_dev *dev)
+{
+       /* The Skyhawk PF and VF PCI device ids share one display name. */
+       if (dev->nic_info.pdev->device == OC_SKH_DEVICE_PF ||
+           dev->nic_info.pdev->device == OC_SKH_DEVICE_VF)
+               return OC_NAME_SH;
+
+       /* Any other PCI device id is reported as unknown. */
+       return OC_NAME_UNKNOWN;
+}
+
+static inline int ocrdma_get_eq_table_index(struct ocrdma_dev *dev,
+               int eqid)
+{
+       int pos = 0;
+
+       /* Linear scan of dev->eq_tbl for the entry whose queue id is eqid. */
+       while (pos < dev->eq_cnt) {
+               if (dev->eq_tbl[pos].q.id == eqid)
+                       return pos;
+               pos++;
+       }
+
+       /* None of the eq_cnt entries carries the requested id. */
+       return -EINVAL;
+}
+
+static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev)
+{
+       /*
+        * For device family 0xF, fetch the ASIC id from PCI config space
+        * while the value stored in dev->asic_id is still zero.
+        */
+       if (!dev->asic_id && dev->nic_info.dev_family == 0xF)
+               pci_read_config_dword(dev->nic_info.pdev,
+                                     OCRDMA_SLI_ASIC_ID_OFFSET,
+                                     &dev->asic_id);
+
+       /* Extract the generation-number field from the ASIC id. */
+       return (dev->asic_id & OCRDMA_SLI_ASIC_GEN_NUM_MASK) >>
+               OCRDMA_SLI_ASIC_GEN_NUM_SHIFT;
+}
+
 #endif
index fbac8eb44036b02ff7c1233f76e951a0e0adfe8a..1554cca5712aafd5659d1cfb80bafff7095f67ca 100644 (file)
@@ -28,7 +28,8 @@
 #ifndef __OCRDMA_ABI_H__
 #define __OCRDMA_ABI_H__
 
-#define OCRDMA_ABI_VERSION 1
+#define OCRDMA_ABI_VERSION 2
+#define OCRDMA_BE_ROCE_ABI_VERSION 1
 /* user kernel communication data structures. */
 
 struct ocrdma_alloc_ucontext_resp {
@@ -107,9 +108,7 @@ struct ocrdma_create_qp_uresp {
        u32 db_sq_offset;
        u32 db_rq_offset;
        u32 db_shift;
-       u64 rsvd1;
-       u64 rsvd2;
-       u64 rsvd3;
+       u64 rsvd[11];
 } __packed;
 
 struct ocrdma_create_srq_uresp {
index 34071143006ed17124fc15b7be8218b935319720..d4cc01f10c015654f966000ef28c6b7a82435232 100644 (file)
@@ -100,7 +100,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
        if (!(attr->ah_flags & IB_AH_GRH))
                return ERR_PTR(-EINVAL);
 
-       ah = kzalloc(sizeof *ah, GFP_ATOMIC);
+       ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
        if (!ah)
                return ERR_PTR(-ENOMEM);
 
index 1664d648cbfc0531080056849d679622bcf0ea18..3bbf2010a82180e1f178e2af168dfd9aa20b195c 100644 (file)
@@ -32,7 +32,6 @@
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_user_verbs.h>
-#include <rdma/ib_addr.h>
 
 #include "ocrdma.h"
 #include "ocrdma_hw.h"
@@ -243,6 +242,23 @@ static int ocrdma_get_mbx_errno(u32 status)
        return err_num;
 }
 
+/*
+ * Return a label for the fastest speed present in the union of the fixed
+ * and auto-negotiated speed masks of dev->phy, or "" when none of the
+ * 40G/10G/1G bits is set.
+ */
+char *port_speed_string(struct ocrdma_dev *dev)
+{
+       u16 mask = dev->phy.fixed_speeds_supported |
+                       dev->phy.auto_speeds_supported;
+
+       /* Checked from fastest to slowest; the first match wins. */
+       if (mask & OCRDMA_PHY_SPEED_40GBPS)
+               return "40Gbps ";
+       if (mask & OCRDMA_PHY_SPEED_10GBPS)
+               return "10Gbps ";
+       if (mask & OCRDMA_PHY_SPEED_1GBPS)
+               return "1Gbps ";
+
+       return "";
+}
+
 static int ocrdma_get_mbx_cqe_errno(u16 cqe_status)
 {
        int err_num = -EINVAL;
@@ -332,6 +348,11 @@ static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len)
        return mqe;
 }
 
+/* Allocate one zero-filled struct ocrdma_mqe with GFP_KERNEL; NULL on failure. */
+static void *ocrdma_alloc_mqe(void)
+{
+       struct ocrdma_mqe *mqe = kzalloc(sizeof(*mqe), GFP_KERNEL);
+
+       return mqe;
+}
+
 static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q)
 {
        dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma);
@@ -364,8 +385,8 @@ static void ocrdma_build_q_pages(struct ocrdma_pa *q_pa, int cnt,
        }
 }
 
-static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q,
-                              int queue_type)
+static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev,
+                              struct ocrdma_queue_info *q, int queue_type)
 {
        u8 opcode = 0;
        int status;
@@ -444,7 +465,7 @@ mbx_err:
        return status;
 }
 
-static int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
+int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
 {
        int irq;
 
@@ -574,6 +595,7 @@ static int ocrdma_create_mq(struct ocrdma_dev *dev)
        if (status)
                goto alloc_err;
 
+       dev->eq_tbl[0].cq_cnt++;
        status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->eq_tbl[0].q);
        if (status)
                goto mbx_cq_free;
@@ -639,7 +661,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
 {
        struct ocrdma_qp *qp = NULL;
        struct ocrdma_cq *cq = NULL;
-       struct ib_event ib_evt;
+       struct ib_event ib_evt = { 0 };
        int cq_event = 0;
        int qp_event = 1;
        int srq_event = 0;
@@ -664,6 +686,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
        case OCRDMA_CQ_OVERRUN_ERROR:
                ib_evt.element.cq = &cq->ibcq;
                ib_evt.event = IB_EVENT_CQ_ERR;
+               cq_event = 1;
+               qp_event = 0;
                break;
        case OCRDMA_CQ_QPCAT_ERROR:
                ib_evt.element.qp = &qp->ibqp;
@@ -725,6 +749,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
                                                     qp->srq->ibsrq.
                                                     srq_context);
        } else if (dev_event) {
+               pr_err("%s: Fatal event received\n", dev->ibdev.name);
                ib_dispatch_event(&ib_evt);
        }
 
@@ -752,7 +777,6 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
        }
 }
 
-
 static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
 {
        /* async CQE processing */
@@ -799,8 +823,6 @@ static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
                        ocrdma_process_acqe(dev, cqe);
                else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK)
                        ocrdma_process_mcqe(dev, cqe);
-               else
-                       pr_err("%s() cqe->compl is not set.\n", __func__);
                memset(cqe, 0, sizeof(struct ocrdma_mcqe));
                ocrdma_mcq_inc_tail(dev);
        }
@@ -858,16 +880,8 @@ static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx)
                BUG();
 
        cq = dev->cq_tbl[cq_idx];
-       if (cq == NULL) {
-               pr_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx);
+       if (cq == NULL)
                return;
-       }
-       spin_lock_irqsave(&cq->cq_lock, flags);
-       cq->armed = false;
-       cq->solicited = false;
-       spin_unlock_irqrestore(&cq->cq_lock, flags);
-
-       ocrdma_ring_cq_db(dev, cq->id, false, false, 0);
 
        if (cq->ibcq.comp_handler) {
                spin_lock_irqsave(&cq->comp_handler_lock, flags);
@@ -892,27 +906,35 @@ static irqreturn_t ocrdma_irq_handler(int irq, void *handle)
        struct ocrdma_dev *dev = eq->dev;
        struct ocrdma_eqe eqe;
        struct ocrdma_eqe *ptr;
-       u16 eqe_popped = 0;
        u16 cq_id;
-       while (1) {
+       int budget = eq->cq_cnt;
+
+       do {
                ptr = ocrdma_get_eqe(eq);
                eqe = *ptr;
                ocrdma_le32_to_cpu(&eqe, sizeof(eqe));
                if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0)
                        break;
-               eqe_popped += 1;
+
                ptr->id_valid = 0;
+               /* ring eq doorbell as soon as its consumed. */
+               ocrdma_ring_eq_db(dev, eq->q.id, false, true, 1);
                /* check whether its CQE or not. */
                if ((eqe.id_valid & OCRDMA_EQE_FOR_CQE_MASK) == 0) {
                        cq_id = eqe.id_valid >> OCRDMA_EQE_RESOURCE_ID_SHIFT;
                        ocrdma_cq_handler(dev, cq_id);
                }
                ocrdma_eq_inc_tail(eq);
-       }
-       ocrdma_ring_eq_db(dev, eq->q.id, true, true, eqe_popped);
-       /* Ring EQ doorbell with num_popped to 0 to enable interrupts again. */
-       if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX)
-               ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);
+
+               /* There can be a stale EQE after the last bound CQ is
+                * destroyed. EQE valid and budget == 0 implies this.
+                */
+               if (budget)
+                       budget--;
+
+       } while (budget);
+
+       ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);
        return IRQ_HANDLED;
 }
 
@@ -949,7 +971,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
 {
        int status = 0;
        u16 cqe_status, ext_status;
-       struct ocrdma_mqe *rsp;
+       struct ocrdma_mqe *rsp_mqe;
+       struct ocrdma_mbx_rsp *rsp = NULL;
 
        mutex_lock(&dev->mqe_ctx.lock);
        ocrdma_post_mqe(dev, mqe);
@@ -958,23 +981,61 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
                goto mbx_err;
        cqe_status = dev->mqe_ctx.cqe_status;
        ext_status = dev->mqe_ctx.ext_status;
-       rsp = ocrdma_get_mqe_rsp(dev);
-       ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe)));
+       rsp_mqe = ocrdma_get_mqe_rsp(dev);
+       ocrdma_copy_le32_to_cpu(mqe, rsp_mqe, (sizeof(*mqe)));
+       if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >>
+                               OCRDMA_MQE_HDR_EMB_SHIFT)
+               rsp = &mqe->u.rsp;
+
        if (cqe_status || ext_status) {
-               pr_err("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n",
-                      __func__,
-                    (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
-                    OCRDMA_MBX_RSP_OPCODE_SHIFT, cqe_status, ext_status);
+               pr_err("%s() cqe_status=0x%x, ext_status=0x%x,",
+                      __func__, cqe_status, ext_status);
+               if (rsp) {
+                       /* This is for embedded cmds. */
+                       pr_err("opcode=0x%x, subsystem=0x%x\n",
+                              (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
+                               OCRDMA_MBX_RSP_OPCODE_SHIFT,
+                               (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >>
+                               OCRDMA_MBX_RSP_SUBSYS_SHIFT);
+               }
                status = ocrdma_get_mbx_cqe_errno(cqe_status);
                goto mbx_err;
        }
-       if (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK)
+       /* For non embedded, rsp errors are handled in ocrdma_nonemb_mbx_cmd */
+       if (rsp && (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK))
                status = ocrdma_get_mbx_errno(mqe->u.rsp.status);
 mbx_err:
        mutex_unlock(&dev->mqe_ctx.lock);
        return status;
 }
 
+/* Run a non-embedded mailbox command; RSP header is read via payload_va. */
+static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe,
+                                void *payload_va)
+{
+       struct ocrdma_mbx_rsp *rsp = payload_va;
+       int status;
+
+       /* An MQE carrying the embedded flag must never reach this path. */
+       if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >>
+                               OCRDMA_MQE_HDR_EMB_SHIFT)
+               BUG();
+
+       /* ocrdma_mbx_cmd() vets only the CQE here; check the RSP status too. */
+       status = ocrdma_mbx_cmd(dev, mqe);
+       if (!status && (rsp->status & OCRDMA_MBX_RSP_STATUS_MASK))
+               status = ocrdma_get_mbx_errno(rsp->status);
+       if (!status)
+               return 0;
+
+       pr_err("opcode=0x%x, subsystem=0x%x\n",
+              (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
+               OCRDMA_MBX_RSP_OPCODE_SHIFT,
+               (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >>
+               OCRDMA_MBX_RSP_SUBSYS_SHIFT);
+       return status;
+}
+
 static void ocrdma_get_attr(struct ocrdma_dev *dev,
                              struct ocrdma_dev_attr *attr,
                              struct ocrdma_mbx_query_config *rsp)
@@ -985,6 +1046,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
        attr->max_qp =
            (rsp->qp_srq_cq_ird_ord & OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK) >>
            OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT;
+       attr->max_srq =
+               (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
+               OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
        attr->max_send_sge = ((rsp->max_write_send_sge &
                               OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
                              OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT);
@@ -1000,9 +1064,6 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
        attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp &
                                OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >>
            OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT;
-       attr->max_srq =
-               (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
-               OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
        attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp &
                                OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >>
            OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT;
@@ -1015,6 +1076,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
        attr->local_ca_ack_delay = (rsp->max_pd_ca_ack_delay &
                                    OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK) >>
            OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT;
+       attr->max_mw = rsp->max_mw;
        attr->max_mr = rsp->max_mr;
        attr->max_mr_size = ~0ull;
        attr->max_fmr = 0;
@@ -1036,7 +1098,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
        attr->max_inline_data =
            attr->wqe_size - (sizeof(struct ocrdma_hdr_wqe) +
                              sizeof(struct ocrdma_sge));
-       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+       if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
                attr->ird = 1;
                attr->ird_page_size = OCRDMA_MIN_Q_PAGE_SIZE;
                attr->num_ird_pages = MAX_OCRDMA_IRD_PAGES;
@@ -1110,6 +1172,96 @@ mbx_err:
        return status;
 }
 
+/* Fetch RDMA stats from firmware into dev->stats_mem.va; a true reset is
+ * passed on in req->reset_stats. Prior contents return if the mailbox fails.
+ */
+int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset)
+{
+       struct ocrdma_rdma_stats_req *req = dev->stats_mem.va;
+       struct ocrdma_mqe *mqe = &dev->stats_mem.mqe;
+       struct ocrdma_rdma_stats_resp *saved;
+       int status;
+
+       saved = kzalloc(sizeof(*saved), GFP_KERNEL);
+       if (!saved)
+               return -ENOMEM;
+
+       /* Non-embedded MQE with a single SGE describing the stats buffer. */
+       memset(mqe, 0, sizeof(*mqe));
+       mqe->hdr.pyld_len = dev->stats_mem.size;
+       mqe->hdr.spcl_sge_cnt_emb |= (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+                                    OCRDMA_MQE_HDR_SGE_CNT_MASK;
+       mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dev->stats_mem.pa & 0xffffffff);
+       mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dev->stats_mem.pa);
+       mqe->u.nonemb_req.sge[0].len = dev->stats_mem.size;
+
+       /* Request and response share the buffer: keep a copy, then clear. */
+       memcpy(saved, req, sizeof(*saved));
+       memset(req, 0, dev->stats_mem.size);
+       ocrdma_init_mch((struct ocrdma_mbx_hdr *)req,
+                       OCRDMA_CMD_GET_RDMA_STATS, OCRDMA_SUBSYS_ROCE,
+                       dev->stats_mem.size);
+       if (reset)
+               req->reset_stats = reset;
+
+       status = ocrdma_nonemb_mbx_cmd(dev, mqe, dev->stats_mem.va);
+       if (!status)
+               ocrdma_le32_to_cpu(req, dev->stats_mem.size);
+       else
+               memcpy(req, saved, sizeof(*saved));  /* mbox failed: restore */
+
+       kfree(saved);
+       return status;
+}
+
+/* Fetch controller attributes; caches HBA port number and model string. */
+static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
+{
+       int status = -ENOMEM;
+       struct ocrdma_dma_mem dma;
+       struct ocrdma_mqe *mqe;
+       struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp;
+       struct mgmt_hba_attribs *hba_attribs;
+
+       /* ocrdma_alloc_mqe() uses kzalloc(), so the MQE is already zeroed. */
+       mqe = ocrdma_alloc_mqe();
+       if (!mqe)
+               return status;
+
+       dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp);
+       dma.va   = dma_alloc_coherent(&dev->nic_info.pdev->dev,
+                                       dma.size, &dma.pa, GFP_KERNEL);
+       if (!dma.va)
+               goto free_mqe;
+
+       /* Non-embedded command: one SGE describes the DMA buffer. */
+       mqe->hdr.pyld_len = dma.size;
+       mqe->hdr.spcl_sge_cnt_emb |= (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+                                    OCRDMA_MQE_HDR_SGE_CNT_MASK;
+       mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dma.pa & 0xffffffff);
+       mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dma.pa);
+       mqe->u.nonemb_req.sge[0].len = dma.size;
+
+       memset(dma.va, 0, dma.size);
+       ocrdma_init_mch((struct ocrdma_mbx_hdr *)dma.va,
+                       OCRDMA_CMD_GET_CTRL_ATTRIBUTES, OCRDMA_SUBSYS_COMMON,
+                       dma.size);
+
+       status = ocrdma_nonemb_mbx_cmd(dev, mqe, dma.va);
+       if (!status) {
+               ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va;
+               hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs;
+               dev->hba_port_num = hba_attribs->phy_port;
+               /* NOTE(review): strncpy adds no NUL for a 31+ char source */
+               strncpy(dev->model_number,
+                       hba_attribs->controller_model_number, 31);
+       }
+       dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa);
+free_mqe:
+       kfree(mqe);
+       return status;
+}
+
 static int ocrdma_mbx_query_dev(struct ocrdma_dev *dev)
 {
        int status = -ENOMEM;
@@ -1157,6 +1309,35 @@ mbx_err:
        return status;
 }
 
+/* Cache PHY type and auto/fixed speed masks from firmware in dev->phy. */
+static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev)
+{
+       struct ocrdma_get_phy_info_rsp *rsp;
+       struct ocrdma_mqe *mqe;
+       int status;
+
+       mqe = ocrdma_init_emb_mqe(OCRDMA_CMD_PHY_DETAILS, sizeof(*mqe));
+       if (!mqe)
+               return -ENOMEM;
+
+       ocrdma_init_mch((struct ocrdma_mbx_hdr *)&mqe->u.cmd[0],
+                       OCRDMA_CMD_PHY_DETAILS, OCRDMA_SUBSYS_COMMON,
+                       sizeof(*mqe));
+
+       status = ocrdma_mbx_cmd(dev, mqe);
+       if (!status) {
+               /* ocrdma_mbx_cmd() copied the response back into mqe. */
+               rsp = (struct ocrdma_get_phy_info_rsp *)mqe;
+               dev->phy.phy_type = le16_to_cpu(rsp->phy_type);
+               dev->phy.auto_speeds_supported =
+                               le16_to_cpu(rsp->auto_speeds_supported);
+               dev->phy.fixed_speeds_supported =
+                               le16_to_cpu(rsp->fixed_speeds_supported);
+       }
+       kfree(mqe);
+       return status;
+}
+
 int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
 {
        int status = -ENOMEM;
@@ -1226,7 +1407,7 @@ static int ocrdma_build_q_conf(u32 *num_entries, int entry_size,
 
 static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
 {
-       int i ;
+       int i;
        int status = 0;
        int max_ah;
        struct ocrdma_create_ah_tbl *cmd;
@@ -1357,12 +1538,10 @@ static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id)
        int i;
 
        mutex_lock(&dev->dev_lock);
-       for (i = 0; i < dev->eq_cnt; i++) {
-               if (dev->eq_tbl[i].q.id != eq_id)
-                       continue;
-               dev->eq_tbl[i].cq_cnt -= 1;
-               break;
-       }
+       i = ocrdma_get_eq_table_index(dev, eq_id);
+       if (i == -EINVAL)
+               BUG();
+       dev->eq_tbl[i].cq_cnt -= 1;
        mutex_unlock(&dev->dev_lock);
 }
 
@@ -1380,7 +1559,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
                       __func__, dev->id, dev->attr.max_cqe, entries);
                return -EINVAL;
        }
-       if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY))
+       if (dpp_cq && (ocrdma_get_asic_type(dev) != OCRDMA_ASIC_GEN_SKH_R))
                return -EINVAL;
 
        if (dpp_cq) {
@@ -1417,6 +1596,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
        cq->eqn = ocrdma_bind_eq(dev);
        cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3;
        cqe_count = cq->len / cqe_size;
+       cq->cqe_cnt = cqe_count;
        if (cqe_count > 1024) {
                /* Set cnt to 3 to indicate more than 1024 cq entries */
                cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT);
@@ -1439,7 +1619,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
        }
        /* shared eq between all the consumer cqs. */
        cmd->cmd.eqn = cq->eqn;
-       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+       if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
                if (dpp_cq)
                        cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP <<
                                OCRDMA_CREATE_CQ_TYPE_SHIFT;
@@ -1484,12 +1664,9 @@ int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq)
            (cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) &
            OCRDMA_DESTROY_CQ_QID_MASK;
 
-       ocrdma_unbind_eq(dev, cq->eqn);
        status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
-       if (status)
-               goto mbx_err;
+       ocrdma_unbind_eq(dev, cq->eqn);
        dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa);
-mbx_err:
        kfree(cmd);
        return status;
 }
@@ -2029,8 +2206,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
                                OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK;
        qp->rq_cq = cq;
 
-       if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp &&
-           (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) {
+       if (pd->dpp_enabled && pd->num_dpp_qp) {
                ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
                                             dpp_cq_id);
        }
@@ -2099,7 +2275,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
        memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
               sizeof(cmd->params.dgid));
        status = ocrdma_query_gid(&qp->dev->ibdev, 1,
-                        ah_attr->grh.sgid_index, &sgid);
+                       ah_attr->grh.sgid_index, &sgid);
        if (status)
                return status;
 
@@ -2127,8 +2303,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 
 static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
                                struct ocrdma_modify_qp *cmd,
-                               struct ib_qp_attr *attrs, int attr_mask,
-                               enum ib_qp_state old_qps)
+                               struct ib_qp_attr *attrs, int attr_mask)
 {
        int status = 0;
 
@@ -2233,8 +2408,7 @@ pmtu_err:
 }
 
 int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
-                        struct ib_qp_attr *attrs, int attr_mask,
-                        enum ib_qp_state old_qps)
+                        struct ib_qp_attr *attrs, int attr_mask)
 {
        int status = -ENOMEM;
        struct ocrdma_modify_qp *cmd;
@@ -2257,7 +2431,7 @@ int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
                    OCRDMA_QP_PARAMS_STATE_MASK;
        }
 
-       status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask, old_qps);
+       status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask);
        if (status)
                goto mbx_err;
        status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
@@ -2488,7 +2662,7 @@ static int ocrdma_create_eqs(struct ocrdma_dev *dev)
 
        for (i = 0; i < num_eq; i++) {
                status = ocrdma_create_eq(dev, &dev->eq_tbl[i],
-                                         OCRDMA_EQ_LEN);
+                                       OCRDMA_EQ_LEN);
                if (status) {
                        status = -EINVAL;
                        break;
@@ -2533,6 +2707,13 @@ int ocrdma_init_hw(struct ocrdma_dev *dev)
        status = ocrdma_mbx_create_ah_tbl(dev);
        if (status)
                goto conf_err;
+       status = ocrdma_mbx_get_phy_info(dev);
+       if (status)
+               goto conf_err;
+       status = ocrdma_mbx_get_ctrl_attribs(dev);
+       if (status)
+               goto conf_err;
+
        return 0;
 
 conf_err:
index 82fe332ae6c605b800da9c4b195db1b6399e60e7..e513f7293142e036b7872424add7b961c5caaf77 100644 (file)
@@ -112,8 +112,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs,
                         u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset,
                         u16 *dpp_credit_lmt);
 int ocrdma_mbx_modify_qp(struct ocrdma_dev *, struct ocrdma_qp *,
-                        struct ib_qp_attr *attrs, int attr_mask,
-                        enum ib_qp_state old_qps);
+                        struct ib_qp_attr *attrs, int attr_mask);
 int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *,
                        struct ocrdma_qp_params *param);
 int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *);
@@ -132,5 +131,8 @@ int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state,
 bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
 bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
 void ocrdma_flush_qp(struct ocrdma_qp *);
+int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq);
 
+int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset);
+char *port_speed_string(struct ocrdma_dev *dev);
 #endif                         /* __OCRDMA_HW_H__ */
index 1a8a945efa60e8fdb6683572d0d39e77f7dba36f..7c504e079744f44425a9bba90d347412ef192bf7 100644 (file)
 #include "ocrdma_ah.h"
 #include "be_roce.h"
 #include "ocrdma_hw.h"
+#include "ocrdma_stats.h"
 #include "ocrdma_abi.h"
 
-MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION);
-MODULE_DESCRIPTION("Emulex RoCE HCA Driver");
+MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION);
+MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION);
 MODULE_AUTHOR("Emulex Corporation");
 MODULE_LICENSE("GPL");
 
@@ -286,7 +287,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
 
        dev->ibdev.process_mad = ocrdma_process_mad;
 
-       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+       if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
                dev->ibdev.uverbs_cmd_mask |=
                     OCRDMA_UVERBS(CREATE_SRQ) |
                     OCRDMA_UVERBS(MODIFY_SRQ) |
@@ -338,9 +339,42 @@ static void ocrdma_free_resources(struct ocrdma_dev *dev)
        kfree(dev->sgid_tbl);
 }
 
+/* OCRDMA sysfs interface; "hw_rev" reports nic_info.pdev->vendor in hex. */
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+                       char *buf)
+{
+       struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+       return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
+}
+
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+                       char *buf)
+{      /* sysfs "fw_ver": dev->attr.fw_ver plus '\n', as hw_rev ends its line */
+       struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+       return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]);
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);    /* read-only */
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); /* read-only */
+
+static struct device_attribute *ocrdma_attributes[] = { /* see ocrdma_add() */
+       &dev_attr_hw_rev,
+       &dev_attr_fw_ver
+};
+
+static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
+{      /* drops every ocrdma_attributes[] file from dev->ibdev.dev */
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
+               device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
+}
+
 static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
 {
-       int status = 0;
+       int status = 0, i;
        struct ocrdma_dev *dev;
 
        dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
@@ -369,11 +403,25 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
        if (status)
                goto alloc_err;
 
+       for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
+               if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
+                       goto sysfs_err;
        spin_lock(&ocrdma_devlist_lock);
        list_add_tail_rcu(&dev->entry, &ocrdma_dev_list);
        spin_unlock(&ocrdma_devlist_lock);
+       /* Init stats */
+       ocrdma_add_port_stats(dev);
+
+       pr_info("%s %s: %s \"%s\" port %d\n",
+               dev_name(&dev->nic_info.pdev->dev), hca_name(dev),
+               port_speed_string(dev), dev->model_number,
+               dev->hba_port_num);
+       pr_info("%s ocrdma%d driver loaded successfully\n",
+               dev_name(&dev->nic_info.pdev->dev), dev->id);
        return dev;
 
+sysfs_err:
+       ocrdma_remove_sysfiles(dev);
 alloc_err:
        ocrdma_free_resources(dev);
        ocrdma_cleanup_hw(dev);
@@ -400,6 +448,9 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
        /* first unregister with stack to stop all the active traffic
         * of the registered clients.
         */
+       ocrdma_rem_port_stats(dev);
+       ocrdma_remove_sysfiles(dev);
+
        ib_unregister_device(&dev->ibdev);
 
        spin_lock(&ocrdma_devlist_lock);
@@ -437,7 +488,7 @@ static int ocrdma_close(struct ocrdma_dev *dev)
                cur_qp = dev->qp_tbl;
                for (i = 0; i < OCRDMA_MAX_QP; i++) {
                        qp = cur_qp[i];
-                       if (qp) {
+                       if (qp && qp->ibqp.qp_type != IB_QPT_GSI) {
                                /* change the QP state to ERROR */
                                _ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask);
 
@@ -478,6 +529,7 @@ static struct ocrdma_driver ocrdma_drv = {
        .add                    = ocrdma_add,
        .remove                 = ocrdma_remove,
        .state_change_handler   = ocrdma_event_handler,
+       .be_abi_version         = OCRDMA_BE_ROCE_ABI_VERSION,
 };
 
 static void ocrdma_unregister_inet6addr_notifier(void)
@@ -487,10 +539,17 @@ static void ocrdma_unregister_inet6addr_notifier(void)
 #endif
 }
 
+static void ocrdma_unregister_inetaddr_notifier(void)
+{      /* undoes register_inetaddr_notifier() from ocrdma_init_module() */
+       unregister_inetaddr_notifier(&ocrdma_inetaddr_notifier);
+}
+
 static int __init ocrdma_init_module(void)
 {
        int status;
 
+       ocrdma_init_debugfs();
+
        status = register_inetaddr_notifier(&ocrdma_inetaddr_notifier);
        if (status)
                return status;
@@ -498,13 +557,19 @@ static int __init ocrdma_init_module(void)
 #if IS_ENABLED(CONFIG_IPV6)
        status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
        if (status)
-               return status;
+               goto err_notifier6;
 #endif
 
        status = be_roce_register_driver(&ocrdma_drv);
        if (status)
-               ocrdma_unregister_inet6addr_notifier();
+               goto err_be_reg;
 
+       return 0;
+
+err_be_reg:
+       ocrdma_unregister_inet6addr_notifier();
+err_notifier6:
+       ocrdma_unregister_inetaddr_notifier();
        return status;
 }
 
@@ -512,6 +577,8 @@ static void __exit ocrdma_exit_module(void)
 {
        be_roce_unregister_driver(&ocrdma_drv);
        ocrdma_unregister_inet6addr_notifier();
+       ocrdma_unregister_inetaddr_notifier();
+       ocrdma_rem_debugfs();
 }
 
 module_init(ocrdma_init_module);
index 60d5ac23ea803f47c61b2be0ed2bd31590eb97cb..96c9ee602ba49bead98ba33ef2fbd37478abe830 100644 (file)
 
 #define Bit(_b) (1 << (_b))
 
-#define OCRDMA_GEN1_FAMILY     0xB
-#define OCRDMA_GEN2_FAMILY     0x0F
+enum {         /* compared against ocrdma_get_asic_type() results */
+       OCRDMA_ASIC_GEN_SKH_R = 0x04,
+       OCRDMA_ASIC_GEN_LANCER = 0x0B
+};
+
+enum {         /* NOTE(review): no user visible here; confirm decoding */
+       OCRDMA_ASIC_REV_A0 = 0x00,
+       OCRDMA_ASIC_REV_B0 = 0x10,
+       OCRDMA_ASIC_REV_C0 = 0x20
+};
 
 #define OCRDMA_SUBSYS_ROCE 10
 enum {
@@ -64,6 +72,7 @@ enum {
 
        OCRDMA_CMD_ATTACH_MCAST,
        OCRDMA_CMD_DETACH_MCAST,
+       OCRDMA_CMD_GET_RDMA_STATS,
 
        OCRDMA_CMD_MAX
 };
@@ -74,12 +83,14 @@ enum {
        OCRDMA_CMD_CREATE_CQ            = 12,
        OCRDMA_CMD_CREATE_EQ            = 13,
        OCRDMA_CMD_CREATE_MQ            = 21,
+       OCRDMA_CMD_GET_CTRL_ATTRIBUTES  = 32,
        OCRDMA_CMD_GET_FW_VER           = 35,
        OCRDMA_CMD_DELETE_MQ            = 53,
        OCRDMA_CMD_DELETE_CQ            = 54,
        OCRDMA_CMD_DELETE_EQ            = 55,
        OCRDMA_CMD_GET_FW_CONFIG        = 58,
-       OCRDMA_CMD_CREATE_MQ_EXT        = 90
+       OCRDMA_CMD_CREATE_MQ_EXT        = 90,
+       OCRDMA_CMD_PHY_DETAILS          = 102
 };
 
 enum {
@@ -103,7 +114,10 @@ enum {
        OCRDMA_DB_GEN2_SRQ_OFFSET       = OCRDMA_DB_GEN2_RQ_OFFSET,
        OCRDMA_DB_CQ_OFFSET             = 0x120,
        OCRDMA_DB_EQ_OFFSET             = OCRDMA_DB_CQ_OFFSET,
-       OCRDMA_DB_MQ_OFFSET             = 0x140
+       OCRDMA_DB_MQ_OFFSET             = 0x140,
+
+       OCRDMA_DB_SQ_SHIFT              = 16,
+       OCRDMA_DB_RQ_SHIFT              = 24
 };
 
 #define OCRDMA_DB_CQ_RING_ID_MASK       0x3FF  /* bits 0 - 9 */
@@ -138,6 +152,10 @@ enum {
 #define OCRDMA_MIN_Q_PAGE_SIZE (4096)
 #define OCRDMA_MAX_Q_PAGES     (8)
 
+#define OCRDMA_SLI_ASIC_ID_OFFSET      0x9C
+#define OCRDMA_SLI_ASIC_REV_MASK       0x000000FF
+#define OCRDMA_SLI_ASIC_GEN_NUM_MASK   0x0000FF00
+#define OCRDMA_SLI_ASIC_GEN_NUM_SHIFT  0x08
 /*
 # 0: 4K Bytes
 # 1: 8K Bytes
@@ -562,6 +580,30 @@ enum {
        OCRDMA_FN_MODE_RDMA     = 0x4
 };
 
+struct ocrdma_get_phy_info_rsp {
+       struct ocrdma_mqe_hdr hdr;
+       struct ocrdma_mbx_rsp rsp;
+
+       u16 phy_type;                   /* copied to dev->phy.phy_type */
+       u16 interface_type;
+       u32 misc_params;
+       u16 ext_phy_details;
+       u16 rsvd;
+       u16 auto_speeds_supported;      /* tested with OCRDMA_PHY_SPEED_* */
+       u16 fixed_speeds_supported;     /* tested with OCRDMA_PHY_SPEED_* */
+       u32 future_use[2];
+};
+
+enum {         /* bit flags; port_speed_string() tests 40G, 10G and 1G */
+       OCRDMA_PHY_SPEED_ZERO = 0x0,
+       OCRDMA_PHY_SPEED_10MBPS = 0x1,
+       OCRDMA_PHY_SPEED_100MBPS = 0x2,
+       OCRDMA_PHY_SPEED_1GBPS = 0x4,
+       OCRDMA_PHY_SPEED_10GBPS = 0x8,
+       OCRDMA_PHY_SPEED_40GBPS = 0x20
+};
+
+
 struct ocrdma_get_link_speed_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
@@ -590,7 +632,7 @@ enum {
 
 enum {
        OCRDMA_CREATE_CQ_VER2                   = 2,
-       OCRDMA_CREATE_CQ_VER3                   = 3,
+       OCRDMA_CREATE_CQ_VER3                   = 3,
 
        OCRDMA_CREATE_CQ_PAGE_CNT_MASK          = 0xFFFF,
        OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT        = 16,
@@ -1050,6 +1092,7 @@ enum {
        OCRDMA_MODIFY_QP_RSP_MAX_ORD_MASK       = 0xFFFF <<
                                        OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT
 };
+
 struct ocrdma_modify_qp_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
@@ -1062,8 +1105,8 @@ struct ocrdma_query_qp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_hdr req;
 
-#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0
-#define OCRDMA_QUERY_UP_QP_ID_MASK   0xFFFFFF
+#define OCRDMA_QUERY_UP_QP_ID_SHIFT    0
+#define OCRDMA_QUERY_UP_QP_ID_MASK     0xFFFFFF
        u32 qp_id;
 };
 
@@ -1703,4 +1746,208 @@ struct ocrdma_av {
        u32 valid;
 } __packed;
 
+/*
+ * Per-resource counters as laid out in the RDMA stats mailbox response.
+ * struct ocrdma_rdma_stats_resp embeds this twice (act_rsrc_stats and
+ * th_rsrc_stats); ocrdma_stats.c prints those two copies with the
+ * "active_" and "threshold_" label prefixes respectively.
+ * The rsvd* members are placeholders in the layout and are never printed.
+ */
+struct ocrdma_rsrc_stats {
+       u32 dpp_pds;
+       u32 non_dpp_pds;
+       u32 rc_dpp_qps;
+       u32 uc_dpp_qps;
+       u32 ud_dpp_qps;
+       u32 rc_non_dpp_qps;
+       u32 rsvd;
+       u32 uc_non_dpp_qps;
+       u32 ud_non_dpp_qps;
+       u32 rsvd1;
+       u32 srqs;
+       u32 rbqs;
+       u32 r64K_nsmr;
+       u32 r64K_to_2M_nsmr;
+       u32 r2M_to_44M_nsmr;
+       u32 r44M_to_1G_nsmr;
+       u32 r1G_to_4G_nsmr;
+       u32 nsmr_count_4G_to_32G;
+       u32 r32G_to_64G_nsmr;
+       u32 r64G_to_128G_nsmr;
+       u32 r128G_to_higher_nsmr;
+       u32 embedded_nsmr;
+       u32 frmr;
+       u32 prefetch_qps;
+       u32 ondemand_qps;
+       u32 phy_mr;
+       u32 mw;
+       u32 rsvd2[7];
+};
+
+/*
+ * Doorbell and CQ-overflow error counters; embedded in
+ * struct ocrdma_rdma_stats_resp as db_err_stats and exported through the
+ * "db_err_stats" debugfs file. rsvd[] pads the layout and is not printed.
+ */
+struct ocrdma_db_err_stats {
+       u32 sq_doorbell_errors;
+       u32 cq_doorbell_errors;
+       u32 rq_srq_doorbell_errors;
+       u32 cq_overflow_errors;
+       u32 rsvd[4];
+};
+
+/*
+ * Work-queue-element counters; embedded in struct ocrdma_rdma_stats_resp
+ * as wqe_stats. Each <name>_lo / <name>_hi pair is merged into a single
+ * 64-bit value with convert_to_64bit(lo, hi) by the debugfs code before
+ * being printed; dpp_wqe_drops is a plain 32-bit counter. The rsvd*
+ * arrays only hold positions in the layout.
+ */
+struct ocrdma_wqe_stats {
+       u32 large_send_rc_wqes_lo;
+       u32 large_send_rc_wqes_hi;
+       u32 large_write_rc_wqes_lo;
+       u32 large_write_rc_wqes_hi;
+       u32 rsvd[4];
+       u32 read_wqes_lo;
+       u32 read_wqes_hi;
+       u32 frmr_wqes_lo;
+       u32 frmr_wqes_hi;
+       u32 mw_bind_wqes_lo;
+       u32 mw_bind_wqes_hi;
+       u32 invalidate_wqes_lo;
+       u32 invalidate_wqes_hi;
+       u32 rsvd1[2];
+       u32 dpp_wqe_drops;
+       u32 rsvd2[5];
+};
+
+/*
+ * Transmit-side packet and byte counters; embedded in
+ * struct ocrdma_rdma_stats_resp as tx_stats. The <name>_lo / <name>_hi
+ * pairs are combined with convert_to_64bit(lo, hi) when printed;
+ * ack_timeouts is a single 32-bit counter. rsvd[] is layout padding.
+ */
+struct ocrdma_tx_stats {
+       u32 send_pkts_lo;
+       u32 send_pkts_hi;
+       u32 write_pkts_lo;
+       u32 write_pkts_hi;
+       u32 read_pkts_lo;
+       u32 read_pkts_hi;
+       u32 read_rsp_pkts_lo;
+       u32 read_rsp_pkts_hi;
+       u32 ack_pkts_lo;
+       u32 ack_pkts_hi;
+       u32 send_bytes_lo;
+       u32 send_bytes_hi;
+       u32 write_bytes_lo;
+       u32 write_bytes_hi;
+       u32 read_req_bytes_lo;
+       u32 read_req_bytes_hi;
+       u32 read_rsp_bytes_lo;
+       u32 read_rsp_bytes_hi;
+       u32 ack_timeouts;
+       u32 rsvd[5];
+};
+
+
+/*
+ * Transmit-side QP error counters; embedded in
+ * struct ocrdma_rdma_stats_resp as tx_qp_err_stats and exported through
+ * the "tx_qp_err_stats" debugfs file. rsvd[] is layout padding.
+ */
+struct ocrdma_tx_qp_err_stats {
+       u32 local_length_errors;
+       u32 local_protection_errors;
+       u32 local_qp_operation_errors;
+       u32 retry_count_exceeded_errors;
+       u32 rnr_retry_count_exceeded_errors;
+       u32 rsvd[3];
+};
+
+/*
+ * Receive-side counters; embedded in struct ocrdma_rdma_stats_resp as
+ * rx_stats. roce_frame_bytes and roce_frames are 64-bit values split
+ * into _lo / _hi halves (merged with convert_to_64bit(lo, hi) when
+ * printed); all other members are 32-bit counters. rsvd is padding.
+ */
+struct ocrdma_rx_stats {
+       u32 roce_frame_bytes_lo;
+       u32 roce_frame_bytes_hi;
+       u32 roce_frame_icrc_drops;
+       u32 roce_frame_payload_len_drops;
+       u32 ud_drops;
+       u32 qp1_drops;
+       u32 psn_error_request_packets;
+       u32 psn_error_resp_packets;
+       u32 rnr_nak_timeouts;
+       u32 rnr_nak_receives;
+       u32 roce_frame_rxmt_drops;
+       u32 nak_count_psn_sequence_errors;
+       u32 rc_drop_count_lookup_errors;
+       u32 rq_rnr_naks;
+       u32 srq_rnr_naks;
+       u32 roce_frames_lo;
+       u32 roce_frames_hi;
+       u32 rsvd;
+};
+
+/*
+ * Receive-side QP error counters; embedded in
+ * struct ocrdma_rdma_stats_resp as rx_qp_err_stats.
+ * Note: "requst" in nak_invalid_requst_errors is the spelling used both
+ * here and in the debugfs label, so renaming it would change the text
+ * user space sees.
+ */
+struct ocrdma_rx_qp_err_stats {
+       u32 nak_invalid_requst_errors;
+       u32 nak_remote_operation_errors;
+       u32 nak_count_remote_access_errors;
+       u32 local_length_errors;
+       u32 local_protection_errors;
+       u32 local_qp_operation_errors;
+       u32 rsvd[2];
+};
+
+/*
+ * 100 opaque transmit debug words; the "tx_dbg_stats" debugfs file dumps
+ * them one per line as "DW[i] = 0x<hex>" without interpreting them.
+ */
+struct ocrdma_tx_dbg_stats {
+       u32 data[100];
+};
+
+/*
+ * 200 opaque receive debug words; the "rx_dbg_stats" debugfs file dumps
+ * them one per line as "DW[i] = 0x<hex>" without interpreting them.
+ */
+struct ocrdma_rx_dbg_stats {
+       u32 data[200];
+};
+
+/*
+ * Request side of the RDMA stats mailbox command. The stats DMA buffer is
+ * sized as the larger of this struct and struct ocrdma_rdma_stats_resp.
+ * NOTE(review): reset_stats presumably asks the firmware to clear its
+ * counters; the code that fills it in is not in this hunk - confirm.
+ */
+struct ocrdma_rdma_stats_req {
+       struct ocrdma_mbx_hdr hdr;
+       u8 reset_stats;
+       u8 rsvd[3];
+} __packed;
+
+/*
+ * Response side of the RDMA stats mailbox command: the mailbox header
+ * followed by every counter group. ocrdma_stats.c casts the stats DMA
+ * buffer (dev->stats_mem.va) to this type and formats one debugfs file
+ * per member group; act_rsrc_stats / th_rsrc_stats are the "active" and
+ * "threshold" copies of the resource counters.
+ */
+struct ocrdma_rdma_stats_resp {
+       struct ocrdma_mbx_hdr hdr;
+       struct ocrdma_rsrc_stats act_rsrc_stats;
+       struct ocrdma_rsrc_stats th_rsrc_stats;
+       struct ocrdma_db_err_stats      db_err_stats;
+       struct ocrdma_wqe_stats         wqe_stats;
+       struct ocrdma_tx_stats          tx_stats;
+       struct ocrdma_tx_qp_err_stats   tx_qp_err_stats;
+       struct ocrdma_rx_stats          rx_stats;
+       struct ocrdma_rx_qp_err_stats   rx_qp_err_stats;
+       struct ocrdma_tx_dbg_stats      tx_dbg_stats;
+       struct ocrdma_rx_dbg_stats      rx_dbg_stats;
+} __packed;
+
+
+/*
+ * Adapter (HBA) attribute block: fixed-size version/identification
+ * strings followed by capability and status fields. It is embedded as the
+ * first member of struct mgmt_controller_attrib.
+ * NOTE(review): the struct is not marked __packed and its consumers are
+ * not visible in this hunk; field meanings are taken from the names only.
+ */
+struct mgmt_hba_attribs {
+       u8 flashrom_version_string[32];
+       u8 manufacturer_name[32];
+       u32 supported_modes;
+       u32 rsvd0[3];
+       u8 ncsi_ver_string[12];
+       u32 default_extended_timeout;
+       u8 controller_model_number[32];
+       u8 controller_description[64];
+       u8 controller_serial_number[32];
+       u8 ip_version_string[32];
+       u8 firmware_version_string[32];
+       u8 bios_version_string[32];
+       u8 redboot_version_string[32];
+       u8 driver_version_string[32];
+       u8 fw_on_flash_version_string[32];
+       u32 functionalities_supported;
+       u16 max_cdblength;
+       u8 asic_revision;
+       u8 generational_guid[16];
+       u8 hba_port_count;
+       u16 default_link_down_timeout;
+       u8 iscsi_ver_min_max;
+       u8 multifunction_device;
+       u8 cache_valid;
+       u8 hba_status;
+       u8 max_domains_supported;
+       u8 phy_port;
+       u32 firmware_post_status;
+       u32 hba_mtu[8];
+       u32 rsvd1[4];
+};
+
+/*
+ * Controller attribute block: the HBA attributes followed by the PCI
+ * identity/location of the function and a 64-bit unique identifier.
+ * Carried as the payload of struct ocrdma_get_ctrl_attribs_rsp.
+ */
+struct mgmt_controller_attrib {
+       struct mgmt_hba_attribs hba_attribs;
+       u16 pci_vendor_id;
+       u16 pci_device_id;
+       u16 pci_sub_vendor_id;
+       u16 pci_sub_system_id;
+       u8 pci_bus_number;
+       u8 pci_device_number;
+       u8 pci_function_number;
+       u8 interface_type;
+       u64 unique_identifier;
+       u32 rsvd0[5];
+};
+
+/*
+ * Response layout for a "get controller attributes" mailbox command:
+ * mailbox header followed by the controller attribute block.
+ * NOTE(review): the issuing code is not part of this hunk.
+ */
+struct ocrdma_get_ctrl_attribs_rsp {
+       struct ocrdma_mbx_hdr hdr;
+       struct mgmt_controller_attrib ctrl_attribs;
+};
+
+
 #endif                         /* __OCRDMA_SLI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
new file mode 100644 (file)
index 0000000..6c54106
--- /dev/null
@@ -0,0 +1,623 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) adapters.                   *
+ * Copyright (C) 2008-2014 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#include <rdma/ib_addr.h>
+#include "ocrdma_stats.h"
+
+static struct dentry *ocrdma_dbgfs_dir;
+
+/*
+ * Append one "<name>: <count>\n" line at @pcur inside the debugfs text
+ * buffer that begins at @start and is OCRDMA_MAX_DBGFS_MEM bytes long.
+ *
+ * Returns the number of bytes appended, so the caller can advance its
+ * cursor, or 0 when the line does not fit (nothing is written then and
+ * an error is logged).
+ *
+ * The callers zero the buffer first and the read path later runs
+ * strlen() on it, so the last byte of the buffer must never be
+ * overwritten: a line that would end exactly at the buffer end is
+ * rejected as well.
+ */
+static int ocrdma_add_stat(char *start, char *pcur,
+                               char *name, u64 count)
+{
+       char buff[128] = {0};
+       int cpy_len;
+
+       snprintf(buff, sizeof(buff), "%s: %llu\n", name, count);
+       cpy_len = strlen(buff);
+
+       /* ">=" (not ">") keeps one byte free for the terminating NUL */
+       if (pcur + cpy_len >= start + OCRDMA_MAX_DBGFS_MEM) {
+               pr_err("%s: No space in stats buff\n", __func__);
+               return 0;
+       }
+
+       memcpy(pcur, buff, cpy_len);
+       return cpy_len;
+}
+
+/*
+ * Allocate the two buffers the stats code works with:
+ *  - a zeroed coherent DMA buffer, large enough for either the stats
+ *    mailbox request or its response, and
+ *  - a zeroed OCRDMA_MAX_DBGFS_MEM byte text buffer for debugfs output.
+ *
+ * Returns true on success. On failure false is returned and whatever was
+ * already allocated is left in dev->stats_mem; the caller is expected to
+ * undo it with ocrdma_release_stats_mem().
+ */
+static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev)
+{
+       struct stats_mem *sm = &dev->stats_mem;
+
+       /* Mailbox buffer: shared by request and response */
+       sm->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req),
+                        sizeof(struct ocrdma_rdma_stats_resp));
+       sm->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, sm->size,
+                                   &sm->pa, GFP_KERNEL);
+       if (sm->va == NULL) {
+               pr_err("%s: stats mbox allocation failed\n", __func__);
+               return false;
+       }
+       memset(sm->va, 0, sm->size);
+
+       /* Text buffer the debugfs read handlers format into */
+       sm->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL);
+       if (sm->debugfs_mem != NULL)
+               return true;
+
+       pr_err("%s: stats debugfs mem allocation failed\n", __func__);
+       return false;
+}
+
+/*
+ * Free the stats DMA buffer and the debugfs text buffer, if present.
+ *
+ * Both pointers are cleared afterwards so that dev->stats_mem never holds
+ * dangling addresses and calling this function again is harmless.
+ */
+static void ocrdma_release_stats_mem(struct ocrdma_dev *dev)
+{
+       struct stats_mem *mem = &dev->stats_mem;
+
+       if (mem->va) {
+               dma_free_coherent(&dev->nic_info.pdev->dev, mem->size,
+                                 mem->va, mem->pa);
+               mem->va = NULL;
+       }
+
+       /* kfree(NULL) is a no-op, so no guard is needed here */
+       kfree(mem->debugfs_mem);
+       mem->debugfs_mem = NULL;
+}
+
+/* Emit one resource counter as "<prefix>_<name>: <count>\n". */
+static int ocrdma_add_rsrc_stat(char *start, char *pcur, const char *prefix,
+                               const char *name, u32 count)
+{
+       /* Longest label today is "threshold_128G_to_higher_nsmr" */
+       char label[64];
+
+       snprintf(label, sizeof(label), "%s_%s", prefix, name);
+       return ocrdma_add_stat(start, pcur, label, (u64)count);
+}
+
+/*
+ * Format every printable counter of @rs, each label prefixed with
+ * "<prefix>_", starting at @pcur. Returns the advanced cursor.
+ */
+static char *ocrdma_dump_rsrc_stats(char *start, char *pcur,
+                                   const char *prefix,
+                                   struct ocrdma_rsrc_stats *rs)
+{
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "dpp_pds",
+                                    rs->dpp_pds);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "non_dpp_pds",
+                                    rs->non_dpp_pds);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "rc_dpp_qps",
+                                    rs->rc_dpp_qps);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "uc_dpp_qps",
+                                    rs->uc_dpp_qps);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "ud_dpp_qps",
+                                    rs->ud_dpp_qps);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "rc_non_dpp_qps",
+                                    rs->rc_non_dpp_qps);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "uc_non_dpp_qps",
+                                    rs->uc_non_dpp_qps);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "ud_non_dpp_qps",
+                                    rs->ud_non_dpp_qps);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "srqs",
+                                    rs->srqs);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "rbqs",
+                                    rs->rbqs);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "64K_nsmr",
+                                    rs->r64K_nsmr);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "64K_to_2M_nsmr",
+                                    rs->r64K_to_2M_nsmr);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "2M_to_44M_nsmr",
+                                    rs->r2M_to_44M_nsmr);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "44M_to_1G_nsmr",
+                                    rs->r44M_to_1G_nsmr);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "1G_to_4G_nsmr",
+                                    rs->r1G_to_4G_nsmr);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix,
+                                    "nsmr_count_4G_to_32G",
+                                    rs->nsmr_count_4G_to_32G);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "32G_to_64G_nsmr",
+                                    rs->r32G_to_64G_nsmr);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "64G_to_128G_nsmr",
+                                    rs->r64G_to_128G_nsmr);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix,
+                                    "128G_to_higher_nsmr",
+                                    rs->r128G_to_higher_nsmr);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "embedded_nsmr",
+                                    rs->embedded_nsmr);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "frmr",
+                                    rs->frmr);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "prefetch_qps",
+                                    rs->prefetch_qps);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "ondemand_qps",
+                                    rs->ondemand_qps);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "phy_mr",
+                                    rs->phy_mr);
+       pcur += ocrdma_add_rsrc_stat(start, pcur, prefix, "mw",
+                                    rs->mw);
+       return pcur;
+}
+
+/*
+ * Format the resource counters of the last stats mailbox response into
+ * the device's debugfs text buffer: first the active values
+ * ("active_<name>"), then the threshold values ("threshold_<name>").
+ * The buffer is zeroed first, so the result is NUL terminated.
+ *
+ * Returns the start of the text buffer.
+ */
+static char *ocrdma_resource_stats(struct ocrdma_dev *dev)
+{
+       char *stats = dev->stats_mem.debugfs_mem, *pcur;
+       struct ocrdma_rdma_stats_resp *rdma_stats =
+                       (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+
+       memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pcur = ocrdma_dump_rsrc_stats(stats, stats, "active",
+                                     &rdma_stats->act_rsrc_stats);
+
+       /* Print the threshold stats */
+       ocrdma_dump_rsrc_stats(stats, pcur, "threshold",
+                              &rdma_stats->th_rsrc_stats);
+       return stats;
+}
+
+/*
+ * Format the receive counters of the last stats mailbox response as
+ * "<name>: <value>" lines into the device's debugfs text buffer (zeroed
+ * first) and return the start of that buffer. The two _lo/_hi pairs are
+ * merged with convert_to_64bit() before printing.
+ */
+static char *ocrdma_rx_stats(struct ocrdma_dev *dev)
+{
+       struct ocrdma_rdma_stats_resp *resp =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_rx_stats *rx = &resp->rx_stats;
+       char *buf = dev->stats_mem.debugfs_mem;
+       char *pos = buf;
+
+       memset(buf, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pos += ocrdma_add_stat(buf, pos, "roce_frame_bytes",
+                              convert_to_64bit(rx->roce_frame_bytes_lo,
+                                               rx->roce_frame_bytes_hi));
+       pos += ocrdma_add_stat(buf, pos, "roce_frame_icrc_drops",
+                              (u64)rx->roce_frame_icrc_drops);
+       pos += ocrdma_add_stat(buf, pos, "roce_frame_payload_len_drops",
+                              (u64)rx->roce_frame_payload_len_drops);
+       pos += ocrdma_add_stat(buf, pos, "ud_drops",
+                              (u64)rx->ud_drops);
+       pos += ocrdma_add_stat(buf, pos, "qp1_drops",
+                              (u64)rx->qp1_drops);
+       pos += ocrdma_add_stat(buf, pos, "psn_error_request_packets",
+                              (u64)rx->psn_error_request_packets);
+       pos += ocrdma_add_stat(buf, pos, "psn_error_resp_packets",
+                              (u64)rx->psn_error_resp_packets);
+       pos += ocrdma_add_stat(buf, pos, "rnr_nak_timeouts",
+                              (u64)rx->rnr_nak_timeouts);
+       pos += ocrdma_add_stat(buf, pos, "rnr_nak_receives",
+                              (u64)rx->rnr_nak_receives);
+       pos += ocrdma_add_stat(buf, pos, "roce_frame_rxmt_drops",
+                              (u64)rx->roce_frame_rxmt_drops);
+       pos += ocrdma_add_stat(buf, pos, "nak_count_psn_sequence_errors",
+                              (u64)rx->nak_count_psn_sequence_errors);
+       pos += ocrdma_add_stat(buf, pos, "rc_drop_count_lookup_errors",
+                              (u64)rx->rc_drop_count_lookup_errors);
+       pos += ocrdma_add_stat(buf, pos, "rq_rnr_naks",
+                              (u64)rx->rq_rnr_naks);
+       pos += ocrdma_add_stat(buf, pos, "srq_rnr_naks",
+                              (u64)rx->srq_rnr_naks);
+       pos += ocrdma_add_stat(buf, pos, "roce_frames",
+                              convert_to_64bit(rx->roce_frames_lo,
+                                               rx->roce_frames_hi));
+
+       return buf;
+}
+
+/*
+ * Format the transmit counters of the last stats mailbox response as
+ * "<name>: <value>" lines into the device's debugfs text buffer (zeroed
+ * first) and return the start of that buffer. All packet/byte counters
+ * are 64-bit values assembled from their _lo/_hi halves.
+ */
+static char *ocrdma_tx_stats(struct ocrdma_dev *dev)
+{
+       struct ocrdma_rdma_stats_resp *resp =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_tx_stats *tx = &resp->tx_stats;
+       char *buf = dev->stats_mem.debugfs_mem;
+       char *pos = buf;
+
+       memset(buf, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pos += ocrdma_add_stat(buf, pos, "send_pkts",
+                              convert_to_64bit(tx->send_pkts_lo,
+                                               tx->send_pkts_hi));
+       pos += ocrdma_add_stat(buf, pos, "write_pkts",
+                              convert_to_64bit(tx->write_pkts_lo,
+                                               tx->write_pkts_hi));
+       pos += ocrdma_add_stat(buf, pos, "read_pkts",
+                              convert_to_64bit(tx->read_pkts_lo,
+                                               tx->read_pkts_hi));
+       pos += ocrdma_add_stat(buf, pos, "read_rsp_pkts",
+                              convert_to_64bit(tx->read_rsp_pkts_lo,
+                                               tx->read_rsp_pkts_hi));
+       pos += ocrdma_add_stat(buf, pos, "ack_pkts",
+                              convert_to_64bit(tx->ack_pkts_lo,
+                                               tx->ack_pkts_hi));
+       pos += ocrdma_add_stat(buf, pos, "send_bytes",
+                              convert_to_64bit(tx->send_bytes_lo,
+                                               tx->send_bytes_hi));
+       pos += ocrdma_add_stat(buf, pos, "write_bytes",
+                              convert_to_64bit(tx->write_bytes_lo,
+                                               tx->write_bytes_hi));
+       pos += ocrdma_add_stat(buf, pos, "read_req_bytes",
+                              convert_to_64bit(tx->read_req_bytes_lo,
+                                               tx->read_req_bytes_hi));
+       pos += ocrdma_add_stat(buf, pos, "read_rsp_bytes",
+                              convert_to_64bit(tx->read_rsp_bytes_lo,
+                                               tx->read_rsp_bytes_hi));
+       pos += ocrdma_add_stat(buf, pos, "ack_timeouts",
+                              (u64)tx->ack_timeouts);
+
+       return buf;
+}
+
+/*
+ * Format the WQE counters of the last stats mailbox response as
+ * "<name>: <value>" lines into the device's debugfs text buffer (zeroed
+ * first) and return the start of that buffer.
+ */
+static char *ocrdma_wqe_stats(struct ocrdma_dev *dev)
+{
+       struct ocrdma_rdma_stats_resp *resp =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_wqe_stats *wqe = &resp->wqe_stats;
+       char *buf = dev->stats_mem.debugfs_mem;
+       char *pos = buf;
+
+       memset(buf, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pos += ocrdma_add_stat(buf, pos, "large_send_rc_wqes",
+                              convert_to_64bit(wqe->large_send_rc_wqes_lo,
+                                               wqe->large_send_rc_wqes_hi));
+       pos += ocrdma_add_stat(buf, pos, "large_write_rc_wqes",
+                              convert_to_64bit(wqe->large_write_rc_wqes_lo,
+                                               wqe->large_write_rc_wqes_hi));
+       pos += ocrdma_add_stat(buf, pos, "read_wqes",
+                              convert_to_64bit(wqe->read_wqes_lo,
+                                               wqe->read_wqes_hi));
+       pos += ocrdma_add_stat(buf, pos, "frmr_wqes",
+                              convert_to_64bit(wqe->frmr_wqes_lo,
+                                               wqe->frmr_wqes_hi));
+       pos += ocrdma_add_stat(buf, pos, "mw_bind_wqes",
+                              convert_to_64bit(wqe->mw_bind_wqes_lo,
+                                               wqe->mw_bind_wqes_hi));
+       pos += ocrdma_add_stat(buf, pos, "invalidate_wqes",
+                              convert_to_64bit(wqe->invalidate_wqes_lo,
+                                               wqe->invalidate_wqes_hi));
+       pos += ocrdma_add_stat(buf, pos, "dpp_wqe_drops",
+                              (u64)wqe->dpp_wqe_drops);
+
+       return buf;
+}
+
+/*
+ * Format the doorbell / CQ-overflow error counters of the last stats
+ * mailbox response into the device's debugfs text buffer (zeroed first)
+ * and return the start of that buffer.
+ */
+static char *ocrdma_db_errstats(struct ocrdma_dev *dev)
+{
+       struct ocrdma_rdma_stats_resp *resp =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_db_err_stats *db = &resp->db_err_stats;
+       char *buf = dev->stats_mem.debugfs_mem;
+       char *pos = buf;
+
+       memset(buf, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pos += ocrdma_add_stat(buf, pos, "sq_doorbell_errors",
+                              (u64)db->sq_doorbell_errors);
+       pos += ocrdma_add_stat(buf, pos, "cq_doorbell_errors",
+                              (u64)db->cq_doorbell_errors);
+       pos += ocrdma_add_stat(buf, pos, "rq_srq_doorbell_errors",
+                              (u64)db->rq_srq_doorbell_errors);
+       pos += ocrdma_add_stat(buf, pos, "cq_overflow_errors",
+                              (u64)db->cq_overflow_errors);
+
+       return buf;
+}
+
+/*
+ * Format the receive-side QP error counters of the last stats mailbox
+ * response into the device's debugfs text buffer (zeroed first) and
+ * return the start of that buffer.
+ */
+static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
+{
+       struct ocrdma_rdma_stats_resp *resp =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_rx_qp_err_stats *err = &resp->rx_qp_err_stats;
+       char *buf = dev->stats_mem.debugfs_mem;
+       char *pos = buf;
+
+       memset(buf, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pos += ocrdma_add_stat(buf, pos, "nak_invalid_requst_errors",
+                              (u64)err->nak_invalid_requst_errors);
+       pos += ocrdma_add_stat(buf, pos, "nak_remote_operation_errors",
+                              (u64)err->nak_remote_operation_errors);
+       pos += ocrdma_add_stat(buf, pos, "nak_count_remote_access_errors",
+                              (u64)err->nak_count_remote_access_errors);
+       pos += ocrdma_add_stat(buf, pos, "local_length_errors",
+                              (u64)err->local_length_errors);
+       pos += ocrdma_add_stat(buf, pos, "local_protection_errors",
+                              (u64)err->local_protection_errors);
+       pos += ocrdma_add_stat(buf, pos, "local_qp_operation_errors",
+                              (u64)err->local_qp_operation_errors);
+
+       return buf;
+}
+
+/*
+ * Format the transmit-side QP error counters of the last stats mailbox
+ * response into the device's debugfs text buffer (zeroed first) and
+ * return the start of that buffer.
+ */
+static char *ocrdma_txqp_errstats(struct ocrdma_dev *dev)
+{
+       struct ocrdma_rdma_stats_resp *resp =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_tx_qp_err_stats *err = &resp->tx_qp_err_stats;
+       char *buf = dev->stats_mem.debugfs_mem;
+       char *pos = buf;
+
+       memset(buf, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pos += ocrdma_add_stat(buf, pos, "local_length_errors",
+                              (u64)err->local_length_errors);
+       pos += ocrdma_add_stat(buf, pos, "local_protection_errors",
+                              (u64)err->local_protection_errors);
+       pos += ocrdma_add_stat(buf, pos, "local_qp_operation_errors",
+                              (u64)err->local_qp_operation_errors);
+       pos += ocrdma_add_stat(buf, pos, "retry_count_exceeded_errors",
+                              (u64)err->retry_count_exceeded_errors);
+       pos += ocrdma_add_stat(buf, pos, "rnr_retry_count_exceeded_errors",
+                              (u64)err->rnr_retry_count_exceeded_errors);
+
+       return buf;
+}
+
+/*
+ * Dump the transmit debug words of the last stats mailbox response as
+ * "DW[<index>] = 0x<hex>" lines into the device's debugfs text buffer
+ * (zeroed first) and return the start of that buffer.
+ *
+ * Each line is bounded by the space actually left in the buffer, so the
+ * dump can never run past OCRDMA_MAX_DBGFS_MEM regardless of how many
+ * words the array holds; scnprintf() always leaves room for the NUL.
+ */
+static char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev)
+{
+       int i;
+       char *pstats = dev->stats_mem.debugfs_mem;
+       char *end = pstats + OCRDMA_MAX_DBGFS_MEM;
+       struct ocrdma_rdma_stats_resp *rdma_stats =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_tx_dbg_stats *tx_dbg_stats =
+               &rdma_stats->tx_dbg_stats;
+
+       memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       /* Iterate over the array's real size instead of a literal count */
+       for (i = 0; i < ARRAY_SIZE(tx_dbg_stats->data); i++)
+               pstats += scnprintf(pstats, end - pstats,
+                                   "DW[%d] = 0x%x\n", i,
+                                   tx_dbg_stats->data[i]);
+
+       return dev->stats_mem.debugfs_mem;
+}
+
+/*
+ * Dump the receive debug words of the last stats mailbox response as
+ * "DW[<index>] = 0x<hex>" lines into the device's debugfs text buffer
+ * (zeroed first) and return the start of that buffer.
+ *
+ * With 200 words of up to 21 characters each the worst-case output is
+ * only a few bytes short of OCRDMA_MAX_DBGFS_MEM, so every line is
+ * bounded by the space actually left in the buffer; scnprintf() always
+ * leaves room for the NUL.
+ */
+static char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev)
+{
+       int i;
+       char *pstats = dev->stats_mem.debugfs_mem;
+       char *end = pstats + OCRDMA_MAX_DBGFS_MEM;
+       struct ocrdma_rdma_stats_resp *rdma_stats =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_rx_dbg_stats *rx_dbg_stats =
+               &rdma_stats->rx_dbg_stats;
+
+       memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       /* Iterate over the array's real size instead of a literal count */
+       for (i = 0; i < ARRAY_SIZE(rx_dbg_stats->data); i++)
+               pstats += scnprintf(pstats, end - pstats,
+                                   "DW[%d] = 0x%x\n", i,
+                                   rx_dbg_stats->data[i]);
+
+       return dev->stats_mem.debugfs_mem;
+}
+
+/*
+ * Refresh the cached statistics through the stats mailbox command, but
+ * only when at least one full second has passed since
+ * dev->last_stats_time. A mailbox failure is logged; the timestamp is
+ * updated whether or not the command succeeded.
+ */
+static void ocrdma_update_stats(struct ocrdma_dev *dev)
+{
+       ulong now = jiffies;
+       ulong elapsed_secs;
+       int rc;
+
+       elapsed_secs = jiffies_to_msecs(now - dev->last_stats_time) / 1000U;
+       if (!elapsed_secs)
+               return;
+
+       rc = ocrdma_mbx_rdma_stats(dev, false);
+       if (rc)
+               pr_err("%s: stats mbox failed with status = %d\n",
+                      __func__, rc);
+       dev->last_stats_time = jiffies;
+}
+
+/*
+ * read() handler shared by all stats files. filp->private_data is the
+ * struct ocrdma_stats descriptor of the file being read; its type picks
+ * the formatter.
+ *
+ * Only a read at offset 0 produces data (anything else returns 0, i.e.
+ * EOF), and the whole text must fit into the user buffer, otherwise
+ * -ENOSPC is returned. An unknown stats type yields -EFAULT.
+ * dev->stats_lock is held while the statistics are refreshed, formatted
+ * and copied out.
+ */
+static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer,
+                                       size_t usr_buf_len, loff_t *ppos)
+{
+       struct ocrdma_stats *entry = filp->private_data;
+       struct ocrdma_dev *dev = entry->dev;
+       char *text = NULL;
+       size_t text_len;
+       ssize_t ret = 0;
+
+       /* No partial reads */
+       if (*ppos != 0)
+               return 0;
+
+       mutex_lock(&dev->stats_lock);
+
+       ocrdma_update_stats(dev);
+
+       switch (entry->type) {
+       case OCRDMA_RSRC_STATS:
+               text = ocrdma_resource_stats(dev);
+               break;
+       case OCRDMA_RXSTATS:
+               text = ocrdma_rx_stats(dev);
+               break;
+       case OCRDMA_WQESTATS:
+               text = ocrdma_wqe_stats(dev);
+               break;
+       case OCRDMA_TXSTATS:
+               text = ocrdma_tx_stats(dev);
+               break;
+       case OCRDMA_DB_ERRSTATS:
+               text = ocrdma_db_errstats(dev);
+               break;
+       case OCRDMA_RXQP_ERRSTATS:
+               text = ocrdma_rxqp_errstats(dev);
+               break;
+       case OCRDMA_TXQP_ERRSTATS:
+               text = ocrdma_txqp_errstats(dev);
+               break;
+       case OCRDMA_TX_DBG_STATS:
+               text = ocrdma_tx_dbg_stats(dev);
+               break;
+       case OCRDMA_RX_DBG_STATS:
+               text = ocrdma_rx_dbg_stats(dev);
+               break;
+       default:
+               ret = -EFAULT;
+               goto unlock;
+       }
+
+       text_len = strlen(text);
+       if (usr_buf_len < text_len)
+               ret = -ENOSPC;
+       else
+               ret = simple_read_from_buffer(buffer, usr_buf_len, ppos,
+                                             text, text_len);
+unlock:
+       mutex_unlock(&dev->stats_lock);
+       return ret;
+}
+
+/*
+ * open() handler: hand the inode's private pointer (the descriptor given
+ * to debugfs_create_file()) to the file so the read handler can find it.
+ * Always succeeds.
+ */
+static int ocrdma_debugfs_open(struct inode *inode, struct file *file)
+{
+       void *priv = inode->i_private;
+
+       if (priv != NULL)
+               file->private_data = priv;
+
+       return 0;
+}
+
+/*
+ * File operations shared by every stats file created in
+ * ocrdma_add_port_stats(); the files are read-only, so only open and
+ * read are provided.
+ */
+static const struct file_operations ocrdma_dbg_ops = {
+       .owner = THIS_MODULE,
+       .open = ocrdma_debugfs_open,
+       .read = ocrdma_dbgfs_ops_read,
+};
+
+/*
+ * Fill in one stats descriptor and expose it as an owner-read-only file
+ * named @name in the device's debugfs directory. Returns false when the
+ * file could not be created.
+ */
+static bool ocrdma_add_stats_file(struct ocrdma_dev *dev,
+                                 struct ocrdma_stats *stats,
+                                 enum OCRDMA_STATS_TYPE type,
+                                 const char *name)
+{
+       stats->type = type;
+       stats->dev = dev;
+       return debugfs_create_file(name, S_IRUSR, dev->dir, stats,
+                                  &ocrdma_dbg_ops) != NULL;
+}
+
+/*
+ * Create the per-device debugfs directory (named after the IB device,
+ * below the driver's base directory) together with all stats files.
+ *
+ * The stats buffers and dev->stats_lock are set up BEFORE any file
+ * becomes visible: the read handler takes the lock and formats into
+ * those buffers, so a read racing with this function must never find
+ * them uninitialised.
+ *
+ * Does nothing when the base directory does not exist. On any failure
+ * everything created here is torn down again and dev->dir is left NULL,
+ * which makes ocrdma_rem_port_stats() a no-op for this device.
+ */
+void ocrdma_add_port_stats(struct ocrdma_dev *dev)
+{
+       if (!ocrdma_dbgfs_dir)
+               return;
+
+       /* DMA memory for the stats mbx command and the text buffer */
+       if (!ocrdma_alloc_stats_mem(dev))
+               goto err_mem;
+
+       mutex_init(&dev->stats_lock);
+
+       /* Create port stats base dir */
+       dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir);
+       if (!dev->dir)
+               goto err_dir;
+
+       /* "||" short-circuits, so creation stops at the first failure */
+       if (!ocrdma_add_stats_file(dev, &dev->rsrc_stats,
+                                  OCRDMA_RSRC_STATS, "resource_stats") ||
+           !ocrdma_add_stats_file(dev, &dev->rx_stats,
+                                  OCRDMA_RXSTATS, "rx_stats") ||
+           !ocrdma_add_stats_file(dev, &dev->wqe_stats,
+                                  OCRDMA_WQESTATS, "wqe_stats") ||
+           !ocrdma_add_stats_file(dev, &dev->tx_stats,
+                                  OCRDMA_TXSTATS, "tx_stats") ||
+           !ocrdma_add_stats_file(dev, &dev->db_err_stats,
+                                  OCRDMA_DB_ERRSTATS, "db_err_stats") ||
+           !ocrdma_add_stats_file(dev, &dev->tx_qp_err_stats,
+                                  OCRDMA_TXQP_ERRSTATS, "tx_qp_err_stats") ||
+           !ocrdma_add_stats_file(dev, &dev->rx_qp_err_stats,
+                                  OCRDMA_RXQP_ERRSTATS, "rx_qp_err_stats") ||
+           !ocrdma_add_stats_file(dev, &dev->tx_dbg_stats,
+                                  OCRDMA_TX_DBG_STATS, "tx_dbg_stats") ||
+           !ocrdma_add_stats_file(dev, &dev->rx_dbg_stats,
+                                  OCRDMA_RX_DBG_STATS, "rx_dbg_stats"))
+               goto err_dir;
+
+       return;
+
+err_dir:
+       /* Take the files away before the resources their readers use */
+       debugfs_remove_recursive(dev->dir);
+       mutex_destroy(&dev->stats_lock);
+err_mem:
+       ocrdma_release_stats_mem(dev);
+       dev->dir = NULL;
+}
+
+/*
+ * Tear down what ocrdma_add_port_stats() created for this device.
+ * A NULL dev->dir means nothing was set up, so there is nothing to do.
+ *
+ * The directory still contains the stats files, so it has to be removed
+ * recursively; a plain debugfs_remove() would leave the files behind,
+ * pointing at descriptors inside a device that is about to go away.
+ * The files are removed first, and only then the lock and the buffers
+ * their read handler relies on.
+ */
+void ocrdma_rem_port_stats(struct ocrdma_dev *dev)
+{
+       if (!dev->dir)
+               return;
+
+       debugfs_remove_recursive(dev->dir);
+       dev->dir = NULL;
+
+       mutex_destroy(&dev->stats_lock);
+       ocrdma_release_stats_mem(dev);
+}
+
+/*
+ * Create the driver-wide "ocrdma" directory in the debugfs root and
+ * remember it in ocrdma_dbgfs_dir. The result is not checked here;
+ * ocrdma_add_port_stats() skips its work when the pointer is NULL.
+ */
+void ocrdma_init_debugfs(void)
+{
+       /* Create base dir in debugfs root dir */
+       ocrdma_dbgfs_dir = debugfs_create_dir("ocrdma", NULL);
+}
+
+/*
+ * Remove the driver-wide "ocrdma" debugfs directory together with
+ * anything still left below it.
+ */
+void ocrdma_rem_debugfs(void)
+{
+       debugfs_remove_recursive(ocrdma_dbgfs_dir);
+}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
new file mode 100644 (file)
index 0000000..5f5e20c
--- /dev/null
@@ -0,0 +1,54 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) adapters.                   *
+ * Copyright (C) 2008-2014 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_STATS_H__
+#define __OCRDMA_STATS_H__
+
+#include <linux/debugfs.h>
+#include "ocrdma.h"
+#include "ocrdma_hw.h"
+
+#define OCRDMA_MAX_DBGFS_MEM 4096
+
+enum OCRDMA_STATS_TYPE {       /* stored in the .type of each debugfs stats object */
+       OCRDMA_RSRC_STATS,
+       OCRDMA_RXSTATS,
+       OCRDMA_WQESTATS,
+       OCRDMA_TXSTATS,
+       OCRDMA_DB_ERRSTATS,
+       OCRDMA_RXQP_ERRSTATS,   /* type of the "rx_qp_err_stats" debugfs file */
+       OCRDMA_TXQP_ERRSTATS,   /* type of the "tx_qp_err_stats" debugfs file */
+       OCRDMA_TX_DBG_STATS,    /* type of the "tx_dbg_stats" debugfs file */
+       OCRDMA_RX_DBG_STATS     /* type of the "rx_dbg_stats" debugfs file */
+};
+
+void ocrdma_rem_debugfs(void);
+void ocrdma_init_debugfs(void);
+void ocrdma_rem_port_stats(struct ocrdma_dev *dev);
+void ocrdma_add_port_stats(struct ocrdma_dev *dev);
+
+#endif /* __OCRDMA_STATS_H__ */
index 0de3473fa7d9747164a0ee2355c2efe87481c8c0..edf6211d84b8ec5f7b9d22da5f7f6170f0c810ef 100644 (file)
@@ -53,7 +53,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
 
        dev = get_ocrdma_dev(ibdev);
        memset(sgid, 0, sizeof(*sgid));
-       if (index >= OCRDMA_MAX_SGID)
+       if (index > OCRDMA_MAX_SGID)
                return -EINVAL;
 
        memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid));
@@ -89,7 +89,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
        attr->max_cq = dev->attr.max_cq;
        attr->max_cqe = dev->attr.max_cqe;
        attr->max_mr = dev->attr.max_mr;
-       attr->max_mw = 0;
+       attr->max_mw = dev->attr.max_mw;
        attr->max_pd = dev->attr.max_pd;
        attr->atomic_cap = 0;
        attr->max_fmr = 0;
@@ -144,7 +144,6 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
        }
 }
 
-
 int ocrdma_query_port(struct ib_device *ibdev,
                      u8 port, struct ib_port_attr *props)
 {
@@ -267,7 +266,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
 
        if (udata && uctx) {
                pd->dpp_enabled =
-                       dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY;
+                       ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
                pd->num_dpp_qp =
                        pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
        }
@@ -838,8 +837,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
 
        status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
 
-       if (mr->hwmr.fr_mr == 0)
-               ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+       ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
 
        /* it could be user registered memory. */
        if (mr->umem)
@@ -908,6 +906,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
        spin_lock_init(&cq->comp_handler_lock);
        INIT_LIST_HEAD(&cq->sq_head);
        INIT_LIST_HEAD(&cq->rq_head);
+       cq->first_arm = true;
 
        if (ib_ctx) {
                uctx = get_ocrdma_ucontext(ib_ctx);
@@ -925,9 +924,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
                        goto ctx_err;
        }
        cq->phase = OCRDMA_CQE_VALID;
-       cq->arm_needed = true;
        dev->cq_tbl[cq->id] = cq;
-
        return &cq->ibcq;
 
 ctx_err:
@@ -950,15 +947,52 @@ int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
        return status;
 }
 
+/*
+ * ocrdma_flush_cq() - count the CQEs that is_cqe_valid() still reports as
+ * valid across all cq->cqe_cnt entries and ring the CQ doorbell once with
+ * that count, with both the arm and solicited flags cleared.
+ */
+static void ocrdma_flush_cq(struct ocrdma_cq *cq)
+{
+       struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
+       struct ocrdma_cqe *entry = cq->va;
+       int remaining = cq->cqe_cnt;
+       unsigned long irq_flags;
+       int num_valid = 0;
+
+       /*
+        * The last irq might have scheduled a polling thread; take cq_lock
+        * to sync up with it before hard flushing.
+        */
+       spin_lock_irqsave(&cq->cq_lock, irq_flags);
+       for (; remaining; remaining--, entry++) {
+               if (is_cqe_valid(cq, entry))
+                       num_valid++;
+       }
+       ocrdma_ring_cq_db(dev, cq->id, false, false, num_valid);
+       spin_unlock_irqrestore(&cq->cq_lock, irq_flags);
+}
+
 int ocrdma_destroy_cq(struct ib_cq *ibcq)
 {
        int status;
        struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+       struct ocrdma_eq *eq = NULL;
        struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
        int pdid = 0;
+       u32 irq, indx;
 
-       status = ocrdma_mbx_destroy_cq(dev, cq);
+       dev->cq_tbl[cq->id] = NULL;
+       indx = ocrdma_get_eq_table_index(dev, cq->eqn);
+       if (indx == -EINVAL)
+               BUG();
 
+       eq = &dev->eq_tbl[indx];
+       irq = ocrdma_get_irq(dev, eq);
+       synchronize_irq(irq);
+       ocrdma_flush_cq(cq);
+
+       status = ocrdma_mbx_destroy_cq(dev, cq);
        if (cq->ucontext) {
                pdid = cq->ucontext->cntxt_pd->id;
                ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
@@ -967,7 +1001,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
                                ocrdma_get_db_addr(dev, pdid),
                                dev->nic_info.db_page_size);
        }
-       dev->cq_tbl[cq->id] = NULL;
 
        kfree(cq);
        return status;
@@ -1090,15 +1123,9 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
        }
        uresp.db_page_addr = usr_db;
        uresp.db_page_size = dev->nic_info.db_page_size;
-       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
-               uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
-               uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
-               uresp.db_shift = 24;
-       } else {
-               uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET;
-               uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
-               uresp.db_shift = 16;
-       }
+       uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
+       uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
+       uresp.db_shift = OCRDMA_DB_RQ_SHIFT;
 
        if (qp->dpp_enabled) {
                uresp.dpp_credit = dpp_credit_lmt;
@@ -1130,7 +1157,7 @@ err:
 static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
                             struct ocrdma_pd *pd)
 {
-       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+       if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
                qp->sq_db = dev->nic_info.db +
                        (pd->id * dev->nic_info.db_page_size) +
                        OCRDMA_DB_GEN2_SQ_OFFSET;
@@ -1180,7 +1207,6 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
        qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
 }
 
-
 static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
                                   struct ib_qp_init_attr *attrs)
 {
@@ -1266,17 +1292,6 @@ gen_err:
        return ERR_PTR(status);
 }
 
-
-static void ocrdma_flush_rq_db(struct ocrdma_qp *qp)
-{
-       if (qp->db_cache) {
-               u32 val = qp->rq.dbid | (qp->db_cache <<
-                               ocrdma_get_num_posted_shift(qp));
-               iowrite32(val, qp->rq_db);
-               qp->db_cache = 0;
-       }
-}
-
 int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                      int attr_mask)
 {
@@ -1294,9 +1309,7 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
         */
        if (status < 0)
                return status;
-       status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps);
-       if (!status && attr_mask & IB_QP_STATE && attr->qp_state == IB_QPS_RTR)
-               ocrdma_flush_rq_db(qp);
+       status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
 
        return status;
 }
@@ -1508,7 +1521,7 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
        int discard_cnt = 0;
        u32 cur_getp, stop_getp;
        struct ocrdma_cqe *cqe;
-       u32 qpn = 0;
+       u32 qpn = 0, wqe_idx = 0;
 
        spin_lock_irqsave(&cq->cq_lock, cq_flags);
 
@@ -1537,24 +1550,29 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
                if (qpn == 0 || qpn != qp->id)
                        goto skip_cqe;
 
-               /* mark cqe discarded so that it is not picked up later
-                * in the poll_cq().
-                */
-               discard_cnt += 1;
-               cqe->cmn.qpn = 0;
                if (is_cqe_for_sq(cqe)) {
                        ocrdma_hwq_inc_tail(&qp->sq);
                } else {
                        if (qp->srq) {
+                               wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
+                                       OCRDMA_CQE_BUFTAG_SHIFT) &
+                                       qp->srq->rq.max_wqe_idx;
+                               if (wqe_idx < 1)
+                                       BUG();
                                spin_lock_irqsave(&qp->srq->q_lock, flags);
                                ocrdma_hwq_inc_tail(&qp->srq->rq);
-                               ocrdma_srq_toggle_bit(qp->srq, cur_getp);
+                               ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1);
                                spin_unlock_irqrestore(&qp->srq->q_lock, flags);
 
                        } else {
                                ocrdma_hwq_inc_tail(&qp->rq);
                        }
                }
+               /* mark cqe discarded so that it is not picked up later
+                * in the poll_cq().
+                */
+               discard_cnt += 1;
+               cqe->cmn.qpn = 0;
 skip_cqe:
                cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
        } while (cur_getp != stop_getp);
@@ -1657,7 +1675,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
            (srq->pd->id * dev->nic_info.db_page_size);
        uresp.db_page_size = dev->nic_info.db_page_size;
        uresp.num_rqe_allocated = srq->rq.max_cnt;
-       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+       if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
                uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
                uresp.db_shift = 24;
        } else {
@@ -2007,15 +2025,15 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
        fast_reg->num_sges = wr->wr.fast_reg.page_list_len;
        fast_reg->size_sge =
                get_encoded_page_size(1 << wr->wr.fast_reg.page_shift);
-       mr = (struct ocrdma_mr *) (unsigned long) qp->dev->stag_arr[(hdr->lkey >> 8) &
-               (OCRDMA_MAX_STAG - 1)];
+       mr = (struct ocrdma_mr *) (unsigned long)
+               qp->dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)];
        build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr);
        return 0;
 }
 
 static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
 {
-       u32 val = qp->sq.dbid | (1 << 16);
+       u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT);
 
        iowrite32(val, qp->sq_db);
 }
@@ -2120,12 +2138,9 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
 {
-       u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp));
+       u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT);
 
-       if (qp->state != OCRDMA_QPS_INIT)
-               iowrite32(val, qp->rq_db);
-       else
-               qp->db_cache++;
+       iowrite32(val, qp->rq_db);
 }
 
 static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
@@ -2211,7 +2226,7 @@ static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
 
        if (row == srq->bit_fields_len)
                BUG();
-       return indx;
+       return indx + 1; /* Use from index 1 */
 }
 
 static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
@@ -2548,10 +2563,13 @@ static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
 
        srq = get_ocrdma_srq(qp->ibqp.srq);
        wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
-                       OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
+               OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
+       if (wqe_idx < 1)
+               BUG();
+
        ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
        spin_lock_irqsave(&srq->q_lock, flags);
-       ocrdma_srq_toggle_bit(srq, wqe_idx);
+       ocrdma_srq_toggle_bit(srq, wqe_idx - 1);
        spin_unlock_irqrestore(&srq->q_lock, flags);
        ocrdma_hwq_inc_tail(&srq->rq);
 }
@@ -2703,10 +2721,18 @@ expand_cqe:
        }
 stop_cqe:
        cq->getp = cur_getp;
-       if (polled_hw_cqes || expand || stop) {
-               ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited,
+       if (cq->deferred_arm) {
+               ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol,
                                  polled_hw_cqes);
+               cq->deferred_arm = false;
+               cq->deferred_sol = false;
+       } else {
+               /* We need to pop the CQE. No need to arm */
+               ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol,
+                                 polled_hw_cqes);
+               cq->deferred_sol = false;
        }
+
        return i;
 }
 
@@ -2778,30 +2804,28 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
        struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
        struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
        u16 cq_id;
-       u16 cur_getp;
-       struct ocrdma_cqe *cqe;
        unsigned long flags;
+       bool arm_needed = false, sol_needed = false;
 
        cq_id = cq->id;
 
        spin_lock_irqsave(&cq->cq_lock, flags);
        if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
-               cq->armed = true;
+               arm_needed = true;
        if (cq_flags & IB_CQ_SOLICITED)
-               cq->solicited = true;
-
-       cur_getp = cq->getp;
-       cqe = cq->va + cur_getp;
+               sol_needed = true;
 
-       /* check whether any valid cqe exist or not, if not then safe to
-        * arm. If cqe is not yet consumed, then let it get consumed and then
-        * we arm it to avoid false interrupts.
-        */
-       if (!is_cqe_valid(cq, cqe) || cq->arm_needed) {
-               cq->arm_needed = false;
-               ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0);
+       if (cq->first_arm) {
+               ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
+               cq->first_arm = false;
+               goto skip_defer;
        }
+       cq->deferred_arm = true;
+
+skip_defer:
+       cq->deferred_sol = sol_needed;
        spin_unlock_irqrestore(&cq->cq_lock, flags);
+
        return 0;
 }
 
@@ -2836,7 +2860,8 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
                goto mbx_err;
        mr->ibmr.rkey = mr->hwmr.lkey;
        mr->ibmr.lkey = mr->hwmr.lkey;
-       dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = mr;
+       dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] =
+               (unsigned long) mr;
        return &mr->ibmr;
 mbx_err:
        ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
index 1946101419a31c0c3066c72d90401639a474d3e3..c00ae093b6f881870867b8dac16af33efca4091b 100644 (file)
@@ -868,8 +868,10 @@ struct qib_devdata {
        /* last buffer for user use */
        u32 lastctxt_piobuf;
 
-       /* saturating counter of (non-port-specific) device interrupts */
-       u32 int_counter;
+       /* reset value */
+       u64 z_int_counter;
+       /* percpu intcounter */
+       u64 __percpu *int_counter;
 
        /* pio bufs allocated per ctxt */
        u32 pbufsctxt;
@@ -1184,7 +1186,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *);
 void qib_set_ctxtcnt(struct qib_devdata *);
 int qib_create_ctxts(struct qib_devdata *dd);
 struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
-void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
+int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
 void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
 
 u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *);
@@ -1449,6 +1451,10 @@ void qib_nomsi(struct qib_devdata *);
 void qib_nomsix(struct qib_devdata *);
 void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *);
 void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8);
+/* interrupts for device */
+u64 qib_int_counter(struct qib_devdata *);
+/* interrupt for all devices */
+u64 qib_sps_ints(void);
 
 /*
  * dma_addr wrappers - all 0's invalid for hw
index 1686fd4bda87286c6120d96c432f9bba198e4596..5dfda4c5cc9c3b1fde36d02c55cd387b7e366542 100644 (file)
@@ -546,7 +546,7 @@ static ssize_t qib_diagpkt_write(struct file *fp,
                                 size_t count, loff_t *off)
 {
        u32 __iomem *piobuf;
-       u32 plen, clen, pbufn;
+       u32 plen, pbufn, maxlen_reserve;
        struct qib_diag_xpkt dp;
        u32 *tmpbuf = NULL;
        struct qib_devdata *dd;
@@ -590,15 +590,20 @@ static ssize_t qib_diagpkt_write(struct file *fp,
        }
        ppd = &dd->pport[dp.port - 1];
 
-       /* need total length before first word written */
-       /* +1 word is for the qword padding */
-       plen = sizeof(u32) + dp.len;
-       clen = dp.len >> 2;
-
-       if ((plen + 4) > ppd->ibmaxlen) {
+       /*
+        * need total length before first word written, plus 2 Dwords. One Dword
+        * is for padding so we get the full user data when not aligned on
+        * a word boundary. The other Dword is to make sure we have room for the
+        * ICRC which gets tacked on later.
+        */
+       maxlen_reserve = 2 * sizeof(u32);
+       if (dp.len > ppd->ibmaxlen - maxlen_reserve) {
                ret = -EINVAL;
-               goto bail;      /* before writing pbc */
+               goto bail;
        }
+
+       plen = sizeof(u32) + dp.len;
+
        tmpbuf = vmalloc(plen);
        if (!tmpbuf) {
                qib_devinfo(dd->pcidev,
@@ -638,11 +643,11 @@ static ssize_t qib_diagpkt_write(struct file *fp,
         */
        if (dd->flags & QIB_PIO_FLUSH_WC) {
                qib_flush_wc();
-               qib_pio_copy(piobuf + 2, tmpbuf, clen - 1);
+               qib_pio_copy(piobuf + 2, tmpbuf, plen - 1);
                qib_flush_wc();
-               __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+               __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
        } else
-               qib_pio_copy(piobuf + 2, tmpbuf, clen);
+               qib_pio_copy(piobuf + 2, tmpbuf, plen);
 
        if (dd->flags & QIB_USE_SPCL_TRIG) {
                u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
@@ -689,28 +694,23 @@ int qib_register_observer(struct qib_devdata *dd,
                          const struct diag_observer *op)
 {
        struct diag_observer_list_elt *olp;
-       int ret = -EINVAL;
+       unsigned long flags;
 
        if (!dd || !op)
-               goto bail;
-       ret = -ENOMEM;
+               return -EINVAL;
        olp = vmalloc(sizeof *olp);
        if (!olp) {
                pr_err("vmalloc for observer failed\n");
-               goto bail;
+               return -ENOMEM;
        }
-       if (olp) {
-               unsigned long flags;
 
-               spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
-               olp->op = op;
-               olp->next = dd->diag_observer_list;
-               dd->diag_observer_list = olp;
-               spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
-               ret = 0;
-       }
-bail:
-       return ret;
+       spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
+       olp->op = op;
+       olp->next = dd->diag_observer_list;
+       dd->diag_observer_list = olp;
+       spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
+
+       return 0;
 }
 
 /* Remove all registered observers when device is closed */
index 2920bb39a65b946d6033e18d7ee4033e1a455ac2..59fe092b4b0f10be3200d84acf2cd9ca21310f45 100644 (file)
@@ -108,6 +108,10 @@ static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl,
                        ret = 0;
                        break;
                }
+               sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+               sg->dma_length = sg->length;
+#endif
        }
        return ret;
 }
@@ -119,21 +123,6 @@ static void qib_unmap_sg(struct ib_device *dev,
        BUG_ON(!valid_dma_direction(direction));
 }
 
-static u64 qib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
-       u64 addr = (u64) page_address(sg_page(sg));
-
-       if (addr)
-               addr += sg->offset;
-       return addr;
-}
-
-static unsigned int qib_sg_dma_len(struct ib_device *dev,
-                                  struct scatterlist *sg)
-{
-       return sg->length;
-}
-
 static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr,
                                    size_t size, enum dma_data_direction dir)
 {
@@ -173,8 +162,6 @@ struct ib_dma_mapping_ops qib_dma_mapping_ops = {
        .unmap_page = qib_dma_unmap_page,
        .map_sg = qib_map_sg,
        .unmap_sg = qib_unmap_sg,
-       .dma_address = qib_sg_dma_address,
-       .dma_len = qib_sg_dma_len,
        .sync_single_for_cpu = qib_sync_single_for_cpu,
        .sync_single_for_device = qib_sync_single_for_device,
        .alloc_coherent = qib_dma_alloc_coherent,
index 275f247f9fca540e45854655bc7871bacdb6bc98..b15e34eeef685d510c781d25d08433e4b3e7c717 100644 (file)
@@ -1459,7 +1459,7 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo,
                                        cused++;
                                else
                                        cfree++;
-                       if (pusable && cfree && cused < inuse) {
+                       if (cfree && cused < inuse) {
                                udd = dd;
                                inuse = cused;
                        }
@@ -1578,7 +1578,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp)
        struct qib_ctxtdata *rcd = fd->rcd;
        struct qib_devdata *dd = rcd->dd;
 
-       if (dd->flags & QIB_HAS_SEND_DMA)
+       if (dd->flags & QIB_HAS_SEND_DMA) {
 
                fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
                                                    dd->unit,
@@ -1586,6 +1586,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp)
                                                    fd->subctxt);
                if (!fd->pq)
                        return -ENOMEM;
+       }
 
        return 0;
 }
index c61e2a92b3c115b4ab0b6c3ceb3f52d0d49a10ea..cab610ccd50e3d2cdf5efaf220dfd8ad5a95cc06 100644 (file)
@@ -105,6 +105,7 @@ static int create_file(const char *name, umode_t mode,
 static ssize_t driver_stats_read(struct file *file, char __user *buf,
                                 size_t count, loff_t *ppos)
 {
+       qib_stats.sps_ints = qib_sps_ints();
        return simple_read_from_buffer(buf, count, ppos, &qib_stats,
                                       sizeof qib_stats);
 }
index 84e593d6007b5c31a3ef0cc080c243697de10027..d68266ac7619b896c49e500256c263ec1c3619c1 100644 (file)
@@ -1634,9 +1634,7 @@ static irqreturn_t qib_6120intr(int irq, void *data)
                goto bail;
        }
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
                              QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
@@ -1808,7 +1806,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd)
         * isn't set.
         */
        dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
-       dd->int_counter = 0; /* so we check interrupts work again */
+       /* so we check interrupts work again */
+       dd->z_int_counter = qib_int_counter(dd);
        val = dd->control | QLOGIC_IB_C_RESET;
        writeq(val, &dd->kregbase[kr_control]);
        mb(); /* prevent compiler re-ordering around actual reset */
@@ -3266,7 +3265,9 @@ static int init_6120_variables(struct qib_devdata *dd)
 
        dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated);
 
-       qib_init_pportdata(ppd, dd, 0, 1);
+       ret = qib_init_pportdata(ppd, dd, 0, 1);
+       if (ret)
+               goto bail;
        ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
        ppd->link_speed_supported = QIB_IB_SDR;
        ppd->link_width_enabled = IB_WIDTH_4X;
index 454c2e7668fe71815f80cdabed08bacb35863ec0..7dec89fdc1248dc69c3cab38069e5e2f32986ce3 100644 (file)
@@ -1962,10 +1962,7 @@ static irqreturn_t qib_7220intr(int irq, void *data)
                goto bail;
        }
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
-
+       this_cpu_inc(*dd->int_counter);
        if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
                              QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
                unlikely_7220_intr(dd, istat);
@@ -2120,7 +2117,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd)
         * isn't set.
         */
        dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
-       dd->int_counter = 0; /* so we check interrupts work again */
+       /* so we check interrupts work again */
+       dd->z_int_counter = qib_int_counter(dd);
        val = dd->control | QLOGIC_IB_C_RESET;
        writeq(val, &dd->kregbase[kr_control]);
        mb(); /* prevent compiler reordering around actual reset */
@@ -4061,7 +4059,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
        init_waitqueue_head(&cpspec->autoneg_wait);
        INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work);
 
-       qib_init_pportdata(ppd, dd, 0, 1);
+       ret = qib_init_pportdata(ppd, dd, 0, 1);
+       if (ret)
+               goto bail;
        ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
        ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR;
 
index d1bd21319d7d2ec128442042448ce101111938a3..a7eb32517a04bced55f9d1d5720b5d060d96e92b 100644 (file)
@@ -3115,9 +3115,7 @@ static irqreturn_t qib_7322intr(int irq, void *data)
                goto bail;
        }
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* handle "errors" of various kinds first, device ahead of port */
        if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO |
@@ -3186,9 +3184,7 @@ static irqreturn_t qib_7322pintr(int irq, void *data)
                 */
                return IRQ_HANDLED;
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* Clear the interrupt bit we expect to be set. */
        qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) |
@@ -3215,9 +3211,7 @@ static irqreturn_t qib_7322bufavail(int irq, void *data)
                 */
                return IRQ_HANDLED;
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* Clear the interrupt bit we expect to be set. */
        qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL);
@@ -3248,9 +3242,7 @@ static irqreturn_t sdma_intr(int irq, void *data)
                 */
                return IRQ_HANDLED;
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* Clear the interrupt bit we expect to be set. */
        qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3277,9 +3269,7 @@ static irqreturn_t sdma_idle_intr(int irq, void *data)
                 */
                return IRQ_HANDLED;
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* Clear the interrupt bit we expect to be set. */
        qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3306,9 +3296,7 @@ static irqreturn_t sdma_progress_intr(int irq, void *data)
                 */
                return IRQ_HANDLED;
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* Clear the interrupt bit we expect to be set. */
        qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3336,9 +3324,7 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data)
                 */
                return IRQ_HANDLED;
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* Clear the interrupt bit we expect to be set. */
        qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3723,7 +3709,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
        dd->pport->cpspec->ibsymdelta = 0;
        dd->pport->cpspec->iblnkerrdelta = 0;
        dd->pport->cpspec->ibmalfdelta = 0;
-       dd->int_counter = 0; /* so we check interrupts work again */
+       /* so we check interrupts work again */
+       dd->z_int_counter = qib_int_counter(dd);
 
        /*
         * Keep chip from being accessed until we are ready.  Use
@@ -6557,7 +6544,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
                }
 
                dd->num_pports++;
-               qib_init_pportdata(ppd, dd, pidx, dd->num_pports);
+               ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports);
+               if (ret) {
+                       dd->num_pports--;
+                       goto bail;
+               }
 
                ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
                ppd->link_width_enabled = IB_WIDTH_4X;
index 24e802f4ea2f04b4554678eee76341043801cd35..5b7aeb224a30ab54d56ba2566aaa8f3d4507de9d 100644 (file)
@@ -130,7 +130,6 @@ void qib_set_ctxtcnt(struct qib_devdata *dd)
 int qib_create_ctxts(struct qib_devdata *dd)
 {
        unsigned i;
-       int ret;
        int local_node_id = pcibus_to_node(dd->pcidev->bus);
 
        if (local_node_id < 0)
@@ -145,8 +144,7 @@ int qib_create_ctxts(struct qib_devdata *dd)
        if (!dd->rcd) {
                qib_dev_err(dd,
                        "Unable to allocate ctxtdata array, failing\n");
-               ret = -ENOMEM;
-               goto done;
+               return -ENOMEM;
        }
 
        /* create (one or more) kctxt */
@@ -163,15 +161,14 @@ int qib_create_ctxts(struct qib_devdata *dd)
                if (!rcd) {
                        qib_dev_err(dd,
                                "Unable to allocate ctxtdata for Kernel ctxt, failing\n");
-                       ret = -ENOMEM;
-                       goto done;
+                       kfree(dd->rcd);
+                       dd->rcd = NULL;
+                       return -ENOMEM;
                }
                rcd->pkeys[0] = QIB_DEFAULT_P_KEY;
                rcd->seq_cnt = 1;
        }
-       ret = 0;
-done:
-       return ret;
+       return 0;
 }
 
 /*
@@ -233,7 +230,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
 /*
  * Common code for initializing the physical port structure.
  */
-void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
+int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
                        u8 hw_pidx, u8 port)
 {
        int size;
@@ -243,6 +240,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
 
        spin_lock_init(&ppd->sdma_lock);
        spin_lock_init(&ppd->lflags_lock);
+       spin_lock_init(&ppd->cc_shadow_lock);
        init_waitqueue_head(&ppd->state_wait);
 
        init_timer(&ppd->symerr_clear_timer);
@@ -250,8 +248,10 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
        ppd->symerr_clear_timer.data = (unsigned long)ppd;
 
        ppd->qib_wq = NULL;
-
-       spin_lock_init(&ppd->cc_shadow_lock);
+       ppd->ibport_data.pmastats =
+               alloc_percpu(struct qib_pma_counters);
+       if (!ppd->ibport_data.pmastats)
+               return -ENOMEM;
 
        if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
                goto bail;
@@ -299,7 +299,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
                goto bail_3;
        }
 
-       return;
+       return 0;
 
 bail_3:
        kfree(ppd->ccti_entries_shadow);
@@ -313,7 +313,7 @@ bail_1:
 bail:
        /* User is intentionally disabling the congestion control agent */
        if (!qib_cc_table_size)
-               return;
+               return 0;
 
        if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) {
                qib_cc_table_size = 0;
@@ -324,7 +324,7 @@ bail:
 
        qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n",
                port);
-       return;
+       return 0;
 }
 
 static int init_pioavailregs(struct qib_devdata *dd)
@@ -525,6 +525,7 @@ static void enable_chip(struct qib_devdata *dd)
 static void verify_interrupt(unsigned long opaque)
 {
        struct qib_devdata *dd = (struct qib_devdata *) opaque;
+       u64 int_counter;
 
        if (!dd)
                return; /* being torn down */
@@ -533,7 +534,8 @@ static void verify_interrupt(unsigned long opaque)
         * If we don't have a lid or any interrupts, let the user know and
         * don't bother checking again.
         */
-       if (dd->int_counter == 0) {
+       int_counter = qib_int_counter(dd) - dd->z_int_counter;
+       if (int_counter == 0) {
                if (!dd->f_intr_fallback(dd))
                        dev_err(&dd->pcidev->dev,
                                "No interrupts detected, not usable.\n");
@@ -633,6 +635,12 @@ wq_error:
        return -ENOMEM;
 }
 
+static void qib_free_pportdata(struct qib_pportdata *ppd)
+{
+       free_percpu(ppd->ibport_data.pmastats);
+       ppd->ibport_data.pmastats = NULL;
+}
+
 /**
  * qib_init - do the actual initialization sequence on the chip
  * @dd: the qlogic_ib device
@@ -920,6 +928,7 @@ static void qib_shutdown_device(struct qib_devdata *dd)
                        destroy_workqueue(ppd->qib_wq);
                        ppd->qib_wq = NULL;
                }
+               qib_free_pportdata(ppd);
        }
 
        qib_update_eeprom_log(dd);
@@ -1079,9 +1088,34 @@ void qib_free_devdata(struct qib_devdata *dd)
 #ifdef CONFIG_DEBUG_FS
        qib_dbg_ibdev_exit(&dd->verbs_dev);
 #endif
+       free_percpu(dd->int_counter);
        ib_dealloc_device(&dd->verbs_dev.ibdev);
 }
 
+u64 qib_int_counter(struct qib_devdata *dd)
+{
+       int cpu;
+       u64 int_counter = 0;
+
+       for_each_possible_cpu(cpu)
+               int_counter += *per_cpu_ptr(dd->int_counter, cpu);
+       return int_counter;
+}
+
+u64 qib_sps_ints(void)
+{
+       unsigned long flags;
+       struct qib_devdata *dd;
+       u64 sps_ints = 0;
+
+       spin_lock_irqsave(&qib_devs_lock, flags);
+       list_for_each_entry(dd, &qib_dev_list, list) {
+               sps_ints += qib_int_counter(dd);
+       }
+       spin_unlock_irqrestore(&qib_devs_lock, flags);
+       return sps_ints;
+}
+
 /*
  * Allocate our primary per-unit data structure.  Must be done via verbs
  * allocator, because the verbs cleanup process both does cleanup and
@@ -1097,14 +1131,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
        int ret;
 
        dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
-       if (!dd) {
-               dd = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
+       if (!dd)
+               return ERR_PTR(-ENOMEM);
 
-#ifdef CONFIG_DEBUG_FS
-       qib_dbg_ibdev_init(&dd->verbs_dev);
-#endif
+       INIT_LIST_HEAD(&dd->list);
 
        idr_preload(GFP_KERNEL);
        spin_lock_irqsave(&qib_devs_lock, flags);
@@ -1121,11 +1151,13 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
        if (ret < 0) {
                qib_early_err(&pdev->dev,
                              "Could not allocate unit ID: error %d\n", -ret);
-#ifdef CONFIG_DEBUG_FS
-               qib_dbg_ibdev_exit(&dd->verbs_dev);
-#endif
-               ib_dealloc_device(&dd->verbs_dev.ibdev);
-               dd = ERR_PTR(ret);
+               goto bail;
+       }
+       dd->int_counter = alloc_percpu(u64);
+       if (!dd->int_counter) {
+               ret = -ENOMEM;
+               qib_early_err(&pdev->dev,
+                             "Could not allocate per-cpu int_counter\n");
                goto bail;
        }
 
@@ -1139,9 +1171,15 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
                        qib_early_err(&pdev->dev,
                                "Could not alloc cpulist info, cpu affinity might be wrong\n");
        }
-
-bail:
+#ifdef CONFIG_DEBUG_FS
+       qib_dbg_ibdev_init(&dd->verbs_dev);
+#endif
        return dd;
+bail:
+       if (!list_empty(&dd->list))
+               list_del_init(&dd->list);
+       ib_dealloc_device(&dd->verbs_dev.ibdev);
+       return ERR_PTR(ret);
 }
 
 /*
index ccb119143d20568eb0570ab5b1360a78ca888e08..edad991d60ed5dd73a4638c71c59411fa112763a 100644 (file)
@@ -1634,6 +1634,23 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp,
        return reply((struct ib_smp *)pmp);
 }
 
+static void qib_snapshot_pmacounters(
+       struct qib_ibport *ibp,
+       struct qib_pma_counters *pmacounters)
+{
+       struct qib_pma_counters *p;
+       int cpu;
+
+       memset(pmacounters, 0, sizeof(*pmacounters));
+       for_each_possible_cpu(cpu) {
+               p = per_cpu_ptr(ibp->pmastats, cpu);
+               pmacounters->n_unicast_xmit += p->n_unicast_xmit;
+               pmacounters->n_unicast_rcv += p->n_unicast_rcv;
+               pmacounters->n_multicast_xmit += p->n_multicast_xmit;
+               pmacounters->n_multicast_rcv += p->n_multicast_rcv;
+       }
+}
+
 static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
                                    struct ib_device *ibdev, u8 port)
 {
@@ -1642,6 +1659,7 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
        struct qib_ibport *ibp = to_iport(ibdev, port);
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
        u64 swords, rwords, spkts, rpkts, xwait;
+       struct qib_pma_counters pma;
        u8 port_select = p->port_select;
 
        memset(pmp->data, 0, sizeof(pmp->data));
@@ -1664,10 +1682,17 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
        p->port_rcv_data = cpu_to_be64(rwords);
        p->port_xmit_packets = cpu_to_be64(spkts);
        p->port_rcv_packets = cpu_to_be64(rpkts);
-       p->port_unicast_xmit_packets = cpu_to_be64(ibp->n_unicast_xmit);
-       p->port_unicast_rcv_packets = cpu_to_be64(ibp->n_unicast_rcv);
-       p->port_multicast_xmit_packets = cpu_to_be64(ibp->n_multicast_xmit);
-       p->port_multicast_rcv_packets = cpu_to_be64(ibp->n_multicast_rcv);
+
+       qib_snapshot_pmacounters(ibp, &pma);
+
+       p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit
+               - ibp->z_unicast_xmit);
+       p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv
+               - ibp->z_unicast_rcv);
+       p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit
+               - ibp->z_multicast_xmit);
+       p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv
+               - ibp->z_multicast_rcv);
 
 bail:
        return reply((struct ib_smp *) pmp);
@@ -1795,6 +1820,7 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp,
        struct qib_ibport *ibp = to_iport(ibdev, port);
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
        u64 swords, rwords, spkts, rpkts, xwait;
+       struct qib_pma_counters pma;
 
        qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait);
 
@@ -1810,17 +1836,19 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp,
        if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
                ibp->z_port_rcv_packets = rpkts;
 
+       qib_snapshot_pmacounters(ibp, &pma);
+
        if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
-               ibp->n_unicast_xmit = 0;
+               ibp->z_unicast_xmit = pma.n_unicast_xmit;
 
        if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
-               ibp->n_unicast_rcv = 0;
+               ibp->z_unicast_rcv = pma.n_unicast_rcv;
 
        if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
-               ibp->n_multicast_xmit = 0;
+               ibp->z_multicast_xmit = pma.n_multicast_xmit;
 
        if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
-               ibp->n_multicast_rcv = 0;
+               ibp->z_multicast_rcv = pma.n_multicast_rcv;
 
        return pma_get_portcounters_ext(pmp, ibdev, port);
 }
index 3ab341320eade7e06f604cfb90e9995311087e76..2f2501890c4ea2b26a9ebd7a82755688605443e3 100644 (file)
@@ -752,7 +752,7 @@ void qib_send_rc_ack(struct qib_qp *qp)
        qib_flush_wc();
        qib_sendbuf_done(dd, pbufn);
 
-       ibp->n_unicast_xmit++;
+       this_cpu_inc(ibp->pmastats->n_unicast_xmit);
        goto done;
 
 queue_ack:
index 357b6cfcd46c52391887eb5f3a28415e4ea4cb7c..4c07a8b34ffe27e2bfa89a19883fa25ef6aa64e7 100644 (file)
@@ -703,6 +703,7 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
        ohdr->bth[0] = cpu_to_be32(bth0);
        ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
        ohdr->bth[2] = cpu_to_be32(bth2);
+       this_cpu_inc(ibp->pmastats->n_unicast_xmit);
 }
 
 /**
index 3ad651c3356ca39aaa8d662a576b94be2848a9ff..aaf7039f8ed2112041174d1b320a3d8205348c08 100644 (file)
@@ -280,11 +280,11 @@ int qib_make_ud_req(struct qib_qp *qp)
        ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
        if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
                if (ah_attr->dlid != QIB_PERMISSIVE_LID)
-                       ibp->n_multicast_xmit++;
+                       this_cpu_inc(ibp->pmastats->n_multicast_xmit);
                else
-                       ibp->n_unicast_xmit++;
+                       this_cpu_inc(ibp->pmastats->n_unicast_xmit);
        } else {
-               ibp->n_unicast_xmit++;
+               this_cpu_inc(ibp->pmastats->n_unicast_xmit);
                lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
                if (unlikely(lid == ppd->lid)) {
                        /*
index 165aee2ca8a0c38dfc1e00b1965172acf4f30193..d2806cae234c254ef7e71ee58e03df5d2c1efdc9 100644 (file)
 /* attempt to drain the queue for 5secs */
 #define QIB_USER_SDMA_DRAIN_TIMEOUT 500
 
+/*
+ * Track how many times a process opens this driver.
+ */
+static struct rb_root qib_user_sdma_rb_root = RB_ROOT;
+
+struct qib_user_sdma_rb_node {
+       struct rb_node node;
+       int refcount;
+       pid_t pid;
+};
+
 struct qib_user_sdma_pkt {
        struct list_head list;  /* list element */
 
@@ -120,15 +131,60 @@ struct qib_user_sdma_queue {
        /* dma page table */
        struct rb_root dma_pages_root;
 
+       struct qib_user_sdma_rb_node *sdma_rb_node;
+
        /* protect everything above... */
        struct mutex lock;
 };
 
+static struct qib_user_sdma_rb_node *
+qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
+{
+       struct qib_user_sdma_rb_node *sdma_rb_node;
+       struct rb_node *node = root->rb_node;
+
+       while (node) {
+               sdma_rb_node = container_of(node,
+                       struct qib_user_sdma_rb_node, node);
+               if (pid < sdma_rb_node->pid)
+                       node = node->rb_left;
+               else if (pid > sdma_rb_node->pid)
+                       node = node->rb_right;
+               else
+                       return sdma_rb_node;
+       }
+       return NULL;
+}
+
+static int
+qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
+{
+       struct rb_node **node = &(root->rb_node);
+       struct rb_node *parent = NULL;
+       struct qib_user_sdma_rb_node *got;
+
+       while (*node) {
+               got = container_of(*node, struct qib_user_sdma_rb_node, node);
+               parent = *node;
+               if (new->pid < got->pid)
+                       node = &((*node)->rb_left);
+               else if (new->pid > got->pid)
+                       node = &((*node)->rb_right);
+               else
+                       return 0;
+       }
+
+       rb_link_node(&new->node, parent, node);
+       rb_insert_color(&new->node, root);
+       return 1;
+}
+
 struct qib_user_sdma_queue *
 qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
 {
        struct qib_user_sdma_queue *pq =
                kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
+       struct qib_user_sdma_rb_node *sdma_rb_node;
 
        if (!pq)
                goto done;
@@ -138,6 +194,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
        pq->num_pending = 0;
        pq->num_sending = 0;
        pq->added = 0;
+       pq->sdma_rb_node = NULL;
 
        INIT_LIST_HEAD(&pq->sent);
        spin_lock_init(&pq->sent_lock);
@@ -163,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
 
        pq->dma_pages_root = RB_ROOT;
 
+       sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
+                                       current->pid);
+       if (sdma_rb_node) {
+               sdma_rb_node->refcount++;
+       } else {
+               int ret;
+               sdma_rb_node = kmalloc(sizeof(
+                       struct qib_user_sdma_rb_node), GFP_KERNEL);
+               if (!sdma_rb_node)
+                       goto err_rb;
+
+               sdma_rb_node->refcount = 1;
+               sdma_rb_node->pid = current->pid;
+
+               ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
+                                       sdma_rb_node);
+               BUG_ON(ret == 0);
+       }
+       pq->sdma_rb_node = sdma_rb_node;
+
        goto done;
 
+err_rb:
+       dma_pool_destroy(pq->header_cache);
 err_slab:
        kmem_cache_destroy(pq->pkt_slab);
 err_kfree:
@@ -1020,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
        if (!pq)
                return;
 
-       kmem_cache_destroy(pq->pkt_slab);
+       pq->sdma_rb_node->refcount--;
+       if (pq->sdma_rb_node->refcount == 0) {
+               rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
+               kfree(pq->sdma_rb_node);
+       }
        dma_pool_destroy(pq->header_cache);
+       kmem_cache_destroy(pq->pkt_slab);
        kfree(pq);
 }
 
@@ -1241,26 +1325,52 @@ static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
                                 struct qib_user_sdma_queue *pq,
                                 struct list_head *pktlist, int count)
 {
-       int ret = 0;
        unsigned long flags;
 
        if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
                return -ECOMM;
 
-       spin_lock_irqsave(&ppd->sdma_lock, flags);
-
-       if (unlikely(!__qib_sdma_running(ppd))) {
-               ret = -ECOMM;
-               goto unlock;
+       /* non-blocking mode */
+       if (pq->sdma_rb_node->refcount > 1) {
+               spin_lock_irqsave(&ppd->sdma_lock, flags);
+               if (unlikely(!__qib_sdma_running(ppd))) {
+                       spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+                       return -ECOMM;
+               }
+               pq->num_pending += count;
+               list_splice_tail_init(pktlist, &ppd->sdma_userpending);
+               qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+               spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+               return 0;
        }
 
+       /* In this case, descriptors from this process are not
+        * linked to the ppd pending queue, so the interrupt handler
+        * won't update this process; it is OK to modify num_pending
+        * directly, without the sdma lock.
+        */
+
+
        pq->num_pending += count;
-       list_splice_tail_init(pktlist, &ppd->sdma_userpending);
-       qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+       /*
+        * Blocking mode for a single-rail process: we must
+        * release and regain sdma_lock to give other processes
+        * a chance to make progress. This is important for
+        * performance.
+        */
+       do {
+               spin_lock_irqsave(&ppd->sdma_lock, flags);
+               if (unlikely(!__qib_sdma_running(ppd))) {
+                       spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+                       return -ECOMM;
+               }
+               qib_user_sdma_send_desc(ppd, pktlist);
+               if (!list_empty(pktlist))
+                       qib_sdma_make_progress(ppd);
+               spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+       } while (!list_empty(pktlist));
 
-unlock:
-       spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-       return ret;
+       return 0;
 }
 
 int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
@@ -1290,7 +1400,7 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
                qib_user_sdma_queue_clean(ppd, pq);
 
        while (dim) {
-               int mxp = 8;
+               int mxp = 1;
                int ndesc = 0;
 
                ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
index 092b0bb1bb789aaa78cc74add2fcb0aae24bb94e..9bcfbd8429804e237b23555a54bbd2b1d0fc5a27 100644 (file)
@@ -662,7 +662,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
                mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
                if (mcast == NULL)
                        goto drop;
-               ibp->n_multicast_rcv++;
+               this_cpu_inc(ibp->pmastats->n_multicast_rcv);
                list_for_each_entry_rcu(p, &mcast->qp_list, list)
                        qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
                /*
@@ -678,8 +678,8 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
                                        &rcd->lookaside_qp->refcount))
                                        wake_up(
                                         &rcd->lookaside_qp->wait);
-                                       rcd->lookaside_qp = NULL;
-                               }
+                               rcd->lookaside_qp = NULL;
+                       }
                }
                if (!rcd->lookaside_qp) {
                        qp = qib_lookup_qpn(ibp, qp_num);
@@ -689,7 +689,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
                        rcd->lookaside_qpn = qp_num;
                } else
                        qp = rcd->lookaside_qp;
-               ibp->n_unicast_rcv++;
+               this_cpu_inc(ibp->pmastats->n_unicast_rcv);
                qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
        }
        return;
index a01c7d2cf54114def170ed8592a403b0b3d21e57..bfc8948fdd3592589d14c611bf16e2a626dbde39 100644 (file)
@@ -664,6 +664,13 @@ struct qib_opcode_stats_perctx {
        struct qib_opcode_stats stats[128];
 };
 
+struct qib_pma_counters {
+       u64 n_unicast_xmit;     /* total unicast packets sent */
+       u64 n_unicast_rcv;      /* total unicast packets received */
+       u64 n_multicast_xmit;   /* total multicast packets sent */
+       u64 n_multicast_rcv;    /* total multicast packets received */
+};
+
 struct qib_ibport {
        struct qib_qp __rcu *qp0;
        struct qib_qp __rcu *qp1;
@@ -680,10 +687,11 @@ struct qib_ibport {
        __be64 mkey;
        __be64 guids[QIB_GUIDS_PER_PORT - 1];   /* writable GUIDs */
        u64 tid;                /* TID for traps */
-       u64 n_unicast_xmit;     /* total unicast packets sent */
-       u64 n_unicast_rcv;      /* total unicast packets received */
-       u64 n_multicast_xmit;   /* total multicast packets sent */
-       u64 n_multicast_rcv;    /* total multicast packets received */
+       struct qib_pma_counters __percpu *pmastats;
+       u64 z_unicast_xmit;     /* starting count for PMA */
+       u64 z_unicast_rcv;      /* starting count for PMA */
+       u64 z_multicast_xmit;   /* starting count for PMA */
+       u64 z_multicast_rcv;    /* starting count for PMA */
        u64 z_symbol_error_counter;             /* starting count for PMA */
        u64 z_link_error_recovery_counter;      /* starting count for PMA */
        u64 z_link_downed_counter;              /* starting count for PMA */
index 16755cdab2c0298433a4e4fe66159d1a88a2a81d..801a1d6937e47412fa1b93882a74d2bfb1e47ecc 100644 (file)
@@ -286,7 +286,7 @@ iter_chunk:
                                err = iommu_map(pd->domain, va_start, pa_start,
                                                        size, flags);
                                if (err) {
-                                       usnic_err("Failed to map va 0x%lx pa 0x%pa size 0x%zx with err %d\n",
+                                       usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
                                                va_start, &pa_start, size, err);
                                        goto err_out;
                                }
index dd03cfe596d6b5c25364c357e99fd967cf55bda6..25f195ef44b02b09d3f34dec9b452cc7bb2a512c 100644 (file)
@@ -5,7 +5,7 @@
  * Copyright (C) 2004 Alex Aizman
  * Copyright (C) 2005 Mike Christie
  * Copyright (c) 2005, 2006 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
  * maintained by openib-general@openib.org
  *
  * This software is available to you under a choice of one of two
@@ -82,6 +82,8 @@ static unsigned int iscsi_max_lun = 512;
 module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
 
 int iser_debug_level = 0;
+bool iser_pi_enable = false;
+int iser_pi_guard = 0;
 
 MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");
 MODULE_LICENSE("Dual BSD/GPL");
@@ -91,6 +93,12 @@ MODULE_VERSION(DRV_VER);
 module_param_named(debug_level, iser_debug_level, int, 0644);
 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)");
 
+module_param_named(pi_enable, iser_pi_enable, bool, 0644);
+MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
+
+module_param_named(pi_guard, iser_pi_guard, int, 0644);
+MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)");
+
 struct iser_global ig;
 
 void
@@ -138,8 +146,8 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
 int iser_initialize_task_headers(struct iscsi_task *task,
                                                struct iser_tx_desc *tx_desc)
 {
-       struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
-       struct iser_device     *device    = iser_conn->ib_conn->device;
+       struct iser_conn       *ib_conn   = task->conn->dd_data;
+       struct iser_device     *device    = ib_conn->device;
        struct iscsi_iser_task *iser_task = task->dd_data;
        u64 dma_addr;
 
@@ -153,7 +161,7 @@ int iser_initialize_task_headers(struct iscsi_task *task,
        tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
        tx_desc->tx_sg[0].lkey   = device->mr->lkey;
 
-       iser_task->iser_conn            = iser_conn;
+       iser_task->ib_conn = ib_conn;
        return 0;
 }
 /**
@@ -176,6 +184,8 @@ iscsi_iser_task_init(struct iscsi_task *task)
 
        iser_task->command_sent = 0;
        iser_task_rdma_init(iser_task);
+       iser_task->sc = task->sc;
+
        return 0;
 }
 
@@ -278,10 +288,9 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
 static void iscsi_iser_cleanup_task(struct iscsi_task *task)
 {
        struct iscsi_iser_task *iser_task = task->dd_data;
-       struct iser_tx_desc     *tx_desc = &iser_task->desc;
-
-       struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
-       struct iser_device     *device    = iser_conn->ib_conn->device;
+       struct iser_tx_desc    *tx_desc   = &iser_task->desc;
+       struct iser_conn       *ib_conn   = task->conn->dd_data;
+       struct iser_device     *device    = ib_conn->device;
 
        ib_dma_unmap_single(device->ib_device,
                tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
@@ -296,14 +305,25 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task)
        }
 }
 
+static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
+{
+       struct iscsi_iser_task *iser_task = task->dd_data;
+
+       if (iser_task->dir[ISER_DIR_IN])
+               return iser_check_task_pi_status(iser_task, ISER_DIR_IN,
+                                                sector);
+       else
+               return iser_check_task_pi_status(iser_task, ISER_DIR_OUT,
+                                                sector);
+}
+
 static struct iscsi_cls_conn *
 iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
 {
        struct iscsi_conn *conn;
        struct iscsi_cls_conn *cls_conn;
-       struct iscsi_iser_conn *iser_conn;
 
-       cls_conn = iscsi_conn_setup(cls_session, sizeof(*iser_conn), conn_idx);
+       cls_conn = iscsi_conn_setup(cls_session, 0, conn_idx);
        if (!cls_conn)
                return NULL;
        conn = cls_conn->dd_data;
@@ -314,10 +334,6 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
         */
        conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN;
 
-       iser_conn = conn->dd_data;
-       conn->dd_data = iser_conn;
-       iser_conn->iscsi_conn = conn;
-
        return cls_conn;
 }
 
@@ -325,8 +341,7 @@ static void
 iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn)
 {
        struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iscsi_iser_conn *iser_conn = conn->dd_data;
-       struct iser_conn *ib_conn = iser_conn->ib_conn;
+       struct iser_conn *ib_conn = conn->dd_data;
 
        iscsi_conn_teardown(cls_conn);
        /*
@@ -335,7 +350,7 @@ iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn)
         * we free it here.
         */
        if (ib_conn) {
-               ib_conn->iser_conn = NULL;
+               ib_conn->iscsi_conn = NULL;
                iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
        }
 }
@@ -346,7 +361,6 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
                     int is_leading)
 {
        struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iscsi_iser_conn *iser_conn;
        struct iscsi_session *session;
        struct iser_conn *ib_conn;
        struct iscsi_endpoint *ep;
@@ -373,11 +387,11 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
        /* binds the iSER connection retrieved from the previously
         * connected ep_handle to the iSCSI layer connection. exchanges
         * connection pointers */
-       iser_info("binding iscsi/iser conn %p %p to ib_conn %p\n",
-                 conn, conn->dd_data, ib_conn);
-       iser_conn = conn->dd_data;
-       ib_conn->iser_conn = iser_conn;
-       iser_conn->ib_conn  = ib_conn;
+       iser_info("binding iscsi conn %p to ib_conn %p\n", conn, ib_conn);
+
+       conn->dd_data = ib_conn;
+       ib_conn->iscsi_conn = conn;
+
        iser_conn_get(ib_conn); /* ref iscsi/ib conn binding */
        return 0;
 }
@@ -386,8 +400,7 @@ static void
 iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
 {
        struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iscsi_iser_conn *iser_conn = conn->dd_data;
-       struct iser_conn *ib_conn = iser_conn->ib_conn;
+       struct iser_conn *ib_conn = conn->dd_data;
 
        /*
         * Userspace may have goofed up and not bound the connection or
@@ -401,7 +414,7 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
                 */
                iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
        }
-       iser_conn->ib_conn = NULL;
+       conn->dd_data = NULL;
 }
 
 static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
@@ -413,6 +426,17 @@ static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
        iscsi_host_free(shost);
 }
 
+static inline unsigned int
+iser_dif_prot_caps(int prot_caps)
+{
+       return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION |
+                                                     SHOST_DIX_TYPE1_PROTECTION : 0) |
+              ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION |
+                                                     SHOST_DIX_TYPE2_PROTECTION : 0) |
+              ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? SHOST_DIF_TYPE3_PROTECTION |
+                                                     SHOST_DIX_TYPE3_PROTECTION : 0);
+}
+
 static struct iscsi_cls_session *
 iscsi_iser_session_create(struct iscsi_endpoint *ep,
                          uint16_t cmds_max, uint16_t qdepth,
@@ -437,8 +461,18 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
         * older userspace tools (before 2.0-870) did not pass us
         * the leading conn's ep so this will be NULL;
         */
-       if (ep)
+       if (ep) {
                ib_conn = ep->dd_data;
+               if (ib_conn->pi_support) {
+                       u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
+
+                       scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
+                       if (iser_pi_guard)
+                               scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP);
+                       else
+                               scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC);
+               }
+       }
 
        if (iscsi_host_add(shost,
                           ep ? ib_conn->device->ib_device->dma_device : NULL))
@@ -618,7 +652,7 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
        struct iser_conn *ib_conn;
 
        ib_conn = ep->dd_data;
-       if (ib_conn->iser_conn)
+       if (ib_conn->iscsi_conn)
                /*
                 * Must suspend xmit path if the ep is bound to the
                 * iscsi_conn, so we know we are not accessing the ib_conn
@@ -626,7 +660,7 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
                 *
                 * This may not be bound if the ep poll failed.
                 */
-               iscsi_suspend_tx(ib_conn->iser_conn->iscsi_conn);
+               iscsi_suspend_tx(ib_conn->iscsi_conn);
 
 
        iser_info("ib conn %p state %d\n", ib_conn, ib_conn->state);
@@ -732,6 +766,7 @@ static struct iscsi_transport iscsi_iser_transport = {
        .xmit_task              = iscsi_iser_task_xmit,
        .cleanup_task           = iscsi_iser_cleanup_task,
        .alloc_pdu              = iscsi_iser_pdu_alloc,
+       .check_protection       = iscsi_iser_check_protection,
        /* recovery */
        .session_recovery_timedout = iscsi_session_recovery_timedout,
 
index 67914027c614d5a393d036fcce1ff3edda021a39..324129f80d40b23d5e36874b081e87e21c9abebc 100644 (file)
@@ -8,7 +8,7 @@
  *
  * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -46,6 +46,8 @@
 #include <linux/printk.h>
 #include <scsi/libiscsi.h>
 #include <scsi/scsi_transport_iscsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
 
 #include <linux/interrupt.h>
 #include <linux/wait.h>
@@ -67,7 +69,7 @@
 
 #define DRV_NAME       "iser"
 #define PFX            DRV_NAME ": "
-#define DRV_VER                "1.1"
+#define DRV_VER                "1.3"
 
 #define iser_dbg(fmt, arg...)                          \
        do {                                            \
                                        ISER_MAX_TX_MISC_PDUS        + \
                                        ISER_MAX_RX_MISC_PDUS)
 
+/* Max registration work requests per command */
+#define ISER_MAX_REG_WR_PER_CMD                5
+
+/* For Signature we don't support DATAOUTs so no need to make room for them */
+#define ISER_QP_SIG_MAX_REQ_DTOS       (ISER_DEF_XMIT_CMDS_MAX *       \
+                                       (1 + ISER_MAX_REG_WR_PER_CMD) + \
+                                       ISER_MAX_TX_MISC_PDUS         + \
+                                       ISER_MAX_RX_MISC_PDUS)
+
 #define ISER_VER                       0x10
 #define ISER_WSV                       0x08
 #define ISER_RSV                       0x04
 
+#define ISER_FASTREG_LI_WRID           0xffffffffffffffffULL
+
 struct iser_hdr {
        u8      flags;
        u8      rsvd[3];
@@ -201,7 +214,6 @@ struct iser_data_buf {
 /* fwd declarations */
 struct iser_device;
 struct iser_cq_desc;
-struct iscsi_iser_conn;
 struct iscsi_iser_task;
 struct iscsi_endpoint;
 
@@ -258,6 +270,7 @@ struct iscsi_iser_task;
 struct iser_device {
        struct ib_device             *ib_device;
        struct ib_pd                 *pd;
+       struct ib_device_attr        dev_attr;
        struct ib_cq                 *rx_cq[ISER_MAX_CQ];
        struct ib_cq                 *tx_cq[ISER_MAX_CQ];
        struct ib_mr                 *mr;
@@ -277,17 +290,35 @@ struct iser_device {
                                                            enum iser_data_dir cmd_dir);
 };
 
+#define ISER_CHECK_GUARD       0xc0
+#define ISER_CHECK_REFTAG      0x0f
+#define ISER_CHECK_APPTAG      0x30
+
+enum iser_reg_indicator {      /* one bit per flag, so values can be OR-ed together */
+       ISER_DATA_KEY_VALID     = 1 << 0,       /* NOTE(review): presumably refers to the data MR key - confirm at use sites */
+       ISER_PROT_KEY_VALID     = 1 << 1,       /* NOTE(review): presumably refers to the protection MR key - confirm */
+       ISER_SIG_KEY_VALID      = 1 << 2,       /* NOTE(review): presumably refers to the signature MR key - confirm */
+       ISER_FASTREG_PROTECTED  = 1 << 3,       /* NOTE(review): presumably marks a descriptor used with protection - confirm */
+};
+
+struct iser_pi_context {       /* referenced from fast_reg_descriptor via its pi_ctx pointer */
+       struct ib_mr                   *prot_mr;        /* NOTE(review): presumably the MR for the protection buffer - confirm */
+       struct ib_fast_reg_page_list   *prot_frpl;      /* NOTE(review): presumably the page list paired with prot_mr - confirm */
+       struct ib_mr                   *sig_mr;         /* NOTE(review): presumably the signature MR - confirm */
+};
+
 struct fast_reg_descriptor {
        struct list_head                  list;
        /* For fast registration - FRWR */
        struct ib_mr                     *data_mr;
        struct ib_fast_reg_page_list     *data_frpl;
-       /* Valid for fast registration flag */
-       bool                              valid;
+       struct iser_pi_context           *pi_ctx;       /* protection/signature resources, see struct iser_pi_context */
+       /* registration indicators container (presumably enum iser_reg_indicator bits - confirm) */
+       u8                                reg_indicators;
 };
 
 struct iser_conn {
-       struct iscsi_iser_conn       *iser_conn; /* iser conn for upcalls  */
+       struct iscsi_conn            *iscsi_conn;
        struct iscsi_endpoint        *ep;
        enum iser_ib_conn_state      state;         /* rdma connection state   */
        atomic_t                     refcount;
@@ -310,6 +341,9 @@ struct iser_conn {
        unsigned int                 rx_desc_head;
        struct iser_rx_desc          *rx_descs;
        struct ib_recv_wr            rx_wr[ISER_MIN_POSTED_RX];
+       bool                         pi_support;
+
+       /* Connection memory registration pool */
        union {
                struct {
                        struct ib_fmr_pool      *pool;     /* pool of IB FMRs         */
@@ -319,24 +353,22 @@ struct iser_conn {
                struct {
                        struct list_head        pool;
                        int                     pool_size;
-               } frwr;
-       } fastreg;
-};
-
-struct iscsi_iser_conn {
-       struct iscsi_conn            *iscsi_conn;/* ptr to iscsi conn */
-       struct iser_conn             *ib_conn;   /* iSER IB conn      */
+               } fastreg;
+       };
 };
 
 struct iscsi_iser_task {
        struct iser_tx_desc          desc;
-       struct iscsi_iser_conn       *iser_conn;
+       struct iser_conn             *ib_conn;
        enum iser_task_status        status;
+       struct scsi_cmnd             *sc;
        int                          command_sent;  /* set if command  sent  */
        int                          dir[ISER_DIRS_NUM];      /* set if dir use*/
        struct iser_regd_buf         rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */
        struct iser_data_buf         data[ISER_DIRS_NUM];     /* orig. data des*/
        struct iser_data_buf         data_copy[ISER_DIRS_NUM];/* contig. copy  */
+       struct iser_data_buf         prot[ISER_DIRS_NUM];     /* prot desc     */
+       struct iser_data_buf         prot_copy[ISER_DIRS_NUM];/* prot copy     */
 };
 
 struct iser_page_vec {
@@ -362,6 +394,8 @@ struct iser_global {
 
 extern struct iser_global ig;
 extern int iser_debug_level;
+extern bool iser_pi_enable;
+extern int iser_pi_guard;
 
 /* allocate connection resources needed for rdma functionality */
 int iser_conn_set_full_featured_mode(struct iscsi_conn *conn);
@@ -401,13 +435,15 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *task);
 
 void iser_free_rx_descriptors(struct iser_conn *ib_conn);
 
-void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,
-                                    enum iser_data_dir         cmd_dir);
+void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
+                                    struct iser_data_buf *mem,
+                                    struct iser_data_buf *mem_copy,
+                                    enum iser_data_dir cmd_dir);
 
 int  iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
                           enum iser_data_dir cmd_dir);
-int  iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task,
-                           enum iser_data_dir cmd_dir);
+int  iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task,
+                              enum iser_data_dir cmd_dir);
 
 int  iser_connect(struct iser_conn   *ib_conn,
                  struct sockaddr_in *src_addr,
@@ -420,8 +456,8 @@ int  iser_reg_page_vec(struct iser_conn     *ib_conn,
 
 void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
                        enum iser_data_dir cmd_dir);
-void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
-                        enum iser_data_dir cmd_dir);
+void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
+                           enum iser_data_dir cmd_dir);
 
 int  iser_post_recvl(struct iser_conn *ib_conn);
 int  iser_post_recvm(struct iser_conn *ib_conn, int count);
@@ -432,12 +468,15 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
                            enum   iser_data_dir       iser_dir,
                            enum   dma_data_direction  dma_dir);
 
-void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
+void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
+                             struct iser_data_buf *data);
 int  iser_initialize_task_headers(struct iscsi_task *task,
                        struct iser_tx_desc *tx_desc);
 int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);
 int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
 void iser_free_fmr_pool(struct iser_conn *ib_conn);
-int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
-void iser_free_frwr_pool(struct iser_conn *ib_conn);
+int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max);
+void iser_free_fastreg_pool(struct iser_conn *ib_conn);
+u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
+                            enum iser_data_dir cmd_dir, sector_t *sector);
 #endif
index 334f34b1cd46533b0b1dc6cdec9c9f0f30c2af76..2e2d903db838f75e6105a88f875de8acf3667162 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -49,7 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
 
 {
        struct iscsi_iser_task *iser_task = task->dd_data;
-       struct iser_device  *device = iser_task->iser_conn->ib_conn->device;
+       struct iser_device  *device = iser_task->ib_conn->device;
        struct iser_regd_buf *regd_buf;
        int err;
        struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -62,11 +62,22 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
        if (err)
                return err;
 
+       if (scsi_prot_sg_count(iser_task->sc)) {
+               struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN];
+
+               err = iser_dma_map_task_data(iser_task,
+                                            pbuf_in,
+                                            ISER_DIR_IN,
+                                            DMA_FROM_DEVICE);
+               if (err)
+                       return err;
+       }
+
        if (edtl > iser_task->data[ISER_DIR_IN].data_len) {
                iser_err("Total data length: %ld, less than EDTL: "
                         "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
                         iser_task->data[ISER_DIR_IN].data_len, edtl,
-                        task->itt, iser_task->iser_conn);
+                        task->itt, iser_task->ib_conn);
                return -EINVAL;
        }
 
@@ -99,7 +110,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
                       unsigned int edtl)
 {
        struct iscsi_iser_task *iser_task = task->dd_data;
-       struct iser_device  *device = iser_task->iser_conn->ib_conn->device;
+       struct iser_device  *device = iser_task->ib_conn->device;
        struct iser_regd_buf *regd_buf;
        int err;
        struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -113,6 +124,17 @@ iser_prepare_write_cmd(struct iscsi_task *task,
        if (err)
                return err;
 
+       if (scsi_prot_sg_count(iser_task->sc)) {
+               struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT];
+
+               err = iser_dma_map_task_data(iser_task,
+                                            pbuf_out,
+                                            ISER_DIR_OUT,
+                                            DMA_TO_DEVICE);
+               if (err)
+                       return err;
+       }
+
        if (edtl > iser_task->data[ISER_DIR_OUT].data_len) {
                iser_err("Total data length: %ld, less than EDTL: %d, "
                         "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
@@ -327,7 +349,7 @@ free_login_buf:
 
 static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
 {
-       struct iscsi_iser_conn *iser_conn = conn->dd_data;
+       struct iser_conn *ib_conn = conn->dd_data;
        struct iscsi_session *session = conn->session;
 
        iser_dbg("req op %x flags %x\n", req->opcode, req->flags);
@@ -340,19 +362,18 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
         * response) and no posted send buffers left - they must have been
         * consumed during previous login phases.
         */
-       WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1);
-       WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
+       WARN_ON(ib_conn->post_recv_buf_count != 1);
+       WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0);
 
        if (session->discovery_sess) {
                iser_info("Discovery session, re-using login RX buffer\n");
                return 0;
        } else
                iser_info("Normal session, posting batch of RX %d buffers\n",
-                         iser_conn->ib_conn->min_posted_rx);
+                         ib_conn->min_posted_rx);
 
        /* Initial post receive buffers */
-       if (iser_post_recvm(iser_conn->ib_conn,
-                           iser_conn->ib_conn->min_posted_rx))
+       if (iser_post_recvm(ib_conn, ib_conn->min_posted_rx))
                return -ENOMEM;
 
        return 0;
@@ -364,11 +385,11 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
 int iser_send_command(struct iscsi_conn *conn,
                      struct iscsi_task *task)
 {
-       struct iscsi_iser_conn *iser_conn = conn->dd_data;
+       struct iser_conn *ib_conn = conn->dd_data;
        struct iscsi_iser_task *iser_task = task->dd_data;
        unsigned long edtl;
        int err;
-       struct iser_data_buf *data_buf;
+       struct iser_data_buf *data_buf, *prot_buf;
        struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
        struct scsi_cmnd *sc  =  task->sc;
        struct iser_tx_desc *tx_desc = &iser_task->desc;
@@ -377,20 +398,28 @@ int iser_send_command(struct iscsi_conn *conn,
 
        /* build the tx desc regd header and add it to the tx desc dto */
        tx_desc->type = ISCSI_TX_SCSI_COMMAND;
-       iser_create_send_desc(iser_conn->ib_conn, tx_desc);
+       iser_create_send_desc(ib_conn, tx_desc);
 
-       if (hdr->flags & ISCSI_FLAG_CMD_READ)
+       if (hdr->flags & ISCSI_FLAG_CMD_READ) {
                data_buf = &iser_task->data[ISER_DIR_IN];
-       else
+               prot_buf = &iser_task->prot[ISER_DIR_IN];
+       } else {
                data_buf = &iser_task->data[ISER_DIR_OUT];
+               prot_buf = &iser_task->prot[ISER_DIR_OUT];
+       }
 
        if (scsi_sg_count(sc)) { /* using a scatter list */
                data_buf->buf  = scsi_sglist(sc);
                data_buf->size = scsi_sg_count(sc);
        }
-
        data_buf->data_len = scsi_bufflen(sc);
 
+       if (scsi_prot_sg_count(sc)) {
+               prot_buf->buf  = scsi_prot_sglist(sc);
+               prot_buf->size = scsi_prot_sg_count(sc);
+               prot_buf->data_len = sc->prot_sdb->length;
+       }
+
        if (hdr->flags & ISCSI_FLAG_CMD_READ) {
                err = iser_prepare_read_cmd(task, edtl);
                if (err)
@@ -408,7 +437,7 @@ int iser_send_command(struct iscsi_conn *conn,
 
        iser_task->status = ISER_TASK_STATUS_STARTED;
 
-       err = iser_post_send(iser_conn->ib_conn, tx_desc);
+       err = iser_post_send(ib_conn, tx_desc);
        if (!err)
                return 0;
 
@@ -424,7 +453,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
                       struct iscsi_task *task,
                       struct iscsi_data *hdr)
 {
-       struct iscsi_iser_conn *iser_conn = conn->dd_data;
+       struct iser_conn *ib_conn = conn->dd_data;
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_tx_desc *tx_desc = NULL;
        struct iser_regd_buf *regd_buf;
@@ -473,7 +502,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
                 itt, buf_offset, data_seg_len);
 
 
-       err = iser_post_send(iser_conn->ib_conn, tx_desc);
+       err = iser_post_send(ib_conn, tx_desc);
        if (!err)
                return 0;
 
@@ -486,19 +515,18 @@ send_data_out_error:
 int iser_send_control(struct iscsi_conn *conn,
                      struct iscsi_task *task)
 {
-       struct iscsi_iser_conn *iser_conn = conn->dd_data;
+       struct iser_conn *ib_conn = conn->dd_data;
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_tx_desc *mdesc = &iser_task->desc;
        unsigned long data_seg_len;
        int err = 0;
        struct iser_device *device;
-       struct iser_conn *ib_conn = iser_conn->ib_conn;
 
        /* build the tx desc regd header and add it to the tx desc dto */
        mdesc->type = ISCSI_TX_CONTROL;
-       iser_create_send_desc(iser_conn->ib_conn, mdesc);
+       iser_create_send_desc(ib_conn, mdesc);
 
-       device = iser_conn->ib_conn->device;
+       device = ib_conn->device;
 
        data_seg_len = ntoh24(task->hdr->dlength);
 
@@ -513,14 +541,13 @@ int iser_send_control(struct iscsi_conn *conn,
                        ib_conn->login_req_dma, task->data_count,
                        DMA_TO_DEVICE);
 
-               memcpy(iser_conn->ib_conn->login_req_buf, task->data,
-                                                       task->data_count);
+               memcpy(ib_conn->login_req_buf, task->data, task->data_count);
 
                ib_dma_sync_single_for_device(device->ib_device,
                        ib_conn->login_req_dma, task->data_count,
                        DMA_TO_DEVICE);
 
-               tx_dsg->addr    = iser_conn->ib_conn->login_req_dma;
+               tx_dsg->addr    = ib_conn->login_req_dma;
                tx_dsg->length  = task->data_count;
                tx_dsg->lkey    = device->mr->lkey;
                mdesc->num_sge = 2;
@@ -529,7 +556,7 @@ int iser_send_control(struct iscsi_conn *conn,
        if (task == conn->login_task) {
                iser_dbg("op %x dsl %lx, posting login rx buffer\n",
                         task->hdr->opcode, data_seg_len);
-               err = iser_post_recvl(iser_conn->ib_conn);
+               err = iser_post_recvl(ib_conn);
                if (err)
                        goto send_control_error;
                err = iser_post_rx_bufs(conn, task->hdr);
@@ -537,7 +564,7 @@ int iser_send_control(struct iscsi_conn *conn,
                        goto send_control_error;
        }
 
-       err = iser_post_send(iser_conn->ib_conn, mdesc);
+       err = iser_post_send(ib_conn, mdesc);
        if (!err)
                return 0;
 
@@ -553,7 +580,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
                         unsigned long rx_xfer_len,
                         struct iser_conn *ib_conn)
 {
-       struct iscsi_iser_conn *conn = ib_conn->iser_conn;
        struct iscsi_hdr *hdr;
        u64 rx_dma;
        int rx_buflen, outstanding, count, err;
@@ -575,17 +601,17 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
        iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
                        hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
 
-       iscsi_iser_recv(conn->iscsi_conn, hdr,
-               rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);
+       iscsi_iser_recv(ib_conn->iscsi_conn, hdr, rx_desc->data,
+                       rx_xfer_len - ISER_HEADERS_LEN);
 
        ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
-                       rx_buflen, DMA_FROM_DEVICE);
+                                     rx_buflen, DMA_FROM_DEVICE);
 
        /* decrementing conn->post_recv_buf_count only --after-- freeing the   *
         * task eliminates the need to worry on tasks which are completed in   *
         * parallel to the execution of iser_conn_term. So the code that waits *
         * for the posted rx bufs refcount to become zero handles everything   */
-       conn->ib_conn->post_recv_buf_count--;
+       ib_conn->post_recv_buf_count--;
 
        if (rx_dma == ib_conn->login_resp_dma)
                return;
@@ -635,6 +661,9 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
        iser_task->data[ISER_DIR_IN].data_len  = 0;
        iser_task->data[ISER_DIR_OUT].data_len = 0;
 
+       iser_task->prot[ISER_DIR_IN].data_len  = 0;
+       iser_task->prot[ISER_DIR_OUT].data_len = 0;
+
        memset(&iser_task->rdma_regd[ISER_DIR_IN], 0,
               sizeof(struct iser_regd_buf));
        memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0,
@@ -643,28 +672,63 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
 
 /*
  * iser_task_rdma_finalize - release the RDMA resources held by a task.
  *
  * First, every data or protection buffer that has a bounce copy
  * (copy_buf != NULL) is handed to iser_finalize_rdma_unaligned_sg().
  * Then, for each direction flagged in iser_task->dir[], the memory is
  * unregistered through device->iser_unreg_rdma_mem() and the original
  * scatterlists are DMA-unmapped. The data list is unmapped only when no
  * data bounce copy was found in either direction; the protection list
  * only when the command has protection entries and no protection bounce
  * copy was found in either direction.
  */
 void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
 {
-       struct iser_device *device = iser_task->iser_conn->ib_conn->device;
-       int is_rdma_aligned = 1;
+       struct iser_device *device = iser_task->ib_conn->device;
+       int is_rdma_data_aligned = 1;
+       int is_rdma_prot_aligned = 1;
+       int prot_count = scsi_prot_sg_count(iser_task->sc);     /* 0 => no protection list to unmap below */
 
        /* if we were reading, copy back to unaligned sglist,
         * anyway dma_unmap and free the copy
         */
        if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) {
-               is_rdma_aligned = 0;
-               iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN);
+               is_rdma_data_aligned = 0;
+               iser_finalize_rdma_unaligned_sg(iser_task,
+                                               &iser_task->data[ISER_DIR_IN],
+                                               &iser_task->data_copy[ISER_DIR_IN],
+                                               ISER_DIR_IN);
        }
+
        if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
-               is_rdma_aligned = 0;
-               iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT);
+               is_rdma_data_aligned = 0;
+               iser_finalize_rdma_unaligned_sg(iser_task,
+                                               &iser_task->data[ISER_DIR_OUT],
+                                               &iser_task->data_copy[ISER_DIR_OUT],
+                                               ISER_DIR_OUT);
+       }
+
+       if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) {
+               is_rdma_prot_aligned = 0;
+               iser_finalize_rdma_unaligned_sg(iser_task,
+                                               &iser_task->prot[ISER_DIR_IN],
+                                               &iser_task->prot_copy[ISER_DIR_IN],
+                                               ISER_DIR_IN);
+       }
+
+       if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) {
+               is_rdma_prot_aligned = 0;
+               iser_finalize_rdma_unaligned_sg(iser_task,
+                                               &iser_task->prot[ISER_DIR_OUT],
+                                               &iser_task->prot_copy[ISER_DIR_OUT],
+                                               ISER_DIR_OUT);
        }
 
-       if (iser_task->dir[ISER_DIR_IN])
+       if (iser_task->dir[ISER_DIR_IN]) {
                device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
+               if (is_rdma_data_aligned)
+                       iser_dma_unmap_task_data(iser_task,
+                                                &iser_task->data[ISER_DIR_IN]);
+               if (prot_count && is_rdma_prot_aligned)
+                       iser_dma_unmap_task_data(iser_task,
+                                                &iser_task->prot[ISER_DIR_IN]);
+       }
 
-       if (iser_task->dir[ISER_DIR_OUT])
+       if (iser_task->dir[ISER_DIR_OUT]) {
                device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
-
-       /* if the data was unaligned, it was already unmapped and then copied */
-       if (is_rdma_aligned)
-               iser_dma_unmap_task_data(iser_task);
+               if (is_rdma_data_aligned)
+                       iser_dma_unmap_task_data(iser_task,
+                                                &iser_task->data[ISER_DIR_OUT]);
+               if (prot_count && is_rdma_prot_aligned)
+                       iser_dma_unmap_task_data(iser_task,
+                                                &iser_task->prot[ISER_DIR_OUT]);
+       }
 }
index 1ce0c97d2ccb894d33ef9871456726aa45b51ab9..47acd3ad3a17e6a2e1609fb8752ea65d41b82387 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
  * iser_start_rdma_unaligned_sg
  */
 static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
+                                       struct iser_data_buf *data,
+                                       struct iser_data_buf *data_copy,
                                        enum iser_data_dir cmd_dir)
 {
-       int dma_nents;
-       struct ib_device *dev;
+       struct ib_device *dev = iser_task->ib_conn->device->ib_device;
+       struct scatterlist *sgl = (struct scatterlist *)data->buf;
+       struct scatterlist *sg;
        char *mem = NULL;
-       struct iser_data_buf *data = &iser_task->data[cmd_dir];
-       unsigned long  cmd_data_len = data->data_len;
+       unsigned long  cmd_data_len = 0;
+       int dma_nents, i;
+
+       for_each_sg(sgl, sg, data->size, i)
+               cmd_data_len += ib_sg_dma_len(dev, sg);
 
        if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
                mem = (void *)__get_free_pages(GFP_ATOMIC,
@@ -61,17 +67,16 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
 
        if (mem == NULL) {
                iser_err("Failed to allocate mem size %d %d for copying sglist\n",
-                        data->size,(int)cmd_data_len);
+                        data->size, (int)cmd_data_len);
                return -ENOMEM;
        }
 
        if (cmd_dir == ISER_DIR_OUT) {
                /* copy the unaligned sg the buffer which is used for RDMA */
-               struct scatterlist *sgl = (struct scatterlist *)data->buf;
-               struct scatterlist *sg;
                int i;
                char *p, *from;
 
+               sgl = (struct scatterlist *)data->buf;
                p = mem;
                for_each_sg(sgl, sg, data->size, i) {
                        from = kmap_atomic(sg_page(sg));
@@ -83,39 +88,37 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                }
        }
 
-       sg_init_one(&iser_task->data_copy[cmd_dir].sg_single, mem, cmd_data_len);
-       iser_task->data_copy[cmd_dir].buf  =
-               &iser_task->data_copy[cmd_dir].sg_single;
-       iser_task->data_copy[cmd_dir].size = 1;
+       sg_init_one(&data_copy->sg_single, mem, cmd_data_len);
+       data_copy->buf = &data_copy->sg_single;
+       data_copy->size = 1;
+       data_copy->copy_buf = mem;
 
-       iser_task->data_copy[cmd_dir].copy_buf  = mem;
-
-       dev = iser_task->iser_conn->ib_conn->device->ib_device;
-       dma_nents = ib_dma_map_sg(dev,
-                                 &iser_task->data_copy[cmd_dir].sg_single,
-                                 1,
+       dma_nents = ib_dma_map_sg(dev, &data_copy->sg_single, 1,
                                  (cmd_dir == ISER_DIR_OUT) ?
                                  DMA_TO_DEVICE : DMA_FROM_DEVICE);
        BUG_ON(dma_nents == 0);
 
-       iser_task->data_copy[cmd_dir].dma_nents = dma_nents;
+       data_copy->dma_nents = dma_nents;
+       data_copy->data_len = cmd_data_len;
+
        return 0;
 }
 
 /**
  * iser_finalize_rdma_unaligned_sg
  */
+
 void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
-                                    enum iser_data_dir         cmd_dir)
+                                    struct iser_data_buf *data,
+                                    struct iser_data_buf *data_copy,
+                                    enum iser_data_dir cmd_dir)
 {
        struct ib_device *dev;
-       struct iser_data_buf *mem_copy;
        unsigned long  cmd_data_len;
 
-       dev = iser_task->iser_conn->ib_conn->device->ib_device;
-       mem_copy = &iser_task->data_copy[cmd_dir];
+       dev = iser_task->ib_conn->device->ib_device;
 
-       ib_dma_unmap_sg(dev, &mem_copy->sg_single, 1,
+       ib_dma_unmap_sg(dev, &data_copy->sg_single, 1,
                        (cmd_dir == ISER_DIR_OUT) ?
                        DMA_TO_DEVICE : DMA_FROM_DEVICE);
 
@@ -127,10 +130,10 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                int i;
 
                /* copy back read RDMA to unaligned sg */
-               mem     = mem_copy->copy_buf;
+               mem = data_copy->copy_buf;
 
-               sgl     = (struct scatterlist *)iser_task->data[ISER_DIR_IN].buf;
-               sg_size = iser_task->data[ISER_DIR_IN].size;
+               sgl = (struct scatterlist *)data->buf;
+               sg_size = data->size;
 
                p = mem;
                for_each_sg(sgl, sg, sg_size, i) {
@@ -143,15 +146,15 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                }
        }
 
-       cmd_data_len = iser_task->data[cmd_dir].data_len;
+       cmd_data_len = data->data_len;
 
        if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
-               free_pages((unsigned long)mem_copy->copy_buf,
+               free_pages((unsigned long)data_copy->copy_buf,
                           ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
        else
-               kfree(mem_copy->copy_buf);
+               kfree(data_copy->copy_buf);
 
-       mem_copy->copy_buf = NULL;
+       data_copy->copy_buf = NULL;
 }
 
 #define IS_4K_ALIGNED(addr)    ((((unsigned long)addr) & ~MASK_4K) == 0)
@@ -319,7 +322,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
        struct ib_device *dev;
 
        iser_task->dir[iser_dir] = 1;
-       dev = iser_task->iser_conn->ib_conn->device->ib_device;
+       dev = iser_task->ib_conn->device->ib_device;
 
        data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir);
        if (data->dma_nents == 0) {
@@ -329,31 +332,23 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
        return 0;
 }
 
-void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task)
+/*
+ * iser_dma_unmap_task_data - DMA-unmap one scatterlist of a task.
+ * @iser_task: task that owns the buffer
+ * @data:      the buffer to unmap; expected to be one of the task's
+ *             data[] or prot[] slots
+ *
+ * The direction passed to the unmap call has to be the same one that
+ * was used when the list was mapped. Buffers in the ISER_DIR_OUT slots
+ * are mapped DMA_TO_DEVICE (write path), everything else is mapped
+ * DMA_FROM_DEVICE, so the direction is derived from which slot @data
+ * points at instead of being hard-coded.
+ */
+void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
+                             struct iser_data_buf *data)
 {
        struct ib_device *dev;
-       struct iser_data_buf *data;
+       enum dma_data_direction dma_dir;
 
-       dev = iser_task->iser_conn->ib_conn->device->ib_device;
-
-       if (iser_task->dir[ISER_DIR_IN]) {
-               data = &iser_task->data[ISER_DIR_IN];
-               ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE);
-       }
-
-       if (iser_task->dir[ISER_DIR_OUT]) {
-               data = &iser_task->data[ISER_DIR_OUT];
-               ib_dma_unmap_sg(dev, data->buf, data->size, DMA_TO_DEVICE);
-       }
+       dev = iser_task->ib_conn->device->ib_device;
+
+       /* keep the unmap direction symmetric with the map direction */
+       if (data == &iser_task->data[ISER_DIR_OUT] ||
+           data == &iser_task->prot[ISER_DIR_OUT])
+               dma_dir = DMA_TO_DEVICE;
+       else
+               dma_dir = DMA_FROM_DEVICE;
+
+       ib_dma_unmap_sg(dev, data->buf, data->size, dma_dir);
 }
 
 static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
                              struct ib_device *ibdev,
+                             struct iser_data_buf *mem,
+                             struct iser_data_buf *mem_copy,
                              enum iser_data_dir cmd_dir,
                              int aligned_len)
 {
-       struct iscsi_conn    *iscsi_conn = iser_task->iser_conn->iscsi_conn;
-       struct iser_data_buf *mem = &iser_task->data[cmd_dir];
+       struct iscsi_conn    *iscsi_conn = iser_task->ib_conn->iscsi_conn;
 
        iscsi_conn->fmr_unalign_cnt++;
        iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
@@ -363,12 +358,12 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
                iser_data_buf_dump(mem, ibdev);
 
        /* unmap the command data before accessing it */
-       iser_dma_unmap_task_data(iser_task);
+       iser_dma_unmap_task_data(iser_task, mem);
 
        /* allocate copy buf, if we are writing, copy the */
        /* unaligned scatterlist, dma map the copy        */
-       if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
-                       return -ENOMEM;
+       if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0)
+               return -ENOMEM;
 
        return 0;
 }
@@ -382,7 +377,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
 int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
                          enum iser_data_dir cmd_dir)
 {
-       struct iser_conn     *ib_conn = iser_task->iser_conn->ib_conn;
+       struct iser_conn     *ib_conn = iser_task->ib_conn;
        struct iser_device   *device = ib_conn->device;
        struct ib_device     *ibdev = device->ib_device;
        struct iser_data_buf *mem = &iser_task->data[cmd_dir];
@@ -396,7 +391,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
 
        aligned_len = iser_data_buf_aligned_len(mem, ibdev);
        if (aligned_len != mem->dma_nents) {
-               err = fall_to_bounce_buf(iser_task, ibdev,
+               err = fall_to_bounce_buf(iser_task, ibdev, mem,
+                                        &iser_task->data_copy[cmd_dir],
                                         cmd_dir, aligned_len);
                if (err) {
                        iser_err("failed to allocate bounce buffer\n");
@@ -422,8 +418,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
                         (unsigned long)regd_buf->reg.va,
                         (unsigned long)regd_buf->reg.len);
        } else { /* use FMR for multiple dma entries */
-               iser_page_vec_build(mem, ib_conn->fastreg.fmr.page_vec, ibdev);
-               err = iser_reg_page_vec(ib_conn, ib_conn->fastreg.fmr.page_vec,
+               iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev);
+               err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec,
                                        &regd_buf->reg);
                if (err && err != -EAGAIN) {
                        iser_data_buf_dump(mem, ibdev);
@@ -431,12 +427,12 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
                                 mem->dma_nents,
                                 ntoh24(iser_task->desc.iscsi_header.dlength));
                        iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
-                                ib_conn->fastreg.fmr.page_vec->data_size,
-                                ib_conn->fastreg.fmr.page_vec->length,
-                                ib_conn->fastreg.fmr.page_vec->offset);
-                       for (i = 0; i < ib_conn->fastreg.fmr.page_vec->length; i++)
+                                ib_conn->fmr.page_vec->data_size,
+                                ib_conn->fmr.page_vec->length,
+                                ib_conn->fmr.page_vec->offset);
+                       for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
                                iser_err("page_vec[%d] = 0x%llx\n", i,
-                                        (unsigned long long) ib_conn->fastreg.fmr.page_vec->pages[i]);
+                                        (unsigned long long) ib_conn->fmr.page_vec->pages[i]);
                }
                if (err)
                        return err;
@@ -444,94 +440,280 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
        return 0;
 }
 
-static int iser_fast_reg_mr(struct fast_reg_descriptor *desc,
-                           struct iser_conn *ib_conn,
+/* Map a SCSI protection (DIF) type onto the matching IB T10-DIF type. */
+static inline enum ib_t10_dif_type
+scsi2ib_prot_type(unsigned char prot_type)
+{
+       /* TYPE0 and any unrecognised value both map to IB_T10DIF_NONE */
+       enum ib_t10_dif_type ib_type = IB_T10DIF_NONE;
+
+       if (prot_type == SCSI_PROT_DIF_TYPE1)
+               ib_type = IB_T10DIF_TYPE1;
+       else if (prot_type == SCSI_PROT_DIF_TYPE2)
+               ib_type = IB_T10DIF_TYPE2;
+       else if (prot_type == SCSI_PROT_DIF_TYPE3)
+               ib_type = IB_T10DIF_TYPE3;
+
+       return ib_type;
+}
+
+
+/* Fill mem/wire T10-DIF signature attributes from the command's PI op. */
+static int
+iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
+{
+       unsigned char scsi_ptype = scsi_get_prot_type(sc);
+       enum ib_t10_dif_type pi_type = scsi2ib_prot_type(scsi_ptype);
+       u32 ref_tag = scsi_get_lba(sc) & 0xffffffff;
+       bool mem_pi = true, wire_pi = true;
+
+       sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF;
+       sig_attrs->mem.sig.dif.pi_interval = sc->device->sector_size;
+       sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;
+       sig_attrs->wire.sig.dif.pi_interval = sc->device->sector_size;
+
+       switch (scsi_get_prot_op(sc)) {
+       case SCSI_PROT_WRITE_INSERT:
+       case SCSI_PROT_READ_STRIP:
+               mem_pi = false;         /* memory domain gets no DIF type */
+               break;
+       case SCSI_PROT_READ_INSERT:
+       case SCSI_PROT_WRITE_STRIP:
+               wire_pi = false;        /* wire domain gets no DIF type */
+               break;
+       case SCSI_PROT_READ_PASS:
+       case SCSI_PROT_WRITE_PASS:
+               break;
+       default:
+               iser_err("Unsupported PI operation %d\n",
+                        scsi_get_prot_op(sc));
+               return -EINVAL;
+       }
+
+       sig_attrs->mem.sig.dif.type = mem_pi ? pi_type : IB_T10DIF_NONE;
+       if (mem_pi) {
+               sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
+               sig_attrs->mem.sig.dif.ref_tag = ref_tag;
+       }
+       sig_attrs->wire.sig.dif.type = wire_pi ? pi_type : IB_T10DIF_NONE;
+       if (wire_pi) {
+               sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
+               sig_attrs->wire.sig.dif.ref_tag = ref_tag;
+       }
+
+       return 0;
+}
+
+
+/* Pick the guard/ref-tag checks for the command's protection type. */
+static int
+iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
+{
+       switch (scsi_get_prot_type(sc)) {
+       case SCSI_PROT_DIF_TYPE0:
+               *mask = 0x0;
+               return 0;
+       case SCSI_PROT_DIF_TYPE1:
+       case SCSI_PROT_DIF_TYPE2:
+               *mask = ISER_CHECK_GUARD | ISER_CHECK_REFTAG;
+               return 0;
+       case SCSI_PROT_DIF_TYPE3:
+               *mask = ISER_CHECK_GUARD;
+               return 0;
+       default:
+               break;
+       }
+
+       iser_err("Unsupported protection type %d\n", scsi_get_prot_type(sc));
+       return -EINVAL;
+}
+
+/*
+ * Post a signature-handover registration binding the data and protection
+ * SGEs to the descriptor's signature MR, preceded by a local invalidate of
+ * that MR's rkey when ISER_SIG_KEY_VALID is not set. On success *sig_sge is
+ * filled with the MR's lkey, address 0 and the combined length.
+ */
+static int
+iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
+               struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
+               struct ib_sge *prot_sge, struct ib_sge *sig_sge)
+{
+       struct ib_mr *sig_mr = desc->pi_ctx->sig_mr;
+       struct scsi_cmnd *sc = iser_task->sc;
+       struct ib_send_wr reg_wr, li_wr;
+       struct ib_send_wr *first_wr = &reg_wr, *bad_wr;
+       struct ib_sig_attrs attrs;
+       int rc;
+
+       memset(&attrs, 0, sizeof(attrs));
+       rc = iser_set_sig_attrs(sc, &attrs);
+       if (rc)
+               return rc;
+
+       rc = iser_set_prot_checks(sc, &attrs.check_mask);
+       if (rc)
+               return rc;
+
+       if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
+               u32 key = (u8)(sig_mr->rkey & 0x000000FF);
+
+               /* invalidate the current rkey, then bump to the next key */
+               memset(&li_wr, 0, sizeof(li_wr));
+               li_wr.opcode = IB_WR_LOCAL_INV;
+               li_wr.wr_id = ISER_FASTREG_LI_WRID;
+               li_wr.ex.invalidate_rkey = sig_mr->rkey;
+               li_wr.next = &reg_wr;
+               first_wr = &li_wr;
+               ib_update_fast_reg_key(sig_mr, ++key);
+       }
+
+       memset(&reg_wr, 0, sizeof(reg_wr));
+       reg_wr.opcode = IB_WR_REG_SIG_MR;
+       reg_wr.wr_id = ISER_FASTREG_LI_WRID;
+       reg_wr.sg_list = data_sge;
+       reg_wr.num_sge = 1;
+       reg_wr.wr.sig_handover.sig_attrs = &attrs;
+       reg_wr.wr.sig_handover.sig_mr = sig_mr;
+       reg_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
+                                             IB_ACCESS_REMOTE_READ |
+                                             IB_ACCESS_REMOTE_WRITE;
+       if (scsi_prot_sg_count(sc))
+               reg_wr.wr.sig_handover.prot = prot_sge;
+
+       rc = ib_post_send(iser_task->ib_conn->qp, first_wr, &bad_wr);
+       if (rc) {
+               iser_err("reg_sig_mr failed, ret:%d\n", rc);
+               return rc;
+       }
+       desc->reg_indicators &= ~ISER_SIG_KEY_VALID;
+
+       sig_sge->lkey = sig_mr->lkey;
+       sig_sge->addr = 0;
+       sig_sge->length = data_sge->length + prot_sge->length;
+       /* INSERT/STRIP ops add 8 bytes per sector_size chunk of data */
+       if (scsi_get_prot_op(sc) == SCSI_PROT_WRITE_INSERT ||
+           scsi_get_prot_op(sc) == SCSI_PROT_READ_STRIP)
+               sig_sge->length += (data_sge->length /
+                                  sc->device->sector_size) * 8;
+
+       iser_dbg("sig_sge: addr: 0x%llx  length: %u lkey: 0x%x\n",
+                sig_sge->addr, sig_sge->length, sig_sge->lkey);
+       return rc;
+}
+
+static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
                            struct iser_regd_buf *regd_buf,
-                           u32 offset, unsigned int data_size,
-                           unsigned int page_list_len)
+                           struct iser_data_buf *mem,
+                           enum iser_reg_indicator ind,
+                           struct ib_sge *sge)
 {
+       struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
+       struct iser_conn *ib_conn = iser_task->ib_conn;
+       struct iser_device *device = ib_conn->device;
+       struct ib_device *ibdev = device->ib_device;
+       struct ib_mr *mr;
+       struct ib_fast_reg_page_list *frpl;
        struct ib_send_wr fastreg_wr, inv_wr;
        struct ib_send_wr *bad_wr, *wr = NULL;
        u8 key;
-       int ret;
+       int ret, offset, size, plen;
+
+       /* if there is a single dma entry, the dma mr suffices */
+       if (mem->dma_nents == 1) {
+               struct scatterlist *sg = (struct scatterlist *)mem->buf;
 
-       if (!desc->valid) {
+               sge->lkey = device->mr->lkey;
+               sge->addr   = ib_sg_dma_address(ibdev, &sg[0]);
+               sge->length  = ib_sg_dma_len(ibdev, &sg[0]);
+
+               iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n",
+                        sge->lkey, sge->addr, sge->length);
+               return 0;
+       }
+
+       if (ind == ISER_DATA_KEY_VALID) {
+               mr = desc->data_mr;
+               frpl = desc->data_frpl;
+       } else {
+               mr = desc->pi_ctx->prot_mr;
+               frpl = desc->pi_ctx->prot_frpl;
+       }
+
+       plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
+                                  &offset, &size);
+       if (plen * SIZE_4K < size) {
+               iser_err("fast reg page_list too short to hold this SG\n");
+               return -EINVAL;
+       }
+
+       if (!(desc->reg_indicators & ind)) {
                memset(&inv_wr, 0, sizeof(inv_wr));
+               inv_wr.wr_id = ISER_FASTREG_LI_WRID;
                inv_wr.opcode = IB_WR_LOCAL_INV;
-               inv_wr.send_flags = IB_SEND_SIGNALED;
-               inv_wr.ex.invalidate_rkey = desc->data_mr->rkey;
+               inv_wr.ex.invalidate_rkey = mr->rkey;
                wr = &inv_wr;
                /* Bump the key */
-               key = (u8)(desc->data_mr->rkey & 0x000000FF);
-               ib_update_fast_reg_key(desc->data_mr, ++key);
+               key = (u8)(mr->rkey & 0x000000FF);
+               ib_update_fast_reg_key(mr, ++key);
        }
 
        /* Prepare FASTREG WR */
        memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+       fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
        fastreg_wr.opcode = IB_WR_FAST_REG_MR;
-       fastreg_wr.send_flags = IB_SEND_SIGNALED;
-       fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset;
-       fastreg_wr.wr.fast_reg.page_list = desc->data_frpl;
-       fastreg_wr.wr.fast_reg.page_list_len = page_list_len;
+       fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
+       fastreg_wr.wr.fast_reg.page_list = frpl;
+       fastreg_wr.wr.fast_reg.page_list_len = plen;
        fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
-       fastreg_wr.wr.fast_reg.length = data_size;
-       fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey;
+       fastreg_wr.wr.fast_reg.length = size;
+       fastreg_wr.wr.fast_reg.rkey = mr->rkey;
        fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
                                               IB_ACCESS_REMOTE_WRITE |
                                               IB_ACCESS_REMOTE_READ);
 
-       if (!wr) {
+       if (!wr)
                wr = &fastreg_wr;
-               atomic_inc(&ib_conn->post_send_buf_count);
-       } else {
+       else
                wr->next = &fastreg_wr;
-               atomic_add(2, &ib_conn->post_send_buf_count);
-       }
 
        ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
        if (ret) {
-               if (bad_wr->next)
-                       atomic_sub(2, &ib_conn->post_send_buf_count);
-               else
-                       atomic_dec(&ib_conn->post_send_buf_count);
                iser_err("fast registration failed, ret:%d\n", ret);
                return ret;
        }
-       desc->valid = false;
+       desc->reg_indicators &= ~ind;
 
-       regd_buf->reg.mem_h = desc;
-       regd_buf->reg.lkey = desc->data_mr->lkey;
-       regd_buf->reg.rkey = desc->data_mr->rkey;
-       regd_buf->reg.va = desc->data_frpl->page_list[0] + offset;
-       regd_buf->reg.len = data_size;
-       regd_buf->reg.is_mr = 1;
+       sge->lkey = mr->lkey;
+       sge->addr = frpl->page_list[0] + offset;
+       sge->length = size;
 
        return ret;
 }
 
 /**
- * iser_reg_rdma_mem_frwr - Registers memory intended for RDMA,
+ * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA,
  * using Fast Registration WR (if possible) obtaining rkey and va
  *
  * returns 0 on success, errno code on failure
  */
-int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task,
-                          enum iser_data_dir cmd_dir)
+int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
+                             enum iser_data_dir cmd_dir)
 {
-       struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+       struct iser_conn *ib_conn = iser_task->ib_conn;
        struct iser_device *device = ib_conn->device;
        struct ib_device *ibdev = device->ib_device;
        struct iser_data_buf *mem = &iser_task->data[cmd_dir];
        struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
-       struct fast_reg_descriptor *desc;
-       unsigned int data_size, page_list_len;
+       struct fast_reg_descriptor *desc = NULL;
+       struct ib_sge data_sge;
        int err, aligned_len;
        unsigned long flags;
-       u32 offset;
 
        aligned_len = iser_data_buf_aligned_len(mem, ibdev);
        if (aligned_len != mem->dma_nents) {
-               err = fall_to_bounce_buf(iser_task, ibdev,
+               err = fall_to_bounce_buf(iser_task, ibdev, mem,
+                                        &iser_task->data_copy[cmd_dir],
                                         cmd_dir, aligned_len);
                if (err) {
                        iser_err("failed to allocate bounce buffer\n");
@@ -540,41 +722,79 @@ int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task,
                mem = &iser_task->data_copy[cmd_dir];
        }
 
-       /* if there a single dma entry, dma mr suffices */
-       if (mem->dma_nents == 1) {
-               struct scatterlist *sg = (struct scatterlist *)mem->buf;
-
-               regd_buf->reg.lkey = device->mr->lkey;
-               regd_buf->reg.rkey = device->mr->rkey;
-               regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
-               regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);
-               regd_buf->reg.is_mr = 0;
-       } else {
+       if (mem->dma_nents != 1 ||
+           scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
                spin_lock_irqsave(&ib_conn->lock, flags);
-               desc = list_first_entry(&ib_conn->fastreg.frwr.pool,
+               desc = list_first_entry(&ib_conn->fastreg.pool,
                                        struct fast_reg_descriptor, list);
                list_del(&desc->list);
                spin_unlock_irqrestore(&ib_conn->lock, flags);
-               page_list_len = iser_sg_to_page_vec(mem, device->ib_device,
-                                                   desc->data_frpl->page_list,
-                                                   &offset, &data_size);
-
-               if (page_list_len * SIZE_4K < data_size) {
-                       iser_err("fast reg page_list too short to hold this SG\n");
-                       err = -EINVAL;
-                       goto err_reg;
+               regd_buf->reg.mem_h = desc;
+       }
+
+       err = iser_fast_reg_mr(iser_task, regd_buf, mem,
+                              ISER_DATA_KEY_VALID, &data_sge);
+       if (err)
+               goto err_reg;
+
+       if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
+               struct ib_sge prot_sge, sig_sge;
+
+               memset(&prot_sge, 0, sizeof(prot_sge));
+               if (scsi_prot_sg_count(iser_task->sc)) {
+                       mem = &iser_task->prot[cmd_dir];
+                       aligned_len = iser_data_buf_aligned_len(mem, ibdev);
+                       if (aligned_len != mem->dma_nents) {
+                               err = fall_to_bounce_buf(iser_task, ibdev, mem,
+                                                        &iser_task->prot_copy[cmd_dir],
+                                                        cmd_dir, aligned_len);
+                               if (err) {
+                                       iser_err("failed to allocate bounce buffer\n");
+                                       goto err_reg; /* requeue desc */
+                               }
+                               mem = &iser_task->prot_copy[cmd_dir];
+                       }
+
+                       err = iser_fast_reg_mr(iser_task, regd_buf, mem,
+                                              ISER_PROT_KEY_VALID, &prot_sge);
+                       if (err)
+                               goto err_reg;
                }
 
-               err = iser_fast_reg_mr(desc, ib_conn, regd_buf,
-                                      offset, data_size, page_list_len);
-               if (err)
-                       goto err_reg;
+               err = iser_reg_sig_mr(iser_task, desc, &data_sge,
+                                     &prot_sge, &sig_sge);
+               if (err) {
+                       iser_err("Failed to register signature mr\n");
+                       goto err_reg; /* requeue desc */
+               }
+               desc->reg_indicators |= ISER_FASTREG_PROTECTED;
+
+               regd_buf->reg.lkey = sig_sge.lkey;
+               regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
+               regd_buf->reg.va = sig_sge.addr;
+               regd_buf->reg.len = sig_sge.length;
+               regd_buf->reg.is_mr = 1;
+       } else {
+               if (desc) {
+                       regd_buf->reg.rkey = desc->data_mr->rkey;
+                       regd_buf->reg.is_mr = 1;
+               } else {
+                       regd_buf->reg.rkey = device->mr->rkey;
+                       regd_buf->reg.is_mr = 0;
+               }
+
+               regd_buf->reg.lkey = data_sge.lkey;
+               regd_buf->reg.va = data_sge.addr;
+               regd_buf->reg.len = data_sge.length;
        }
 
        return 0;
 err_reg:
-       spin_lock_irqsave(&ib_conn->lock, flags);
-       list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
-       spin_unlock_irqrestore(&ib_conn->lock, flags);
+       if (desc) { /* NULL if no descriptor was taken from the pool */
+               spin_lock_irqsave(&ib_conn->lock, flags);
+               list_add_tail(&desc->list, &ib_conn->fastreg.pool);
+               spin_unlock_irqrestore(&ib_conn->lock, flags);
+       }
+
        return err;
 }
index ca37edef27910cc188ab89a6e626ce29d6d5a344..32849f2becde9e6fd4882bf0e3b63545351069c9 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -71,17 +71,14 @@ static void iser_event_handler(struct ib_event_handler *handler,
  */
 static int iser_create_device_ib_res(struct iser_device *device)
 {
-       int i, j;
        struct iser_cq_desc *cq_desc;
-       struct ib_device_attr *dev_attr;
+       struct ib_device_attr *dev_attr = &device->dev_attr;
+       int ret, i, j;
 
-       dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL);
-       if (!dev_attr)
-               return -ENOMEM;
-
-       if (ib_query_device(device->ib_device, dev_attr)) {
+       ret = ib_query_device(device->ib_device, dev_attr);
+       if (ret) {
                pr_warn("Query device failed for %s\n", device->ib_device->name);
-               goto dev_attr_err;
+               return ret;
        }
 
        /* Assign function handles  - based on FMR support */
@@ -94,14 +91,14 @@ static int iser_create_device_ib_res(struct iser_device *device)
                device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
        } else
        if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
-               iser_info("FRWR supported, using FRWR for registration\n");
-               device->iser_alloc_rdma_reg_res = iser_create_frwr_pool;
-               device->iser_free_rdma_reg_res = iser_free_frwr_pool;
-               device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr;
-               device->iser_unreg_rdma_mem = iser_unreg_mem_frwr;
+               iser_info("FastReg supported, using FastReg for registration\n");
+               device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool;
+               device->iser_free_rdma_reg_res = iser_free_fastreg_pool;
+               device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg;
+               device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg;
        } else {
-               iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n");
-               goto dev_attr_err;
+               iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
+               return -1;
        }
 
        device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
@@ -158,7 +155,6 @@ static int iser_create_device_ib_res(struct iser_device *device)
        if (ib_register_event_handler(&device->event_handler))
                goto handler_err;
 
-       kfree(dev_attr);
        return 0;
 
 handler_err:
@@ -178,8 +174,6 @@ pd_err:
        kfree(device->cq_desc);
 cq_desc_err:
        iser_err("failed to allocate an IB resource\n");
-dev_attr_err:
-       kfree(dev_attr);
        return -1;
 }
 
@@ -221,13 +215,13 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
        struct ib_fmr_pool_param params;
        int ret = -ENOMEM;
 
-       ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) +
-                                               (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)),
-                                               GFP_KERNEL);
-       if (!ib_conn->fastreg.fmr.page_vec)
+       ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) +
+                                       (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)),
+                                       GFP_KERNEL);
+       if (!ib_conn->fmr.page_vec)
                return ret;
 
-       ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1);
+       ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1);
 
        params.page_shift        = SHIFT_4K;
        /* when the first/last SG element are not start/end *
@@ -243,16 +237,16 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
                                    IB_ACCESS_REMOTE_WRITE |
                                    IB_ACCESS_REMOTE_READ);
 
-       ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, &params);
-       if (!IS_ERR(ib_conn->fastreg.fmr.pool))
+       ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, &params);
+       if (!IS_ERR(ib_conn->fmr.pool))
                return 0;
 
        /* no FMR => no need for page_vec */
-       kfree(ib_conn->fastreg.fmr.page_vec);
-       ib_conn->fastreg.fmr.page_vec = NULL;
+       kfree(ib_conn->fmr.page_vec);
+       ib_conn->fmr.page_vec = NULL;
 
-       ret = PTR_ERR(ib_conn->fastreg.fmr.pool);
-       ib_conn->fastreg.fmr.pool = NULL;
+       ret = PTR_ERR(ib_conn->fmr.pool);
+       ib_conn->fmr.pool = NULL;
        if (ret != -ENOSYS) {
                iser_err("FMR allocation failed, err %d\n", ret);
                return ret;
@@ -268,93 +262,173 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
 void iser_free_fmr_pool(struct iser_conn *ib_conn)
 {
        iser_info("freeing conn %p fmr pool %p\n",
-                 ib_conn, ib_conn->fastreg.fmr.pool);
+                 ib_conn, ib_conn->fmr.pool);
+
+       if (ib_conn->fmr.pool != NULL)
+               ib_destroy_fmr_pool(ib_conn->fmr.pool);
+
+       ib_conn->fmr.pool = NULL;
+
+       kfree(ib_conn->fmr.page_vec);
+       ib_conn->fmr.page_vec = NULL;
+}
+
+static int
+iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
+                        bool pi_enable, struct fast_reg_descriptor *desc)
+{
+       int ret;
+
+       desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
+                                                     ISCSI_ISER_SG_TABLESIZE + 1);
+       if (IS_ERR(desc->data_frpl)) {
+               ret = PTR_ERR(desc->data_frpl);
+               iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n",
+                        ret);
+               return PTR_ERR(desc->data_frpl);
+       }
+
+       desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1);
+       if (IS_ERR(desc->data_mr)) {
+               ret = PTR_ERR(desc->data_mr);
+               iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
+               goto fast_reg_mr_failure;
+       }
+       desc->reg_indicators |= ISER_DATA_KEY_VALID;
+
+       if (pi_enable) {
+               struct ib_mr_init_attr mr_init_attr = {0};
+               struct iser_pi_context *pi_ctx = NULL;
 
-       if (ib_conn->fastreg.fmr.pool != NULL)
-               ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool);
+               desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
+               if (!desc->pi_ctx) {
+                       iser_err("Failed to allocate pi context\n");
+                       ret = -ENOMEM;
+                       goto pi_ctx_alloc_failure;
+               }
+               pi_ctx = desc->pi_ctx;
+
+               pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
+                                                   ISCSI_ISER_SG_TABLESIZE);
+               if (IS_ERR(pi_ctx->prot_frpl)) {
+                       ret = PTR_ERR(pi_ctx->prot_frpl);
+                       iser_err("Failed to allocate prot frpl ret=%d\n",
+                                ret);
+                       goto prot_frpl_failure;
+               }
+
+               pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
+                                               ISCSI_ISER_SG_TABLESIZE + 1);
+               if (IS_ERR(pi_ctx->prot_mr)) {
+                       ret = PTR_ERR(pi_ctx->prot_mr);
+                       iser_err("Failed to allocate prot frmr ret=%d\n",
+                                ret);
+                       goto prot_mr_failure;
+               }
+               desc->reg_indicators |= ISER_PROT_KEY_VALID;
+
+               mr_init_attr.max_reg_descriptors = 2;
+               mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
+               pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
+               if (IS_ERR(pi_ctx->sig_mr)) {
+                       ret = PTR_ERR(pi_ctx->sig_mr);
+                       iser_err("Failed to allocate signature enabled mr err=%d\n",
+                                ret);
+                       goto sig_mr_failure;
+               }
+               desc->reg_indicators |= ISER_SIG_KEY_VALID;
+       }
+       desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
 
-       ib_conn->fastreg.fmr.pool = NULL;
+       iser_dbg("Create fr_desc %p page_list %p\n",
+                desc, desc->data_frpl->page_list);
 
-       kfree(ib_conn->fastreg.fmr.page_vec);
-       ib_conn->fastreg.fmr.page_vec = NULL;
+       return 0;
+sig_mr_failure:
+       ib_dereg_mr(desc->pi_ctx->prot_mr);
+prot_mr_failure:
+       ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
+prot_frpl_failure:
+       kfree(desc->pi_ctx);
+pi_ctx_alloc_failure:
+       ib_dereg_mr(desc->data_mr);
+fast_reg_mr_failure:
+       ib_free_fast_reg_page_list(desc->data_frpl);
+
+       return ret;
 }
 
 /**
- * iser_create_frwr_pool - Creates pool of fast_reg descriptors
+ * iser_create_fastreg_pool - Creates pool of fast_reg descriptors
  * for fast registration work requests.
  * returns 0 on success, or errno code on failure
  */
-int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max)
 {
        struct iser_device      *device = ib_conn->device;
        struct fast_reg_descriptor      *desc;
        int i, ret;
 
-       INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool);
-       ib_conn->fastreg.frwr.pool_size = 0;
+       INIT_LIST_HEAD(&ib_conn->fastreg.pool);
+       ib_conn->fastreg.pool_size = 0;
        for (i = 0; i < cmds_max; i++) {
-               desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+               desc = kzalloc(sizeof(*desc), GFP_KERNEL);
                if (!desc) {
                        iser_err("Failed to allocate a new fast_reg descriptor\n");
                        ret = -ENOMEM;
                        goto err;
                }
 
-               desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device,
-                                                        ISCSI_ISER_SG_TABLESIZE + 1);
-               if (IS_ERR(desc->data_frpl)) {
-                       ret = PTR_ERR(desc->data_frpl);
-                       iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret);
-                       goto fast_reg_page_failure;
+               ret = iser_create_fastreg_desc(device->ib_device, device->pd,
+                                              ib_conn->pi_support, desc);
+               if (ret) {
+                       iser_err("Failed to create fastreg descriptor err=%d\n",
+                                ret);
+                       kfree(desc);
+                       goto err;
                }
 
-               desc->data_mr = ib_alloc_fast_reg_mr(device->pd,
-                                                    ISCSI_ISER_SG_TABLESIZE + 1);
-               if (IS_ERR(desc->data_mr)) {
-                       ret = PTR_ERR(desc->data_mr);
-                       iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
-                       goto fast_reg_mr_failure;
-               }
-               desc->valid = true;
-               list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
-               ib_conn->fastreg.frwr.pool_size++;
+               list_add_tail(&desc->list, &ib_conn->fastreg.pool);
+               ib_conn->fastreg.pool_size++;
        }
 
        return 0;
 
-fast_reg_mr_failure:
-       ib_free_fast_reg_page_list(desc->data_frpl);
-fast_reg_page_failure:
-       kfree(desc);
 err:
-       iser_free_frwr_pool(ib_conn);
+       iser_free_fastreg_pool(ib_conn);
        return ret;
 }
 
 /**
- * iser_free_frwr_pool - releases the pool of fast_reg descriptors
+ * iser_free_fastreg_pool - releases the pool of fast_reg descriptors
  */
-void iser_free_frwr_pool(struct iser_conn *ib_conn)
+void iser_free_fastreg_pool(struct iser_conn *ib_conn)
 {
        struct fast_reg_descriptor *desc, *tmp;
        int i = 0;
 
-       if (list_empty(&ib_conn->fastreg.frwr.pool))
+       if (list_empty(&ib_conn->fastreg.pool))
                return;
 
-       iser_info("freeing conn %p frwr pool\n", ib_conn);
+       iser_info("freeing conn %p fr pool\n", ib_conn);
 
-       list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) {
+       list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
                list_del(&desc->list);
                ib_free_fast_reg_page_list(desc->data_frpl);
                ib_dereg_mr(desc->data_mr);
+               if (desc->pi_ctx) {
+                       ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
+                       ib_dereg_mr(desc->pi_ctx->prot_mr);
+                       ib_destroy_mr(desc->pi_ctx->sig_mr);
+                       kfree(desc->pi_ctx);
+               }
                kfree(desc);
                ++i;
        }
 
-       if (i < ib_conn->fastreg.frwr.pool_size)
+       if (i < ib_conn->fastreg.pool_size)
                iser_warn("pool still has %d regions registered\n",
-                         ib_conn->fastreg.frwr.pool_size - i);
+                         ib_conn->fastreg.pool_size - i);
 }
 
 /**
@@ -389,12 +463,17 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
        init_attr.qp_context    = (void *)ib_conn;
        init_attr.send_cq       = device->tx_cq[min_index];
        init_attr.recv_cq       = device->rx_cq[min_index];
-       init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
        init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
        init_attr.cap.max_send_sge = 2;
        init_attr.cap.max_recv_sge = 1;
        init_attr.sq_sig_type   = IB_SIGNAL_REQ_WR;
        init_attr.qp_type       = IB_QPT_RC;
+       if (ib_conn->pi_support) {
+               init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS;
+               init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
+       } else {
+               init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
+       }
 
        ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
        if (ret)
@@ -591,6 +670,19 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id)
        ib_conn = (struct iser_conn *)cma_id->context;
        ib_conn->device = device;
 
+       /* connection T10-PI support */
+       if (iser_pi_enable) {
+               if (!(device->dev_attr.device_cap_flags &
+                     IB_DEVICE_SIGNATURE_HANDOVER)) {
+                       iser_warn("T10-PI requested but not supported on %s, "
+                                 "continue without T10-PI\n",
+                                 ib_conn->device->ib_device->name);
+                       ib_conn->pi_support = false;
+               } else {
+                       ib_conn->pi_support = true;
+               }
+       }
+
        ret = rdma_resolve_route(cma_id, 1000);
        if (ret) {
                iser_err("resolve route failed: %d\n", ret);
@@ -636,6 +728,11 @@ failure:
 static void iser_connected_handler(struct rdma_cm_id *cma_id)
 {
        struct iser_conn *ib_conn;
+       struct ib_qp_attr attr;
+       struct ib_qp_init_attr init_attr;
+
+       (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
+       iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
 
        ib_conn = (struct iser_conn *)cma_id->context;
        ib_conn->state = ISER_CONN_UP;
@@ -653,9 +750,8 @@ static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
         * terminated asynchronously from the iSCSI layer's perspective.  */
        if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
                                        ISER_CONN_TERMINATING)){
-               if (ib_conn->iser_conn)
-                       iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
-                                          ISCSI_ERR_CONN_FAILED);
+               if (ib_conn->iscsi_conn)
+                       iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED);
                else
                        iser_err("iscsi_iser connection isn't bound\n");
        }
@@ -801,7 +897,7 @@ int iser_reg_page_vec(struct iser_conn     *ib_conn,
        page_list = page_vec->pages;
        io_addr   = page_list[0];
 
-       mem  = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool,
+       mem  = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
                                    page_list,
                                    page_vec->length,
                                    io_addr);
@@ -855,11 +951,11 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
        reg->mem_h = NULL;
 }
 
-void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
-                        enum iser_data_dir cmd_dir)
+void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
+                           enum iser_data_dir cmd_dir)
 {
        struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
-       struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+       struct iser_conn *ib_conn = iser_task->ib_conn;
        struct fast_reg_descriptor *desc = reg->mem_h;
 
        if (!reg->is_mr)
@@ -868,7 +964,7 @@ void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
        reg->mem_h = NULL;
        reg->is_mr = 0;
        spin_lock_bh(&ib_conn->lock);
-       list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+       list_add_tail(&desc->list, &ib_conn->fastreg.pool);
        spin_unlock_bh(&ib_conn->lock);
 }
 
@@ -969,7 +1065,7 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,
                 * perspective.                                             */
                if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
                    ISER_CONN_TERMINATING))
-                       iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
+                       iscsi_conn_failure(ib_conn->iscsi_conn,
                                           ISCSI_ERR_CONN_FAILED);
 
                /* no more non completed posts to the QP, complete the
@@ -993,18 +1089,16 @@ static int iser_drain_tx_cq(struct iser_device  *device, int cq_index)
                if (wc.status == IB_WC_SUCCESS) {
                        if (wc.opcode == IB_WC_SEND)
                                iser_snd_completion(tx_desc, ib_conn);
-                       else if (wc.opcode == IB_WC_LOCAL_INV ||
-                                wc.opcode == IB_WC_FAST_REG_MR) {
-                               atomic_dec(&ib_conn->post_send_buf_count);
-                               continue;
-                       } else
+                       else
                                iser_err("expected opcode %d got %d\n",
                                        IB_WC_SEND, wc.opcode);
                } else {
                        iser_err("tx id %llx status %d vend_err %x\n",
-                               wc.wr_id, wc.status, wc.vendor_err);
-                       atomic_dec(&ib_conn->post_send_buf_count);
-                       iser_handle_comp_error(tx_desc, ib_conn);
+                                wc.wr_id, wc.status, wc.vendor_err);
+                       if (wc.wr_id != ISER_FASTREG_LI_WRID) {
+                               atomic_dec(&ib_conn->post_send_buf_count);
+                               iser_handle_comp_error(tx_desc, ib_conn);
+                       }
                }
                completed_tx++;
        }
@@ -1022,8 +1116,12 @@ static void iser_cq_tasklet_fn(unsigned long data)
         struct iser_rx_desc *desc;
         unsigned long       xfer_len;
        struct iser_conn *ib_conn;
-       int completed_tx, completed_rx;
-       completed_tx = completed_rx = 0;
+       int completed_tx, completed_rx = 0;
+
+       /* First do tx drain, so in a case where we have rx flushes and a successful
+        * tx completion we will still go through completion error handling.
+        */
+       completed_tx = iser_drain_tx_cq(device, cq_index);
 
        while (ib_poll_cq(cq, 1, &wc) == 1) {
                desc     = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
@@ -1051,7 +1149,6 @@ static void iser_cq_tasklet_fn(unsigned long data)
         * " would not cause interrupts to be missed"                       */
        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 
-       completed_tx += iser_drain_tx_cq(device, cq_index);
        iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
 }
 
@@ -1063,3 +1160,51 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
 
        tasklet_schedule(&device->cq_tasklet[cq_index]);
 }
+
+u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
+                            enum iser_data_dir cmd_dir, sector_t *sector)
+{
+       struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
+       struct fast_reg_descriptor *desc = reg->mem_h;
+       unsigned long sector_size = iser_task->sc->device->sector_size;
+       struct ib_mr_status mr_status;
+       int ret;
+
+       if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) {
+               desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
+               ret = ib_check_mr_status(desc->pi_ctx->sig_mr,
+                                        IB_MR_CHECK_SIG_STATUS, &mr_status);
+               if (ret) {
+                       pr_err("ib_check_mr_status failed, ret %d\n", ret);
+                       goto err;
+               }
+
+               if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
+                       sector_t sector_off = mr_status.sig_err.sig_err_offset;
+
+                       do_div(sector_off, sector_size + 8);
+                       *sector = scsi_get_lba(iser_task->sc) + sector_off;
+
+                       pr_err("PI error found type %d at sector %llx "
+                              "expected %x vs actual %x\n",
+                              mr_status.sig_err.err_type,
+                              (unsigned long long)*sector,
+                              mr_status.sig_err.expected,
+                              mr_status.sig_err.actual);
+
+                       switch (mr_status.sig_err.err_type) {
+                       case IB_SIG_BAD_GUARD:
+                               return 0x1;
+                       case IB_SIG_BAD_REFTAG:
+                               return 0x3;
+                       case IB_SIG_BAD_APPTAG:
+                               return 0x2;
+                       }
+               }
+       }
+
+       return 0;
+err:
+       /* Not a lot we can do here, return ambiguous guard error */
+       return 0x1;
+}
index 529b6bcdca7a5267570a81c4b031a069b8692235..66a908bf3fb9e1d43654d07af1d346cca12a6223 100644 (file)
@@ -411,6 +411,8 @@ static void srp_path_rec_completion(int status,
 
 static int srp_lookup_path(struct srp_target_port *target)
 {
+       int ret;
+
        target->path.numb_path = 1;
 
        init_completion(&target->done);
@@ -431,7 +433,9 @@ static int srp_lookup_path(struct srp_target_port *target)
        if (target->path_query_id < 0)
                return target->path_query_id;
 
-       wait_for_completion(&target->done);
+       ret = wait_for_completion_interruptible(&target->done);
+       if (ret < 0)
+               return ret;
 
        if (target->status < 0)
                shost_printk(KERN_WARNING, target->scsi_host,
@@ -710,7 +714,9 @@ static int srp_connect_target(struct srp_target_port *target)
                ret = srp_send_req(target);
                if (ret)
                        return ret;
-               wait_for_completion(&target->done);
+               ret = wait_for_completion_interruptible(&target->done);
+               if (ret < 0)
+                       return ret;
 
                /*
                 * The CM event handling code will set status to
@@ -777,6 +783,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
  * srp_claim_req - Take ownership of the scmnd associated with a request.
  * @target: SRP target port.
  * @req: SRP request.
+ * @sdev: If not NULL, only take ownership for this SCSI device.
  * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
  *         ownership of @req->scmnd if it equals @scmnd.
  *
@@ -785,16 +792,17 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
  */
 static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,
                                       struct srp_request *req,
+                                      struct scsi_device *sdev,
                                       struct scsi_cmnd *scmnd)
 {
        unsigned long flags;
 
        spin_lock_irqsave(&target->lock, flags);
-       if (!scmnd) {
+       if (req->scmnd &&
+           (!sdev || req->scmnd->device == sdev) &&
+           (!scmnd || req->scmnd == scmnd)) {
                scmnd = req->scmnd;
                req->scmnd = NULL;
-       } else if (req->scmnd == scmnd) {
-               req->scmnd = NULL;
        } else {
                scmnd = NULL;
        }
@@ -821,9 +829,10 @@ static void srp_free_req(struct srp_target_port *target,
 }
 
 static void srp_finish_req(struct srp_target_port *target,
-                          struct srp_request *req, int result)
+                          struct srp_request *req, struct scsi_device *sdev,
+                          int result)
 {
-       struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL);
+       struct scsi_cmnd *scmnd = srp_claim_req(target, req, sdev, NULL);
 
        if (scmnd) {
                srp_free_req(target, req, scmnd, 0);
@@ -835,11 +844,20 @@ static void srp_finish_req(struct srp_target_port *target,
 static void srp_terminate_io(struct srp_rport *rport)
 {
        struct srp_target_port *target = rport->lld_data;
+       struct Scsi_Host *shost = target->scsi_host;
+       struct scsi_device *sdev;
        int i;
 
+       /*
+        * Invoking srp_terminate_io() while srp_queuecommand() is running
+        * is not safe. Hence the warning statement below.
+        */
+       shost_for_each_device(sdev, shost)
+               WARN_ON_ONCE(sdev->request_queue->request_fn_active);
+
        for (i = 0; i < target->req_ring_size; ++i) {
                struct srp_request *req = &target->req_ring[i];
-               srp_finish_req(target, req, DID_TRANSPORT_FAILFAST << 16);
+               srp_finish_req(target, req, NULL, DID_TRANSPORT_FAILFAST << 16);
        }
 }
 
@@ -876,7 +894,7 @@ static int srp_rport_reconnect(struct srp_rport *rport)
 
        for (i = 0; i < target->req_ring_size; ++i) {
                struct srp_request *req = &target->req_ring[i];
-               srp_finish_req(target, req, DID_RESET << 16);
+               srp_finish_req(target, req, NULL, DID_RESET << 16);
        }
 
        INIT_LIST_HEAD(&target->free_tx);
@@ -1284,7 +1302,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
                complete(&target->tsk_mgmt_done);
        } else {
                req = &target->req_ring[rsp->tag];
-               scmnd = srp_claim_req(target, req, NULL);
+               scmnd = srp_claim_req(target, req, NULL, NULL);
                if (!scmnd) {
                        shost_printk(KERN_ERR, target->scsi_host,
                                     "Null scmnd for RSP w/tag %016llx\n",
@@ -1804,8 +1822,10 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
                                shost_printk(KERN_WARNING, shost,
                                             PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
                        else
-                               shost_printk(KERN_WARNING, shost,
-                                           PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
+                               shost_printk(KERN_WARNING, shost, PFX
+                                            "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
+                                            target->path.sgid.raw,
+                                            target->orig_dgid, reason);
                } else
                        shost_printk(KERN_WARNING, shost,
                                     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
@@ -1863,6 +1883,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
        case IB_CM_TIMEWAIT_EXIT:
                shost_printk(KERN_ERR, target->scsi_host,
                             PFX "connection closed\n");
+               comp = 1;
 
                target->status = 0;
                break;
@@ -1999,7 +2020,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
 
        shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
 
-       if (!req || !srp_claim_req(target, req, scmnd))
+       if (!req || !srp_claim_req(target, req, NULL, scmnd))
                return SUCCESS;
        if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
                              SRP_TSK_ABORT_TASK) == 0)
@@ -2030,8 +2051,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
 
        for (i = 0; i < target->req_ring_size; ++i) {
                struct srp_request *req = &target->req_ring[i];
-               if (req->scmnd && req->scmnd->device == scmnd->device)
-                       srp_finish_req(target, req, DID_RESET << 16);
+               srp_finish_req(target, req, scmnd->device, DID_RESET << 16);
        }
 
        return SUCCESS;
@@ -2612,6 +2632,8 @@ static ssize_t srp_create_target(struct device *dev,
        target->tl_retry_count  = 7;
        target->queue_size      = SRP_DEFAULT_QUEUE_SIZE;
 
+       mutex_lock(&host->add_target_mutex);
+
        ret = srp_parse_options(buf, target);
        if (ret)
                goto err;
@@ -2649,16 +2671,9 @@ static ssize_t srp_create_target(struct device *dev,
        if (ret)
                goto err_free_mem;
 
-       ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
-
-       shost_printk(KERN_DEBUG, target->scsi_host, PFX
-                    "new target: id_ext %016llx ioc_guid %016llx pkey %04x "
-                    "service_id %016llx dgid %pI6\n",
-              (unsigned long long) be64_to_cpu(target->id_ext),
-              (unsigned long long) be64_to_cpu(target->ioc_guid),
-              be16_to_cpu(target->path.pkey),
-              (unsigned long long) be64_to_cpu(target->service_id),
-              target->path.dgid.raw);
+       ret = ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
+       if (ret)
+               goto err_free_mem;
 
        ret = srp_create_target_ib(target);
        if (ret)
@@ -2679,7 +2694,19 @@ static ssize_t srp_create_target(struct device *dev,
        if (ret)
                goto err_disconnect;
 
-       return count;
+       shost_printk(KERN_DEBUG, target->scsi_host, PFX
+                    "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
+                    be64_to_cpu(target->id_ext),
+                    be64_to_cpu(target->ioc_guid),
+                    be16_to_cpu(target->path.pkey),
+                    be64_to_cpu(target->service_id),
+                    target->path.sgid.raw, target->path.dgid.raw);
+
+       ret = count;
+
+out:
+       mutex_unlock(&host->add_target_mutex);
+       return ret;
 
 err_disconnect:
        srp_disconnect_target(target);
@@ -2695,8 +2722,7 @@ err_free_mem:
 
 err:
        scsi_host_put(target_host);
-
-       return ret;
+       goto out;
 }
 
 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
@@ -2732,6 +2758,7 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
        INIT_LIST_HEAD(&host->target_list);
        spin_lock_init(&host->target_lock);
        init_completion(&host->released);
+       mutex_init(&host->add_target_mutex);
        host->srp_dev = device;
        host->port = port;
 
index 575681063f38b11fac26815e7c5e2a1fe6db5b42..aad27b7b4a4624bc040ef06e266d61bd537211d7 100644 (file)
@@ -105,6 +105,7 @@ struct srp_host {
        spinlock_t              target_lock;
        struct completion       released;
        struct list_head        list;
+       struct mutex            add_target_mutex;
 };
 
 struct srp_request {
index 9cd5415fe017dc359d2aae2e679eddc40e75f5e0..aa7f94375108dd661774fd30339341fa4c1fe1f8 100644 (file)
@@ -35,6 +35,12 @@ static void _be_roce_dev_add(struct be_adapter *adapter)
 
        if (!ocrdma_drv)
                return;
+
+       if (ocrdma_drv->be_abi_version != BE_ROCE_ABI_VERSION) {
+               dev_warn(&pdev->dev, "Cannot initialize RoCE due to ocrdma ABI mismatch\n");
+               return;
+       }
+
        if (pdev->device == OC_DEVICE_ID5) {
                /* only msix is supported on these devices */
                if (!msix_enabled(adapter))
index 2cd1129e19af960185a3f706d9ab5ed165e409a7..1bfb16164df88c80cde462555a0e1cf07445f0e5 100644 (file)
@@ -21,6 +21,8 @@
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 
+#define BE_ROCE_ABI_VERSION    1
+
 struct ocrdma_dev;
 
 enum be_interrupt_mode {
@@ -52,6 +54,7 @@ struct be_dev_info {
 /* ocrdma driver register's the callback functions with nic driver. */
 struct ocrdma_driver {
        unsigned char name[32];
+       u32 be_abi_version;
        struct ocrdma_dev *(*add) (struct be_dev_info *dev_info);
        void (*remove) (struct ocrdma_dev *);
        void (*state_change_handler) (struct ocrdma_dev *, u32 new_state);
index 40462415291e8e79316951417e06715d34314a48..3c11acf67849c9aa05a78b419f63f2570d0a5417 100644 (file)
@@ -395,6 +395,10 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
                if (rc)
                        return rc;
        }
+
+       if (scsi_get_prot_op(sc) != SCSI_PROT_NORMAL)
+               task->protected = true;
+
        if (sc->sc_data_direction == DMA_TO_DEVICE) {
                unsigned out_len = scsi_out(sc)->length;
                struct iscsi_r2t_info *r2t = &task->unsol_r2t;
@@ -823,6 +827,33 @@ static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 
        sc->result = (DID_OK << 16) | rhdr->cmd_status;
 
+       if (task->protected) {
+               sector_t sector;
+               u8 ascq;
+
+               /*
+                * Transports that didn't implement the check_protection
+                * callback but still advertised T10-PI support to the SCSI
+                * midlayer deserve this BUG_ON.
+                */
+               BUG_ON(!session->tt->check_protection);
+
+               ascq = session->tt->check_protection(task, &sector);
+               if (ascq) {
+                       sc->result = DRIVER_SENSE << 24 |
+                                    SAM_STAT_CHECK_CONDITION;
+                       scsi_build_sense_buffer(1, sc->sense_buffer,
+                                               ILLEGAL_REQUEST, 0x10, ascq);
+                       sc->sense_buffer[7] = 0xc; /* Additional sense length */
+                       sc->sense_buffer[8] = 0;   /* Information desc type */
+                       sc->sense_buffer[9] = 0xa; /* Additional desc length */
+                       sc->sense_buffer[10] = 0x80; /* Validity bit */
+
+                       put_unaligned_be64(sector, &sc->sense_buffer[12]);
+                       goto out;
+               }
+       }
+
        if (rhdr->response != ISCSI_STATUS_CMD_COMPLETED) {
                sc->result = DID_ERROR << 16;
                goto out;
@@ -1567,6 +1598,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn,
        task->have_checked_conn = false;
        task->last_timeout = jiffies;
        task->last_xfer = jiffies;
+       task->protected = false;
        INIT_LIST_HEAD(&task->running);
        return task;
 }
index d47ffc8d3e431dc2524e30c378cd2fabb6406b74..13e898332e45b066617b3089ac41d4efdad3217d 100644 (file)
@@ -810,6 +810,7 @@ EXPORT_SYMBOL_GPL(srp_remove_host);
 
 /**
  * srp_stop_rport_timers - stop the transport layer recovery timers
+ * @rport: SRP remote port for which to stop the timers.
  *
  * Must be called after srp_remove_host() and scsi_remove_host(). The caller
  * must hold a reference on the rport (rport->dev) and on the SCSI host
index f29e3a27c2cccc6a22bbbe0037417e4fe234a04b..0e3ff30647d518483853cb9c52104991df966221 100644 (file)
@@ -601,5 +601,4 @@ struct ib_cm_sidr_rep_param {
 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
                        struct ib_cm_sidr_rep_param *param);
 
-int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac);
 #endif /* IB_CM_H */
index 82ab5c1e7605bedbfc2efc6be70cf624d2b8e784..acd825182977ca73ec0eb161af9d768a0dc365b9 100644 (file)
@@ -1412,10 +1412,6 @@ struct ib_dma_mapping_ops {
        void            (*unmap_sg)(struct ib_device *dev,
                                    struct scatterlist *sg, int nents,
                                    enum dma_data_direction direction);
-       u64             (*dma_address)(struct ib_device *dev,
-                                      struct scatterlist *sg);
-       unsigned int    (*dma_len)(struct ib_device *dev,
-                                  struct scatterlist *sg);
        void            (*sync_single_for_cpu)(struct ib_device *dev,
                                               u64 dma_handle,
                                               size_t size,
@@ -2240,12 +2236,13 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
  * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
  * @dev: The device for which the DMA addresses were created
  * @sg: The scatter/gather entry
+ *
+ * Note: this function is obsolete. To do: change all occurrences of
+ * ib_sg_dma_address() into sg_dma_address().
  */
 static inline u64 ib_sg_dma_address(struct ib_device *dev,
                                    struct scatterlist *sg)
 {
-       if (dev->dma_ops)
-               return dev->dma_ops->dma_address(dev, sg);
        return sg_dma_address(sg);
 }
 
@@ -2253,12 +2250,13 @@ static inline u64 ib_sg_dma_address(struct ib_device *dev,
  * ib_sg_dma_len - Return the DMA length from a scatter/gather entry
  * @dev: The device for which the DMA addresses were created
  * @sg: The scatter/gather entry
+ *
+ * Note: this function is obsolete. To do: change all occurrences of
+ * ib_sg_dma_len() into sg_dma_len().
  */
 static inline unsigned int ib_sg_dma_len(struct ib_device *dev,
                                         struct scatterlist *sg)
 {
-       if (dev->dma_ops)
-               return dev->dma_ops->dma_len(dev, sg);
        return sg_dma_len(sg);
 }
 
index 309f51336fb96d97b9eb4f599147320eb3d42617..1457c26dfc58605317b134ad193dc92c625203e4 100644 (file)
@@ -133,6 +133,10 @@ struct iscsi_task {
        unsigned long           last_xfer;
        unsigned long           last_timeout;
        bool                    have_checked_conn;
+
+       /* T10 protection information */
+       bool                    protected;
+
        /* state set/tested under session->lock */
        int                     state;
        atomic_t                refcount;
index 88640a47216cb7cb749c0a15c0c9e3774fc5ccc8..2555ee5343fd0a45dbec75f5d6305963b35eabdb 100644 (file)
@@ -167,6 +167,7 @@ struct iscsi_transport {
                                 struct iscsi_bus_flash_conn *fnode_conn);
        int (*logout_flashnode_sid) (struct iscsi_cls_session *cls_sess);
        int (*get_host_stats) (struct Scsi_Host *shost, char *buf, int len);
+       u8 (*check_protection)(struct iscsi_task *task, sector_t *sector);
 };
 
 /*
index b11da5c1331e3db786c4bb375ac0a5605aae622a..cdb05dd1d4401134fcef914cefaa1338eb629437 100644 (file)
@@ -41,7 +41,6 @@ enum srp_rport_state {
  * @mutex:             Protects against concurrent rport reconnect /
  *                     fast_io_fail / dev_loss_tmo activity.
  * @state:             rport state.
- * @deleted:           Whether or not srp_rport_del() has already been invoked.
  * @reconnect_delay:   Reconnect delay in seconds.
  * @failed_reconnects: Number of failed reconnect attempts.
  * @reconnect_work:    Work structure used for scheduling reconnect attempts.