Merge branches 'cxgb4', 'flowsteer', 'ipoib', 'iser', 'mlx4', 'ocrdma' and 'qib'...
author Roland Dreier <roland@purestorage.com>
Tue, 3 Sep 2013 16:01:08 +0000 (09:01 -0700)
committer Roland Dreier <roland@purestorage.com>
Tue, 3 Sep 2013 16:01:08 +0000 (09:01 -0700)
45 files changed:
drivers/infiniband/core/cma.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/amso1100/c2_ae.c
drivers/infiniband/hw/amso1100/c2_cm.c
drivers/infiniband/hw/cxgb3/iwch_cm.c
drivers/infiniband/hw/cxgb4/Kconfig
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/ev.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/t4.h
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/ocrdma/ocrdma.h
drivers/infiniband/hw/ocrdma/ocrdma_abi.h
drivers/infiniband/hw/ocrdma/ocrdma_ah.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.h
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/ocrdma/ocrdma_sli.h
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/iser/iser_initiator.c
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
include/linux/mlx4/device.h
include/rdma/ib_verbs.h
include/rdma/iw_cm.h
include/uapi/rdma/ib_user_verbs.h

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 7c0f9535fb7d443bf2c4b1647b3494816292020a..3a2c3c3bf723f5ab7edca847bcb32c34ee95281d 100644
@@ -1385,8 +1385,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 {
        struct rdma_id_private *id_priv = iw_id->context;
        struct rdma_cm_event event;
-       struct sockaddr_in *sin;
        int ret = 0;
+       struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
+       struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
        if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
                return 0;
@@ -1397,10 +1398,10 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
                event.event = RDMA_CM_EVENT_DISCONNECTED;
                break;
        case IW_CM_EVENT_CONNECT_REPLY:
-               sin = (struct sockaddr_in *) cma_src_addr(id_priv);
-               *sin = iw_event->local_addr;
-               sin = (struct sockaddr_in *) cma_dst_addr(id_priv);
-               *sin = iw_event->remote_addr;
+               memcpy(cma_src_addr(id_priv), laddr,
+                      rdma_addr_size(laddr));
+               memcpy(cma_dst_addr(id_priv), raddr,
+                      rdma_addr_size(raddr));
                switch (iw_event->status) {
                case 0:
                        event.event = RDMA_CM_EVENT_ESTABLISHED;
@@ -1450,11 +1451,12 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 {
        struct rdma_cm_id *new_cm_id;
        struct rdma_id_private *listen_id, *conn_id;
-       struct sockaddr_in *sin;
        struct net_device *dev = NULL;
        struct rdma_cm_event event;
        int ret;
        struct ib_device_attr attr;
+       struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
+       struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
        listen_id = cm_id->context;
        if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
@@ -1472,14 +1474,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
        mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
        conn_id->state = RDMA_CM_CONNECT;
 
-       dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
-       if (!dev) {
-               ret = -EADDRNOTAVAIL;
-               mutex_unlock(&conn_id->handler_mutex);
-               rdma_destroy_id(new_cm_id);
-               goto out;
-       }
-       ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
+       ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
        if (ret) {
                mutex_unlock(&conn_id->handler_mutex);
                rdma_destroy_id(new_cm_id);
@@ -1497,10 +1492,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
        cm_id->context = conn_id;
        cm_id->cm_handler = cma_iw_handler;
 
-       sin = (struct sockaddr_in *) cma_src_addr(conn_id);
-       *sin = iw_event->local_addr;
-       sin = (struct sockaddr_in *) cma_dst_addr(conn_id);
-       *sin = iw_event->remote_addr;
+       memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
+       memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
 
        ret = ib_query_device(conn_id->id.device, &attr);
        if (ret) {
@@ -1576,7 +1569,6 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
 {
        int ret;
-       struct sockaddr_in *sin;
        struct iw_cm_id *id;
 
        id = iw_create_cm_id(id_priv->id.device,
@@ -1587,8 +1579,8 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
 
        id_priv->cm_id.iw = id;
 
-       sin = (struct sockaddr_in *) cma_src_addr(id_priv);
-       id_priv->cm_id.iw->local_addr = *sin;
+       memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
+              rdma_addr_size(cma_src_addr(id_priv)));
 
        ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
 
@@ -2803,7 +2795,6 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
                          struct rdma_conn_param *conn_param)
 {
        struct iw_cm_id *cm_id;
-       struct sockaddr_in* sin;
        int ret;
        struct iw_cm_conn_param iw_param;
 
@@ -2813,11 +2804,10 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
 
        id_priv->cm_id.iw = cm_id;
 
-       sin = (struct sockaddr_in *) cma_src_addr(id_priv);
-       cm_id->local_addr = *sin;
-
-       sin = (struct sockaddr_in *) cma_dst_addr(id_priv);
-       cm_id->remote_addr = *sin;
+       memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
+              rdma_addr_size(cma_src_addr(id_priv)));
+       memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv),
+              rdma_addr_size(cma_dst_addr(id_priv)));
 
        ret = cma_modify_qp_rtr(id_priv, conn_param);
        if (ret)
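
The conversions in this file size each address copy with rdma_addr_size(),
which maps the sockaddr family to the size of the matching structure, so the
same code paths handle IPv4, IPv6, and (since the AF_IB work) native IB
addresses. For reference, the helper in include/rdma/ib_addr.h is roughly as
follows (shown for context, not part of this diff):

        static inline int rdma_addr_size(struct sockaddr *addr)
        {
                switch (addr->sa_family) {
                case AF_INET:
                        return sizeof(struct sockaddr_in);
                case AF_INET6:
                        return sizeof(struct sockaddr_in6);
                case AF_IB:
                        return sizeof(struct sockaddr_ib);
                default:
                        return 0;
                }
        }
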
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 0fcd7aa26fa2121d86d48e035798aadad407cce1..d040b877475f3f04f9f6c2fea63ba772fbd9586c 100644
@@ -135,6 +135,7 @@ struct ib_usrq_object {
 struct ib_uqp_object {
        struct ib_uevent_object uevent;
        struct list_head        mcast_list;
+       struct ib_uxrcd_object *uxrcd;
 };
 
 struct ib_ucq_object {
@@ -155,6 +156,7 @@ extern struct idr ib_uverbs_cq_idr;
 extern struct idr ib_uverbs_qp_idr;
 extern struct idr ib_uverbs_srq_idr;
 extern struct idr ib_uverbs_xrcd_idr;
+extern struct idr ib_uverbs_rule_idr;
 
 void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
 
@@ -215,5 +217,7 @@ IB_UVERBS_DECLARE_CMD(destroy_srq);
 IB_UVERBS_DECLARE_CMD(create_xsrq);
 IB_UVERBS_DECLARE_CMD(open_xrcd);
 IB_UVERBS_DECLARE_CMD(close_xrcd);
+IB_UVERBS_DECLARE_CMD(create_flow);
+IB_UVERBS_DECLARE_CMD(destroy_flow);
 
 #endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index b3c07b0c9f2655bb13b9fb81c6ef38fc2efd8757..f2b81b9ee0d6849c67693e50c351721c64df27fe 100644
@@ -54,6 +54,7 @@ static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
 static struct uverbs_lock_class ah_lock_class  = { .name = "AH-uobj" };
 static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
 static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
+static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
 
 #define INIT_UDATA(udata, ibuf, obuf, ilen, olen)                      \
        do {                                                            \
@@ -330,6 +331,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
        INIT_LIST_HEAD(&ucontext->srq_list);
        INIT_LIST_HEAD(&ucontext->ah_list);
        INIT_LIST_HEAD(&ucontext->xrcd_list);
+       INIT_LIST_HEAD(&ucontext->rule_list);
        ucontext->closing = 0;
 
        resp.num_comp_vectors = file->device->num_comp_vectors;
@@ -1526,7 +1528,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
                   (unsigned long) cmd.response + sizeof resp,
                   in_len - sizeof cmd, out_len - sizeof resp);
 
-       obj = kmalloc(sizeof *obj, GFP_KERNEL);
+       obj = kzalloc(sizeof *obj, GFP_KERNEL);
        if (!obj)
                return -ENOMEM;
 
@@ -1642,8 +1644,13 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
                goto err_copy;
        }
 
-       if (xrcd)
+       if (xrcd) {
+               obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
+                                         uobject);
+               atomic_inc(&obj->uxrcd->refcnt);
                put_xrcd_read(xrcd_uobj);
+       }
+
        if (pd)
                put_pd_read(pd);
        if (scq)
@@ -1753,6 +1760,8 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
                goto err_remove;
        }
 
+       obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+       atomic_inc(&obj->uxrcd->refcnt);
        put_xrcd_read(xrcd_uobj);
 
        mutex_lock(&file->mutex);
@@ -2019,6 +2028,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
        if (ret)
                return ret;
 
+       if (obj->uxrcd)
+               atomic_dec(&obj->uxrcd->refcnt);
+
        idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
 
        mutex_lock(&file->mutex);
@@ -2587,6 +2599,234 @@ out_put:
        return ret ? ret : in_len;
 }
 
+static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
+                               union ib_flow_spec *ib_spec)
+{
+       ib_spec->type = kern_spec->type;
+
+       switch (ib_spec->type) {
+       case IB_FLOW_SPEC_ETH:
+               ib_spec->eth.size = sizeof(struct ib_flow_spec_eth);
+               if (ib_spec->eth.size != kern_spec->eth.size)
+                       return -EINVAL;
+               memcpy(&ib_spec->eth.val, &kern_spec->eth.val,
+                      sizeof(struct ib_flow_eth_filter));
+               memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask,
+                      sizeof(struct ib_flow_eth_filter));
+               break;
+       case IB_FLOW_SPEC_IPV4:
+               ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4);
+               if (ib_spec->ipv4.size != kern_spec->ipv4.size)
+                       return -EINVAL;
+               memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val,
+                      sizeof(struct ib_flow_ipv4_filter));
+               memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
+                      sizeof(struct ib_flow_ipv4_filter));
+               break;
+       case IB_FLOW_SPEC_TCP:
+       case IB_FLOW_SPEC_UDP:
+               ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
+               if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size)
+                       return -EINVAL;
+               memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val,
+                      sizeof(struct ib_flow_tcp_udp_filter));
+               memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask,
+                      sizeof(struct ib_flow_tcp_udp_filter));
+               break;
+       default:
+               return -EINVAL;
+       }
+       return 0;
+}
+
+ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
+                             const char __user *buf, int in_len,
+                             int out_len)
+{
+       struct ib_uverbs_create_flow      cmd;
+       struct ib_uverbs_create_flow_resp resp;
+       struct ib_uobject                 *uobj;
+       struct ib_flow                    *flow_id;
+       struct ib_kern_flow_attr          *kern_flow_attr;
+       struct ib_flow_attr               *flow_attr;
+       struct ib_qp                      *qp;
+       int err = 0;
+       void *kern_spec;
+       void *ib_spec;
+       int i;
+       int kern_attr_size;
+
+       if (out_len < sizeof(resp))
+               return -ENOSPC;
+
+       if (copy_from_user(&cmd, buf, sizeof(cmd)))
+               return -EFAULT;
+
+       if (cmd.comp_mask)
+               return -EINVAL;
+
+       if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER &&
+            !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
+               return -EPERM;
+
+       if (cmd.flow_attr.num_of_specs < 0 ||
+           cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+               return -EINVAL;
+
+       kern_attr_size = cmd.flow_attr.size - sizeof(cmd) -
+                        sizeof(struct ib_uverbs_cmd_hdr_ex);
+
+       if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len ||
+           kern_attr_size < 0 || kern_attr_size >
+           (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec)))
+               return -EINVAL;
+
+       if (cmd.flow_attr.num_of_specs) {
+               kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+               if (!kern_flow_attr)
+                       return -ENOMEM;
+
+               memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
+               if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd),
+                                  kern_attr_size)) {
+                       err = -EFAULT;
+                       goto err_free_attr;
+               }
+       } else {
+               kern_flow_attr = &cmd.flow_attr;
+               kern_attr_size = sizeof(cmd.flow_attr);
+       }
+
+       uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+       if (!uobj) {
+               err = -ENOMEM;
+               goto err_free_attr;
+       }
+       init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
+       down_write(&uobj->mutex);
+
+       qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+       if (!qp) {
+               err = -EINVAL;
+               goto err_uobj;
+       }
+
+       flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+       if (!flow_attr) {
+               err = -ENOMEM;
+               goto err_put;
+       }
+
+       flow_attr->type = kern_flow_attr->type;
+       flow_attr->priority = kern_flow_attr->priority;
+       flow_attr->num_of_specs = kern_flow_attr->num_of_specs;
+       flow_attr->port = kern_flow_attr->port;
+       flow_attr->flags = kern_flow_attr->flags;
+       flow_attr->size = sizeof(*flow_attr);
+
+       kern_spec = kern_flow_attr + 1;
+       ib_spec = flow_attr + 1;
+       for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) {
+               err = kern_spec_to_ib_spec(kern_spec, ib_spec);
+               if (err)
+                       goto err_free;
+               flow_attr->size +=
+                       ((union ib_flow_spec *) ib_spec)->size;
+               kern_attr_size -= ((struct ib_kern_spec *) kern_spec)->size;
+               kern_spec += ((struct ib_kern_spec *) kern_spec)->size;
+               ib_spec += ((union ib_flow_spec *) ib_spec)->size;
+       }
+       if (kern_attr_size) {
+               pr_warn("create flow failed, %d bytes left from uverb cmd\n",
+                       kern_attr_size);
+               err = -EINVAL;
+               goto err_free;
+       }
+       flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
+       if (IS_ERR(flow_id)) {
+               err = PTR_ERR(flow_id);
+               goto err_free;
+       }
+       flow_id->qp = qp;
+       flow_id->uobject = uobj;
+       uobj->object = flow_id;
+
+       err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
+       if (err)
+               goto destroy_flow;
+
+       memset(&resp, 0, sizeof(resp));
+       resp.flow_handle = uobj->id;
+
+       if (copy_to_user((void __user *)(unsigned long) cmd.response,
+                        &resp, sizeof(resp))) {
+               err = -EFAULT;
+               goto err_copy;
+       }
+
+       put_qp_read(qp);
+       mutex_lock(&file->mutex);
+       list_add_tail(&uobj->list, &file->ucontext->rule_list);
+       mutex_unlock(&file->mutex);
+
+       uobj->live = 1;
+
+       up_write(&uobj->mutex);
+       kfree(flow_attr);
+       if (cmd.flow_attr.num_of_specs)
+               kfree(kern_flow_attr);
+       return in_len;
+err_copy:
+       idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+destroy_flow:
+       ib_destroy_flow(flow_id);
+err_free:
+       kfree(flow_attr);
+err_put:
+       put_qp_read(qp);
+err_uobj:
+       put_uobj_write(uobj);
+err_free_attr:
+       if (cmd.flow_attr.num_of_specs)
+               kfree(kern_flow_attr);
+       return err;
+}
+
+ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
+                              const char __user *buf, int in_len,
+                              int out_len)
+{
+       struct ib_uverbs_destroy_flow   cmd;
+       struct ib_flow                  *flow_id;
+       struct ib_uobject               *uobj;
+       int                             ret;
+
+       if (copy_from_user(&cmd, buf, sizeof(cmd)))
+               return -EFAULT;
+
+       uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
+                             file->ucontext);
+       if (!uobj)
+               return -EINVAL;
+       flow_id = uobj->object;
+
+       ret = ib_destroy_flow(flow_id);
+       if (!ret)
+               uobj->live = 0;
+
+       put_uobj_write(uobj);
+
+       idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+
+       mutex_lock(&file->mutex);
+       list_del(&uobj->list);
+       mutex_unlock(&file->mutex);
+
+       put_uobj(uobj);
+
+       return ret ? ret : in_len;
+}
+
 static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
                                struct ib_uverbs_create_xsrq *cmd,
                                struct ib_udata *udata)
@@ -2860,6 +3098,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
        struct ib_srq                    *srq;
        struct ib_uevent_object          *obj;
        int                               ret = -EINVAL;
+       struct ib_usrq_object            *us;
+       enum ib_srq_type                  srq_type;
 
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
@@ -2869,6 +3109,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
                return -EINVAL;
        srq = uobj->object;
        obj = container_of(uobj, struct ib_uevent_object, uobject);
+       srq_type = srq->srq_type;
 
        ret = ib_destroy_srq(srq);
        if (!ret)
@@ -2879,6 +3120,11 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
        if (ret)
                return ret;
 
+       if (srq_type == IB_SRQT_XRC) {
+               us = container_of(obj, struct ib_usrq_object, uevent);
+               atomic_dec(&us->uxrcd->refcnt);
+       }
+
        idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
 
        mutex_lock(&file->mutex);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2c6f0f2ecd9d84ee5f3ae4e29f1636b3f863047a..75ad86c4abf82a86572d9ca8b35d52a7f9d2d4ce 100644
@@ -73,6 +73,7 @@ DEFINE_IDR(ib_uverbs_cq_idr);
 DEFINE_IDR(ib_uverbs_qp_idr);
 DEFINE_IDR(ib_uverbs_srq_idr);
 DEFINE_IDR(ib_uverbs_xrcd_idr);
+DEFINE_IDR(ib_uverbs_rule_idr);
 
 static DEFINE_SPINLOCK(map_lock);
 static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -113,7 +114,9 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
        [IB_USER_VERBS_CMD_OPEN_XRCD]           = ib_uverbs_open_xrcd,
        [IB_USER_VERBS_CMD_CLOSE_XRCD]          = ib_uverbs_close_xrcd,
        [IB_USER_VERBS_CMD_CREATE_XSRQ]         = ib_uverbs_create_xsrq,
-       [IB_USER_VERBS_CMD_OPEN_QP]             = ib_uverbs_open_qp
+       [IB_USER_VERBS_CMD_OPEN_QP]             = ib_uverbs_open_qp,
+       [IB_USER_VERBS_CMD_CREATE_FLOW]         = ib_uverbs_create_flow,
+       [IB_USER_VERBS_CMD_DESTROY_FLOW]        = ib_uverbs_destroy_flow
 };
 
 static void ib_uverbs_add_one(struct ib_device *device);
@@ -212,6 +215,14 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                kfree(uobj);
        }
 
+       list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
+               struct ib_flow *flow_id = uobj->object;
+
+               idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+               ib_destroy_flow(flow_id);
+               kfree(uobj);
+       }
+
        list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
                struct ib_qp *qp = uobj->object;
                struct ib_uqp_object *uqp =
@@ -583,9 +594,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
        if (copy_from_user(&hdr, buf, sizeof hdr))
                return -EFAULT;
 
-       if (hdr.in_words * 4 != count)
-               return -EINVAL;
-
        if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
            !uverbs_cmd_table[hdr.command])
                return -EINVAL;
@@ -597,8 +605,30 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
        if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
                return -ENOSYS;
 
-       return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
-                                            hdr.in_words * 4, hdr.out_words * 4);
+       if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) {
+               struct ib_uverbs_cmd_hdr_ex hdr_ex;
+
+               if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex)))
+                       return -EFAULT;
+
+               if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count)
+                       return -EINVAL;
+
+               return uverbs_cmd_table[hdr.command](file,
+                                                    buf + sizeof(hdr_ex),
+                                                    (hdr_ex.in_words +
+                                                     hdr_ex.provider_in_words) * 4,
+                                                    (hdr_ex.out_words +
+                                                     hdr_ex.provider_out_words) * 4);
+       } else {
+               if (hdr.in_words * 4 != count)
+                       return -EINVAL;
+
+               return uverbs_cmd_table[hdr.command](file,
+                                                    buf + sizeof(hdr),
+                                                    hdr.in_words * 4,
+                                                    hdr.out_words * 4);
+       }
 }
 
 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
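
Commands numbered at or beyond IB_USER_VERBS_CMD_THRESHOLD take the extended
header read above, which splits the word counts into core and provider parts
so a driver-specific payload can follow the core command. The layout, per
include/uapi/rdma/ib_user_verbs.h in this series (reproduced for context):

        struct ib_uverbs_cmd_hdr_ex {
                __u32 command;
                __u16 in_words;
                __u16 out_words;
                __u16 provider_in_words;
                __u16 provider_out_words;
                __u32 cmd_hdr_reserved;
        };
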
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 22192deb88282b51f195521e6468840655bb219e..a321df28bab2696404e76a365c861637867b68d2 100644
@@ -346,10 +346,13 @@ EXPORT_SYMBOL(ib_destroy_srq);
 static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
 {
        struct ib_qp *qp = context;
+       unsigned long flags;
 
+       spin_lock_irqsave(&qp->device->event_handler_lock, flags);
        list_for_each_entry(event->element.qp, &qp->open_list, open_list)
                if (event->element.qp->event_handler)
                        event->element.qp->event_handler(event, event->element.qp->qp_context);
+       spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
 }
 
 static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
@@ -1254,3 +1257,30 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
        return xrcd->device->dealloc_xrcd(xrcd);
 }
 EXPORT_SYMBOL(ib_dealloc_xrcd);
+
+struct ib_flow *ib_create_flow(struct ib_qp *qp,
+                              struct ib_flow_attr *flow_attr,
+                              int domain)
+{
+       struct ib_flow *flow_id;
+       if (!qp->device->create_flow)
+               return ERR_PTR(-ENOSYS);
+
+       flow_id = qp->device->create_flow(qp, flow_attr, domain);
+       if (!IS_ERR(flow_id))
+               atomic_inc(&qp->usecnt);
+       return flow_id;
+}
+EXPORT_SYMBOL(ib_create_flow);
+
+int ib_destroy_flow(struct ib_flow *flow_id)
+{
+       int err;
+       struct ib_qp *qp = flow_id->qp;
+
+       err = qp->device->destroy_flow(flow_id);
+       if (!err)
+               atomic_dec(&qp->usecnt);
+       return err;
+}
+EXPORT_SYMBOL(ib_destroy_flow);
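
As a usage sketch for the API exported above: a caller lays out an
ib_flow_attr immediately followed by its packed flow specs (the same layout
ib_uverbs_create_flow() builds) and attaches the rule to a QP. This is a
minimal sketch only; the MAC address, port number, and the choice of
IB_FLOW_DOMAIN_USER are illustrative assumptions, not taken from this diff:

        struct {
                struct ib_flow_attr     attr;
                struct ib_flow_spec_eth eth;    /* must follow attr directly */
        } rule = {
                .attr = {
                        .type         = IB_FLOW_ATTR_NORMAL,
                        .size         = sizeof(rule),   /* attr + all specs */
                        .num_of_specs = 1,
                        .port         = 1,
                },
                .eth = {
                        .type = IB_FLOW_SPEC_ETH,
                        .size = sizeof(struct ib_flow_spec_eth),
                        /* steer one unicast MAC (example address) */
                        .val.dst_mac  = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
                        .mask.dst_mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
                },
        };
        struct ib_flow *flow;

        flow = ib_create_flow(qp, &rule.attr, IB_FLOW_DOMAIN_USER);
        if (IS_ERR(flow))
                return PTR_ERR(flow);
        /* ... packets matching the rule are now steered to qp ... */
        ib_destroy_flow(flow);
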
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
index 706cf97cbe8f4e963581f322d47b76e1b78739b4..d5d1929753e4fdc95bbdc67625f5f0521ffeb32f 100644
@@ -155,6 +155,8 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
        enum c2_event_id event_id;
        unsigned long flags;
        int status;
+       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_event.local_addr;
+       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_event.remote_addr;
 
        /*
         * retrieve the message
@@ -206,10 +208,10 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
                case CCAE_ACTIVE_CONNECT_RESULTS:
                        res = &wr->ae.ae_active_connect_results;
                        cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
-                       cm_event.local_addr.sin_addr.s_addr = res->laddr;
-                       cm_event.remote_addr.sin_addr.s_addr = res->raddr;
-                       cm_event.local_addr.sin_port = res->lport;
-                       cm_event.remote_addr.sin_port = res->rport;
+                       laddr->sin_addr.s_addr = res->laddr;
+                       raddr->sin_addr.s_addr = res->raddr;
+                       laddr->sin_port = res->lport;
+                       raddr->sin_port = res->rport;
                        if (status == 0) {
                                cm_event.private_data_len =
                                        be32_to_cpu(res->private_data_length);
@@ -281,10 +283,10 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
                }
                cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
                cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
-               cm_event.local_addr.sin_addr.s_addr = req->laddr;
-               cm_event.remote_addr.sin_addr.s_addr = req->raddr;
-               cm_event.local_addr.sin_port = req->lport;
-               cm_event.remote_addr.sin_port = req->rport;
+               laddr->sin_addr.s_addr = req->laddr;
+               raddr->sin_addr.s_addr = req->raddr;
+               laddr->sin_port = req->lport;
+               raddr->sin_port = req->rport;
                cm_event.private_data_len =
                        be32_to_cpu(req->private_data_length);
                cm_event.private_data = req->private_data;
diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c
index 95f58ab1e0b881d7913bc97c9f9ae0e2ac15ad43..23bfa94fbd4e4094b906c83f2504e00bc6ebab21 100644
@@ -46,6 +46,10 @@ int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
        struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */
        struct c2_vq_req *vq_req;
        int err;
+       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+
+       if (cm_id->remote_addr.ss_family != AF_INET)
+               return -ENOSYS;
 
        ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
        if (!ibqp)
@@ -91,8 +95,8 @@ int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
        wr->rnic_handle = c2dev->adapter_handle;
        wr->qp_handle = qp->adapter_handle;
 
-       wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr;
-       wr->remote_port = cm_id->remote_addr.sin_port;
+       wr->remote_addr = raddr->sin_addr.s_addr;
+       wr->remote_port = raddr->sin_port;
 
        /*
         * Move any private data from the callers's buf into
@@ -135,6 +139,10 @@ int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
        struct c2wr_ep_listen_create_rep *reply;
        struct c2_vq_req *vq_req;
        int err;
+       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+
+       if (cm_id->local_addr.ss_family != AF_INET)
+               return -ENOSYS;
 
        c2dev = to_c2dev(cm_id->device);
        if (c2dev == NULL)
@@ -153,8 +161,8 @@ int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
        c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
        wr.hdr.context = (u64) (unsigned long) vq_req;
        wr.rnic_handle = c2dev->adapter_handle;
-       wr.local_addr = cm_id->local_addr.sin_addr.s_addr;
-       wr.local_port = cm_id->local_addr.sin_port;
+       wr.local_addr = laddr->sin_addr.s_addr;
+       wr.local_port = laddr->sin_port;
        wr.backlog = cpu_to_be32(backlog);
        wr.user_context = (u64) (unsigned long) cm_id;
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 3e094cd6a0e345e1e239c96348e162cc53184fa7..095bb046e2c82eb8ded011eba72a486b376b444f 100644
@@ -721,8 +721,10 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status)
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CONNECT_REPLY;
        event.status = status;
-       event.local_addr = ep->com.local_addr;
-       event.remote_addr = ep->com.remote_addr;
+       memcpy(&event.local_addr, &ep->com.local_addr,
+              sizeof(ep->com.local_addr));
+       memcpy(&event.remote_addr, &ep->com.remote_addr,
+              sizeof(ep->com.remote_addr));
 
        if ((status == 0) || (status == -ECONNREFUSED)) {
                event.private_data_len = ep->plen;
@@ -747,8 +749,10 @@ static void connect_request_upcall(struct iwch_ep *ep)
        PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CONNECT_REQUEST;
-       event.local_addr = ep->com.local_addr;
-       event.remote_addr = ep->com.remote_addr;
+       memcpy(&event.local_addr, &ep->com.local_addr,
+              sizeof(ep->com.local_addr));
+       memcpy(&event.remote_addr, &ep->com.remote_addr,
+              sizeof(ep->com.remote_addr));
        event.private_data_len = ep->plen;
        event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
        event.provider_data = ep;
@@ -1872,8 +1876,9 @@ err:
 static int is_loopback_dst(struct iw_cm_id *cm_id)
 {
        struct net_device *dev;
+       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
 
-       dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr);
+       dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr);
        if (!dev)
                return 0;
        dev_put(dev);
@@ -1886,6 +1891,13 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        struct iwch_ep *ep;
        struct rtable *rt;
        int err = 0;
+       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+
+       if (cm_id->remote_addr.ss_family != PF_INET) {
+               err = -ENOSYS;
+               goto out;
+       }
 
        if (is_loopback_dst(cm_id)) {
                err = -ENOSYS;
@@ -1929,11 +1941,9 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        }
 
        /* find a route */
-       rt = find_route(h->rdev.t3cdev_p,
-                       cm_id->local_addr.sin_addr.s_addr,
-                       cm_id->remote_addr.sin_addr.s_addr,
-                       cm_id->local_addr.sin_port,
-                       cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
+       rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr,
+                       raddr->sin_addr.s_addr, laddr->sin_port,
+                       raddr->sin_port, IPTOS_LOWDELAY);
        if (!rt) {
                printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
                err = -EHOSTUNREACH;
@@ -1941,7 +1951,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        }
        ep->dst = &rt->dst;
        ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
-                            &cm_id->remote_addr.sin_addr.s_addr);
+                            &raddr->sin_addr.s_addr);
        if (!ep->l2t) {
                printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
                err = -ENOMEM;
@@ -1950,8 +1960,10 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
        state_set(&ep->com, CONNECTING);
        ep->tos = IPTOS_LOWDELAY;
-       ep->com.local_addr = cm_id->local_addr;
-       ep->com.remote_addr = cm_id->remote_addr;
+       memcpy(&ep->com.local_addr, &cm_id->local_addr,
+              sizeof(ep->com.local_addr));
+       memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
+              sizeof(ep->com.remote_addr));
 
        /* send connect request to rnic */
        err = send_connect(ep);
@@ -1979,6 +1991,11 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
 
        might_sleep();
 
+       if (cm_id->local_addr.ss_family != PF_INET) {
+               err = -ENOSYS;
+               goto fail1;
+       }
+
        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
        if (!ep) {
                printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
@@ -1990,7 +2007,8 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
        cm_id->add_ref(cm_id);
        ep->com.cm_id = cm_id;
        ep->backlog = backlog;
-       ep->com.local_addr = cm_id->local_addr;
+       memcpy(&ep->com.local_addr, &cm_id->local_addr,
+              sizeof(ep->com.local_addr));
 
        /*
         * Allocate a server TID.
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig
index 6b7e6c543534e871867bee21a0e387ea98e3de54..d4e8983fba537d71b8da25b5b0768f088678722d 100644
@@ -1,6 +1,6 @@
 config INFINIBAND_CXGB4
        tristate "Chelsio T4 RDMA Driver"
-       depends on CHELSIO_T4 && INET
+       depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n)
        select GENERIC_ALLOCATOR
        ---help---
          This is an iWARP/RDMA driver for the Chelsio T4 1GbE and
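
(The "(IPV6 || IPV6=n)" dependency added above is the usual Kconfig idiom for
an optional dependency: it forbids INFINIBAND_CXGB4=y while IPV6=m, which
would leave the IPv6 symbols now used by cm.c unresolved at link time, while
still allowing the build when IPV6 is disabled entirely.)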
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 65c30ea8c1a156f0f7d81bc4e039723b321be8dd..12fef76c791c524454bd9a0c48d5b4a967ac19af 100644
@@ -44,6 +44,8 @@
 #include <net/netevent.h>
 #include <net/route.h>
 #include <net/tcp.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
 
 #include "iw_cxgb4.h"
 
@@ -330,22 +332,84 @@ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
        } else {
                skb = alloc_skb(len, gfp);
        }
+       t4_set_arp_err_handler(skb, NULL, NULL);
        return skb;
 }
 
-static struct rtable *find_route(struct c4iw_dev *dev, __be32 local_ip,
+static struct net_device *get_real_dev(struct net_device *egress_dev)
+{
+       struct net_device *phys_dev = egress_dev;
+       if (egress_dev->priv_flags & IFF_802_1Q_VLAN)
+               phys_dev = vlan_dev_real_dev(egress_dev);
+       return phys_dev;
+}
+
+static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
+{
+       int i;
+
+       egress_dev = get_real_dev(egress_dev);
+       for (i = 0; i < dev->rdev.lldi.nports; i++)
+               if (dev->rdev.lldi.ports[i] == egress_dev)
+                       return 1;
+       return 0;
+}
+
+static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip,
+                                    __u8 *peer_ip, __be16 local_port,
+                                    __be16 peer_port, u8 tos,
+                                    __u32 sin6_scope_id)
+{
+       struct dst_entry *dst = NULL;
+
+       if (IS_ENABLED(CONFIG_IPV6)) {
+               struct flowi6 fl6;
+
+               memset(&fl6, 0, sizeof(fl6));
+               memcpy(&fl6.daddr, peer_ip, 16);
+               memcpy(&fl6.saddr, local_ip, 16);
+               if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+                       fl6.flowi6_oif = sin6_scope_id;
+               dst = ip6_route_output(&init_net, NULL, &fl6);
+               if (!dst)
+                       goto out;
+               if (!our_interface(dev, ip6_dst_idev(dst)->dev) &&
+                   !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
+                       dst_release(dst);
+                       dst = NULL;
+               }
+       }
+
+out:
+       return dst;
+}
+
+static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
                                 __be32 peer_ip, __be16 local_port,
                                 __be16 peer_port, u8 tos)
 {
        struct rtable *rt;
        struct flowi4 fl4;
+       struct neighbour *n;
 
        rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
                                   peer_port, local_port, IPPROTO_TCP,
                                   tos, 0);
        if (IS_ERR(rt))
                return NULL;
-       return rt;
+       n = dst_neigh_lookup(&rt->dst, &peer_ip);
+       if (!n) {
+               dst_release(&rt->dst);
+               return NULL;
+       }
+       if (!our_interface(dev, n->dev) &&
+           !(n->dev->flags & IFF_LOOPBACK)) {
+               neigh_release(n);
+               dst_release(&rt->dst);
+               return NULL;
+       }
+       neigh_release(n);
+       return &rt->dst;
 }
 
 static void arp_failure_discard(void *handle, struct sk_buff *skb)
@@ -487,7 +547,7 @@ static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst,
                        ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC;
                else {
                        ntuple |= l2t->vlan << FILTER_SEL_WIDTH_P_FC;
-                       ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC;
+                       ntuple |= 1 << FILTER_SEL_WIDTH_TAG_P_FC;
                }
                ntuple |= l2t->lport << S_PORT | IPPROTO_TCP <<
                          FILTER_SEL_WIDTH_VLD_TAG_P_FC;
@@ -512,15 +572,28 @@ static int send_connect(struct c4iw_ep *ep)
 {
        struct cpl_act_open_req *req;
        struct cpl_t5_act_open_req *t5_req;
+       struct cpl_act_open_req6 *req6;
+       struct cpl_t5_act_open_req6 *t5_req6;
        struct sk_buff *skb;
        u64 opt0;
        u32 opt2;
        unsigned int mtu_idx;
        int wscale;
-       int size = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
-               sizeof(struct cpl_act_open_req) :
-               sizeof(struct cpl_t5_act_open_req);
-       int wrlen = roundup(size, 16);
+       int wrlen;
+       int sizev4 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
+                               sizeof(struct cpl_act_open_req) :
+                               sizeof(struct cpl_t5_act_open_req);
+       int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
+                               sizeof(struct cpl_act_open_req6) :
+                               sizeof(struct cpl_t5_act_open_req6);
+       struct sockaddr_in *la = (struct sockaddr_in *)&ep->com.local_addr;
+       struct sockaddr_in *ra = (struct sockaddr_in *)&ep->com.remote_addr;
+       struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+       struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+
+       wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
+                       roundup(sizev4, 16) :
+                       roundup(sizev6, 16);
 
        PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid);
 
@@ -557,33 +630,82 @@ static int send_connect(struct c4iw_ep *ep)
        t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure);
 
        if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
-               req = (struct cpl_act_open_req *) skb_put(skb, wrlen);
-               INIT_TP_WR(req, 0);
-               OPCODE_TID(req) = cpu_to_be32(
-                               MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
-                               ((ep->rss_qid << 14) | ep->atid)));
-               req->local_port = ep->com.local_addr.sin_port;
-               req->peer_port = ep->com.remote_addr.sin_port;
-               req->local_ip = ep->com.local_addr.sin_addr.s_addr;
-               req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
-               req->opt0 = cpu_to_be64(opt0);
-               req->params = cpu_to_be32(select_ntuple(ep->com.dev,
-                                       ep->dst, ep->l2t));
-               req->opt2 = cpu_to_be32(opt2);
+               if (ep->com.remote_addr.ss_family == AF_INET) {
+                       req = (struct cpl_act_open_req *) skb_put(skb, wrlen);
+                       INIT_TP_WR(req, 0);
+                       OPCODE_TID(req) = cpu_to_be32(
+                                       MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
+                                       ((ep->rss_qid << 14) | ep->atid)));
+                       req->local_port = la->sin_port;
+                       req->peer_port = ra->sin_port;
+                       req->local_ip = la->sin_addr.s_addr;
+                       req->peer_ip = ra->sin_addr.s_addr;
+                       req->opt0 = cpu_to_be64(opt0);
+                       req->params = cpu_to_be32(select_ntuple(ep->com.dev,
+                                               ep->dst, ep->l2t));
+                       req->opt2 = cpu_to_be32(opt2);
+               } else {
+                       req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
+
+                       INIT_TP_WR(req6, 0);
+                       OPCODE_TID(req6) = cpu_to_be32(
+                                          MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
+                                          ((ep->rss_qid<<14)|ep->atid)));
+                       req6->local_port = la6->sin6_port;
+                       req6->peer_port = ra6->sin6_port;
+                       req6->local_ip_hi = *((__be64 *)
+                                               (la6->sin6_addr.s6_addr));
+                       req6->local_ip_lo = *((__be64 *)
+                                               (la6->sin6_addr.s6_addr + 8));
+                       req6->peer_ip_hi = *((__be64 *)
+                                               (ra6->sin6_addr.s6_addr));
+                       req6->peer_ip_lo = *((__be64 *)
+                                               (ra6->sin6_addr.s6_addr + 8));
+                       req6->opt0 = cpu_to_be64(opt0);
+                       req6->params = cpu_to_be32(
+                                       select_ntuple(ep->com.dev, ep->dst,
+                                                     ep->l2t));
+                       req6->opt2 = cpu_to_be32(opt2);
+               }
        } else {
-               t5_req = (struct cpl_t5_act_open_req *) skb_put(skb, wrlen);
-               INIT_TP_WR(t5_req, 0);
-               OPCODE_TID(t5_req) = cpu_to_be32(
+               if (ep->com.remote_addr.ss_family == AF_INET) {
+                       t5_req = (struct cpl_t5_act_open_req *)
+                                skb_put(skb, wrlen);
+                       INIT_TP_WR(t5_req, 0);
+                       OPCODE_TID(t5_req) = cpu_to_be32(
                                        MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
                                        ((ep->rss_qid << 14) | ep->atid)));
-               t5_req->local_port = ep->com.local_addr.sin_port;
-               t5_req->peer_port = ep->com.remote_addr.sin_port;
-               t5_req->local_ip = ep->com.local_addr.sin_addr.s_addr;
-               t5_req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
-               t5_req->opt0 = cpu_to_be64(opt0);
-               t5_req->params = cpu_to_be64(V_FILTER_TUPLE(
-                               select_ntuple(ep->com.dev, ep->dst, ep->l2t)));
-               t5_req->opt2 = cpu_to_be32(opt2);
+                       t5_req->local_port = la->sin_port;
+                       t5_req->peer_port = ra->sin_port;
+                       t5_req->local_ip = la->sin_addr.s_addr;
+                       t5_req->peer_ip = ra->sin_addr.s_addr;
+                       t5_req->opt0 = cpu_to_be64(opt0);
+                       t5_req->params = cpu_to_be64(V_FILTER_TUPLE(
+                                               select_ntuple(ep->com.dev,
+                                               ep->dst, ep->l2t)));
+                       t5_req->opt2 = cpu_to_be32(opt2);
+               } else {
+                       t5_req6 = (struct cpl_t5_act_open_req6 *)
+                                 skb_put(skb, wrlen);
+                       INIT_TP_WR(t5_req6, 0);
+                       OPCODE_TID(t5_req6) = cpu_to_be32(
+                                             MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
+                                             ((ep->rss_qid<<14)|ep->atid)));
+                       t5_req6->local_port = la6->sin6_port;
+                       t5_req6->peer_port = ra6->sin6_port;
+                       t5_req6->local_ip_hi = *((__be64 *)
+                                               (la6->sin6_addr.s6_addr));
+                       t5_req6->local_ip_lo = *((__be64 *)
+                                               (la6->sin6_addr.s6_addr + 8));
+                       t5_req6->peer_ip_hi = *((__be64 *)
+                                               (ra6->sin6_addr.s6_addr));
+                       t5_req6->peer_ip_lo = *((__be64 *)
+                                               (ra6->sin6_addr.s6_addr + 8));
+                       t5_req6->opt0 = cpu_to_be64(opt0);
+                       t5_req6->params = (__force __be64)cpu_to_be32(
+                               select_ntuple(ep->com.dev, ep->dst, ep->l2t));
+                       t5_req6->opt2 = cpu_to_be32(opt2);
+               }
        }
 
        set_bit(ACT_OPEN_REQ, &ep->com.history);
@@ -952,8 +1074,10 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CONNECT_REPLY;
        event.status = status;
-       event.local_addr = ep->com.local_addr;
-       event.remote_addr = ep->com.remote_addr;
+       memcpy(&event.local_addr, &ep->com.local_addr,
+              sizeof(ep->com.local_addr));
+       memcpy(&event.remote_addr, &ep->com.remote_addr,
+              sizeof(ep->com.remote_addr));
 
        if ((status == 0) || (status == -ECONNREFUSED)) {
                if (!ep->tried_with_mpa_v1) {
@@ -989,8 +1113,10 @@ static void connect_request_upcall(struct c4iw_ep *ep)
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CONNECT_REQUEST;
-       event.local_addr = ep->com.local_addr;
-       event.remote_addr = ep->com.remote_addr;
+       memcpy(&event.local_addr, &ep->com.local_addr,
+              sizeof(ep->com.local_addr));
+       memcpy(&event.remote_addr, &ep->com.remote_addr,
+              sizeof(ep->com.remote_addr));
        event.provider_data = ep;
        if (!ep->tried_with_mpa_v1) {
                /* this means MPA_v2 is used */
@@ -1447,10 +1573,9 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
                               " qpid %u ep %p state %d tid %u status %d\n",
                               __func__, ep->com.qp->wq.sq.qid, ep,
                               state_read(&ep->com), ep->hwtid, status);
-               attrs.next_state = C4IW_QP_STATE_ERROR;
+               attrs.next_state = C4IW_QP_STATE_TERMINATE;
                c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
-                              C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
-               c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
+                              C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
                break;
        }
        default:
@@ -1498,6 +1623,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
        struct fw_ofld_connection_wr *req;
        unsigned int mtu_idx;
        int wscale;
+       struct sockaddr_in *sin;
 
        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
        req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req));
@@ -1506,10 +1632,12 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
        req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16)));
        req->le.filter = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst,
                                     ep->l2t));
-       req->le.lport = ep->com.local_addr.sin_port;
-       req->le.pport = ep->com.remote_addr.sin_port;
-       req->le.u.ipv4.lip = ep->com.local_addr.sin_addr.s_addr;
-       req->le.u.ipv4.pip = ep->com.remote_addr.sin_addr.s_addr;
+       sin = (struct sockaddr_in *)&ep->com.local_addr;
+       req->le.lport = sin->sin_port;
+       req->le.u.ipv4.lip = sin->sin_addr.s_addr;
+       sin = (struct sockaddr_in *)&ep->com.remote_addr;
+       req->le.pport = sin->sin_port;
+       req->le.u.ipv4.pip = sin->sin_addr.s_addr;
        req->tcb.t_state_to_astid =
                        htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_SENT) |
                        V_FW_OFLD_CONNECTION_WR_ASTID(atid));
@@ -1560,14 +1688,98 @@ static inline int act_open_has_tid(int status)
 
 #define ACT_OPEN_RETRY_COUNT 2
 
+static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
+                    struct dst_entry *dst, struct c4iw_dev *cdev,
+                    bool clear_mpa_v1)
+{
+       struct neighbour *n;
+       int err, step;
+       struct net_device *pdev;
+
+       n = dst_neigh_lookup(dst, peer_ip);
+       if (!n)
+               return -ENODEV;
+
+       rcu_read_lock();
+       err = -ENOMEM;
+       if (n->dev->flags & IFF_LOOPBACK) {
+               if (iptype == 4)
+                       pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip);
+               else if (IS_ENABLED(CONFIG_IPV6))
+                       for_each_netdev(&init_net, pdev) {
+                               if (ipv6_chk_addr(&init_net,
+                                                 (struct in6_addr *)peer_ip,
+                                                 pdev, 1))
+                                       break;
+                       }
+               else
+                       pdev = NULL;
+
+               if (!pdev) {
+                       err = -ENODEV;
+                       goto out;
+               }
+               ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
+                                       n, pdev, 0);
+               if (!ep->l2t)
+                       goto out;
+               ep->mtu = pdev->mtu;
+               ep->tx_chan = cxgb4_port_chan(pdev);
+               ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+               step = cdev->rdev.lldi.ntxq /
+                       cdev->rdev.lldi.nchan;
+               ep->txq_idx = cxgb4_port_idx(pdev) * step;
+               step = cdev->rdev.lldi.nrxq /
+                       cdev->rdev.lldi.nchan;
+               ep->ctrlq_idx = cxgb4_port_idx(pdev);
+               ep->rss_qid = cdev->rdev.lldi.rxq_ids[
+                       cxgb4_port_idx(pdev) * step];
+               dev_put(pdev);
+       } else {
+               pdev = get_real_dev(n->dev);
+               ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
+                                       n, pdev, 0);
+               if (!ep->l2t)
+                       goto out;
+               ep->mtu = dst_mtu(dst);
+               ep->tx_chan = cxgb4_port_chan(n->dev);
+               ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1;
+               step = cdev->rdev.lldi.ntxq /
+                       cdev->rdev.lldi.nchan;
+               ep->txq_idx = cxgb4_port_idx(n->dev) * step;
+               ep->ctrlq_idx = cxgb4_port_idx(n->dev);
+               step = cdev->rdev.lldi.nrxq /
+                       cdev->rdev.lldi.nchan;
+               ep->rss_qid = cdev->rdev.lldi.rxq_ids[
+                       cxgb4_port_idx(n->dev) * step];
+
+               if (clear_mpa_v1) {
+                       ep->retry_with_mpa_v1 = 0;
+                       ep->tried_with_mpa_v1 = 0;
+               }
+       }
+       err = 0;
+out:
+       rcu_read_unlock();
+
+       neigh_release(n);
+
+       return err;
+}
+
 static int c4iw_reconnect(struct c4iw_ep *ep)
 {
        int err = 0;
-       struct rtable *rt;
-       struct port_info *pi;
-       struct net_device *pdev;
-       int step;
-       struct neighbour *neigh;
+       struct sockaddr_in *laddr = (struct sockaddr_in *)
+                                   &ep->com.cm_id->local_addr;
+       struct sockaddr_in *raddr = (struct sockaddr_in *)
+                                   &ep->com.cm_id->remote_addr;
+       struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)
+                                     &ep->com.cm_id->local_addr;
+       struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
+                                     &ep->com.cm_id->remote_addr;
+       int iptype;
+       __u8 *ra;
 
        PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
        init_timer(&ep->timer);
@@ -1584,57 +1796,28 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
        insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid);
 
        /* find a route */
-       rt = find_route(ep->com.dev,
-                       ep->com.cm_id->local_addr.sin_addr.s_addr,
-                       ep->com.cm_id->remote_addr.sin_addr.s_addr,
-                       ep->com.cm_id->local_addr.sin_port,
-                       ep->com.cm_id->remote_addr.sin_port, 0);
-       if (!rt) {
+       if (ep->com.cm_id->local_addr.ss_family == AF_INET) {
+               ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
+                                    raddr->sin_addr.s_addr, laddr->sin_port,
+                                    raddr->sin_port, 0);
+               iptype = 4;
+               ra = (__u8 *)&raddr->sin_addr;
+       } else {
+               ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr,
+                                     raddr6->sin6_addr.s6_addr,
+                                     laddr6->sin6_port, raddr6->sin6_port, 0,
+                                     raddr6->sin6_scope_id);
+               iptype = 6;
+               ra = (__u8 *)&raddr6->sin6_addr;
+       }
+       if (!ep->dst) {
                pr_err("%s - cannot find route.\n", __func__);
                err = -EHOSTUNREACH;
                goto fail3;
        }
-       ep->dst = &rt->dst;
-
-       neigh = dst_neigh_lookup(ep->dst,
-                       &ep->com.cm_id->remote_addr.sin_addr.s_addr);
-       if (!neigh) {
-               pr_err("%s - cannot alloc neigh.\n", __func__);
-               err = -ENOMEM;
-               goto fail4;
-       }
-
-       /* get a l2t entry */
-       if (neigh->dev->flags & IFF_LOOPBACK) {
-               PDBG("%s LOOPBACK\n", __func__);
-               pdev = ip_dev_find(&init_net,
-                               ep->com.cm_id->remote_addr.sin_addr.s_addr);
-               ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
-                               neigh, pdev, 0);
-               pi = (struct port_info *)netdev_priv(pdev);
-               ep->mtu = pdev->mtu;
-               ep->tx_chan = cxgb4_port_chan(pdev);
-               ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
-               dev_put(pdev);
-       } else {
-               ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
-                               neigh, neigh->dev, 0);
-               pi = (struct port_info *)netdev_priv(neigh->dev);
-               ep->mtu = dst_mtu(ep->dst);
-               ep->tx_chan = cxgb4_port_chan(neigh->dev);
-               ep->smac_idx = (cxgb4_port_viid(neigh->dev) &
-                               0x7F) << 1;
-       }
-
-       step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan;
-       ep->txq_idx = pi->port_id * step;
-       ep->ctrlq_idx = pi->port_id;
-       step = ep->com.dev->rdev.lldi.nrxq / ep->com.dev->rdev.lldi.nchan;
-       ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[pi->port_id * step];
-
-       if (!ep->l2t) {
+       err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false);
+       if (err) {
                pr_err("%s - cannot alloc l2e.\n", __func__);
-               err = -ENOMEM;
                goto fail4;
        }
 
@@ -1677,8 +1860,16 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
                                        ntohl(rpl->atid_status)));
        struct tid_info *t = dev->rdev.lldi.tids;
        int status = GET_AOPEN_STATUS(ntohl(rpl->atid_status));
+       struct sockaddr_in *la;
+       struct sockaddr_in *ra;
+       struct sockaddr_in6 *la6;
+       struct sockaddr_in6 *ra6;
 
        ep = lookup_atid(t, atid);
+       la = (struct sockaddr_in *)&ep->com.local_addr;
+       ra = (struct sockaddr_in *)&ep->com.remote_addr;
+       la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+       ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
 
        PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
             status, status2errno(status));
@@ -1699,10 +1890,11 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
        case CPL_ERR_CONN_TIMEDOUT:
                break;
        case CPL_ERR_TCAM_FULL:
+               mutex_lock(&dev->rdev.stats.lock);
                dev->rdev.stats.tcam_full++;
-               if (dev->rdev.lldi.enable_fw_ofld_conn) {
-                       mutex_lock(&dev->rdev.stats.lock);
-                       mutex_unlock(&dev->rdev.stats.lock);
+               mutex_unlock(&dev->rdev.stats.lock);
+               if (ep->com.local_addr.ss_family == AF_INET &&
+                   dev->rdev.lldi.enable_fw_ofld_conn) {
                        send_fw_act_open_req(ep,
                                             GET_TID_TID(GET_AOPEN_ATID(
                                             ntohl(rpl->atid_status))));
@@ -1722,13 +1914,17 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
                }
                break;
        default:
-               printk(KERN_INFO MOD "Active open failure - "
-                      "atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
-                      atid, status, status2errno(status),
-                      &ep->com.local_addr.sin_addr.s_addr,
-                      ntohs(ep->com.local_addr.sin_port),
-                      &ep->com.remote_addr.sin_addr.s_addr,
-                      ntohs(ep->com.remote_addr.sin_port));
+               if (ep->com.local_addr.ss_family == AF_INET) {
+                       pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
+                               atid, status, status2errno(status),
+                               &la->sin_addr.s_addr, ntohs(la->sin_port),
+                               &ra->sin_addr.s_addr, ntohs(ra->sin_port));
+               } else {
+                       pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n",
+                               atid, status, status2errno(status),
+                               la6->sin6_addr.s6_addr, ntohs(la6->sin6_port),
+                               ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port));
+               }
                break;
        }
 
@@ -1766,27 +1962,6 @@ out:
        return 0;
 }
 
-static int listen_stop(struct c4iw_listen_ep *ep)
-{
-       struct sk_buff *skb;
-       struct cpl_close_listsvr_req *req;
-
-       PDBG("%s ep %p\n", __func__, ep);
-       skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-       if (!skb) {
-               printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
-               return -ENOMEM;
-       }
-       req = (struct cpl_close_listsvr_req *) skb_put(skb, sizeof(*req));
-       INIT_TP_WR(req, 0);
-       OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
-                                                   ep->stid));
-       req->reply_ctrl = cpu_to_be16(
-                         QUEUENO(ep->com.dev->rdev.lldi.rxq_ids[0]));
-       set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
-       return c4iw_ofld_send(&ep->com.dev->rdev, skb);
-}
-
 static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 {
        struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
@@ -1799,7 +1974,7 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
        return 0;
 }
 
-static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb,
+static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
                      struct cpl_pass_accept_req *req)
 {
        struct cpl_pass_accept_rpl *rpl;
@@ -1851,16 +2026,15 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb,
        rpl->opt0 = cpu_to_be64(opt0);
        rpl->opt2 = cpu_to_be32(opt2);
        set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
+       t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
        c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 
        return;
 }
 
-static void reject_cr(struct c4iw_dev *dev, u32 hwtid, __be32 peer_ip,
-                     struct sk_buff *skb)
+static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
 {
-       PDBG("%s c4iw_dev %p tid %u peer_ip %x\n", __func__, dev, hwtid,
-            peer_ip);
+       PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
        BUG_ON(skb_cloned(skb));
        skb_trim(skb, sizeof(struct cpl_tid_release));
        skb_get(skb);
@@ -1868,95 +2042,38 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, __be32 peer_ip,
        return;
 }
 
-static void get_4tuple(struct cpl_pass_accept_req *req,
-                      __be32 *local_ip, __be32 *peer_ip,
+static void get_4tuple(struct cpl_pass_accept_req *req, int *iptype,
+                      __u8 *local_ip, __u8 *peer_ip,
                       __be16 *local_port, __be16 *peer_port)
 {
        int eth_len = G_ETH_HDR_LEN(be32_to_cpu(req->hdr_len));
        int ip_len = G_IP_HDR_LEN(be32_to_cpu(req->hdr_len));
        struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
+       struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
        struct tcphdr *tcp = (struct tcphdr *)
                             ((u8 *)(req + 1) + eth_len + ip_len);
 
-       PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
-            ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
-            ntohs(tcp->dest));
-
-       *peer_ip = ip->saddr;
-       *local_ip = ip->daddr;
+       if (ip->version == 4) {
+               PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
+                    ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
+                    ntohs(tcp->dest));
+               *iptype = 4;
+               memcpy(peer_ip, &ip->saddr, 4);
+               memcpy(local_ip, &ip->daddr, 4);
+       } else {
+               PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__,
+                    ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source),
+                    ntohs(tcp->dest));
+               *iptype = 6;
+               memcpy(peer_ip, ip6->saddr.s6_addr, 16);
+               memcpy(local_ip, ip6->daddr.s6_addr, 16);
+       }
        *peer_port = tcp->source;
        *local_port = tcp->dest;
 
        return;
 }
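
get_4tuple() can alias the same header bytes as both iphdr and ipv6hdr because it only commits to a layout after probing the version field, and both formats keep the version in the high nibble of the first header byte. A self-contained sketch of that probe over raw test bytes:

#include <stdio.h>
#include <stdint.h>

/* The high nibble of the first header byte is the IP version. */
static int ip_version(const uint8_t *pkt)
{
        return pkt[0] >> 4;
}

int main(void)
{
        uint8_t v4_hdr[20] = { 0x45 }; /* version 4, IHL 5 */
        uint8_t v6_hdr[40] = { 0x60 }; /* version 6 */

        printf("first: IPv%d\n", ip_version(v4_hdr));  /* IPv4 */
        printf("second: IPv%d\n", ip_version(v6_hdr)); /* IPv6 */
        return 0;
}
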
 
-static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst,
-                    struct c4iw_dev *cdev, bool clear_mpa_v1)
-{
-       struct neighbour *n;
-       int err, step;
-
-       n = dst_neigh_lookup(dst, &peer_ip);
-       if (!n)
-               return -ENODEV;
-
-       rcu_read_lock();
-       err = -ENOMEM;
-       if (n->dev->flags & IFF_LOOPBACK) {
-               struct net_device *pdev;
-
-               pdev = ip_dev_find(&init_net, peer_ip);
-               if (!pdev) {
-                       err = -ENODEV;
-                       goto out;
-               }
-               ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
-                                       n, pdev, 0);
-               if (!ep->l2t)
-                       goto out;
-               ep->mtu = pdev->mtu;
-               ep->tx_chan = cxgb4_port_chan(pdev);
-               ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
-               step = cdev->rdev.lldi.ntxq /
-                       cdev->rdev.lldi.nchan;
-               ep->txq_idx = cxgb4_port_idx(pdev) * step;
-               step = cdev->rdev.lldi.nrxq /
-                       cdev->rdev.lldi.nchan;
-               ep->ctrlq_idx = cxgb4_port_idx(pdev);
-               ep->rss_qid = cdev->rdev.lldi.rxq_ids[
-                       cxgb4_port_idx(pdev) * step];
-               dev_put(pdev);
-       } else {
-               ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
-                                       n, n->dev, 0);
-               if (!ep->l2t)
-                       goto out;
-               ep->mtu = dst_mtu(dst);
-               ep->tx_chan = cxgb4_port_chan(n->dev);
-               ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1;
-               step = cdev->rdev.lldi.ntxq /
-                       cdev->rdev.lldi.nchan;
-               ep->txq_idx = cxgb4_port_idx(n->dev) * step;
-               ep->ctrlq_idx = cxgb4_port_idx(n->dev);
-               step = cdev->rdev.lldi.nrxq /
-                       cdev->rdev.lldi.nchan;
-               ep->rss_qid = cdev->rdev.lldi.rxq_ids[
-                       cxgb4_port_idx(n->dev) * step];
-
-               if (clear_mpa_v1) {
-                       ep->retry_with_mpa_v1 = 0;
-                       ep->tried_with_mpa_v1 = 0;
-               }
-       }
-       err = 0;
-out:
-       rcu_read_unlock();
-
-       neigh_release(n);
-
-       return err;
-}
-
 static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 {
        struct c4iw_ep *child_ep = NULL, *parent_ep;
@@ -1965,23 +2082,17 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
        struct tid_info *t = dev->rdev.lldi.tids;
        unsigned int hwtid = GET_TID(req);
        struct dst_entry *dst;
-       struct rtable *rt;
-       __be32 local_ip, peer_ip = 0;
+       __u8 local_ip[16], peer_ip[16];
        __be16 local_port, peer_port;
        int err;
        u16 peer_mss = ntohs(req->tcpopt.mss);
+       int iptype;
 
        parent_ep = lookup_stid(t, stid);
        if (!parent_ep) {
                PDBG("%s connect request on invalid stid %d\n", __func__, stid);
                goto reject;
        }
-       get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port);
-
-       PDBG("%s parent ep %p hwtid %u laddr 0x%x raddr 0x%x lport %d " \
-            "rport %d peer_mss %d\n", __func__, parent_ep, hwtid,
-            ntohl(local_ip), ntohl(peer_ip), ntohs(local_port),
-            ntohs(peer_port), peer_mss);
 
        if (state_read(&parent_ep->com) != LISTEN) {
                printk(KERN_ERR "%s - listening ep not in LISTEN\n",
@@ -1989,15 +2100,32 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
                goto reject;
        }
 
+       get_4tuple(req, &iptype, local_ip, peer_ip, &local_port, &peer_port);
+
        /* Find output route */
-       rt = find_route(dev, local_ip, peer_ip, local_port, peer_port,
-                       GET_POPEN_TOS(ntohl(req->tos_stid)));
-       if (!rt) {
+       if (iptype == 4) {
+               PDBG("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n",
+                    __func__, parent_ep, hwtid,
+                    local_ip, peer_ip, ntohs(local_port),
+                    ntohs(peer_port), peer_mss);
+               dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
+                                local_port, peer_port,
+                                GET_POPEN_TOS(ntohl(req->tos_stid)));
+       } else {
+               PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
+                    , __func__, parent_ep, hwtid,
+                    local_ip, peer_ip, ntohs(local_port),
+                    ntohs(peer_port), peer_mss);
+               dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port,
+                                 GET_POPEN_TOS(ntohl(req->tos_stid)),
+                                 ((struct sockaddr_in6 *)
+                                 &parent_ep->com.local_addr)->sin6_scope_id);
+       }
+       if (!dst) {
                printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
                       __func__);
                goto reject;
        }
-       dst = &rt->dst;
 
        child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
        if (!child_ep) {
@@ -2007,7 +2135,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
                goto reject;
        }
 
-       err = import_ep(child_ep, peer_ip, dst, dev, false);
+       err = import_ep(child_ep, iptype, peer_ip, dst, dev, false);
        if (err) {
                printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
                       __func__);
@@ -2022,12 +2150,27 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
        state_set(&child_ep->com, CONNECTING);
        child_ep->com.dev = dev;
        child_ep->com.cm_id = NULL;
-       child_ep->com.local_addr.sin_family = PF_INET;
-       child_ep->com.local_addr.sin_port = local_port;
-       child_ep->com.local_addr.sin_addr.s_addr = local_ip;
-       child_ep->com.remote_addr.sin_family = PF_INET;
-       child_ep->com.remote_addr.sin_port = peer_port;
-       child_ep->com.remote_addr.sin_addr.s_addr = peer_ip;
+       if (iptype == 4) {
+               struct sockaddr_in *sin = (struct sockaddr_in *)
+                       &child_ep->com.local_addr;
+               sin->sin_family = PF_INET;
+               sin->sin_port = local_port;
+               sin->sin_addr.s_addr = *(__be32 *)local_ip;
+               sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
+               sin->sin_family = PF_INET;
+               sin->sin_port = peer_port;
+               sin->sin_addr.s_addr = *(__be32 *)peer_ip;
+       } else {
+               struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
+                       &child_ep->com.local_addr;
+               sin6->sin6_family = PF_INET6;
+               sin6->sin6_port = local_port;
+               memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
+               sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
+               sin6->sin6_family = PF_INET6;
+               sin6->sin6_port = peer_port;
+               memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
+       }
        c4iw_get_ep(&parent_ep->com);
        child_ep->parent_ep = parent_ep;
        child_ep->tos = GET_POPEN_TOS(ntohl(req->tos_stid));
@@ -2040,11 +2183,11 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
        init_timer(&child_ep->timer);
        cxgb4_insert_tid(t, child_ep, hwtid);
        insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid);
-       accept_cr(child_ep, peer_ip, skb, req);
+       accept_cr(child_ep, skb, req);
        set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
        goto out;
 reject:
-       reject_cr(dev, hwtid, peer_ip, skb);
+       reject_cr(dev, hwtid, skb);
 out:
        return 0;
 }
@@ -2512,12 +2655,79 @@ err:
        return err;
 }
 
+static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
+{
+       struct in_device *ind;
+       int found = 0;
+       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+
+       ind = in_dev_get(dev->rdev.lldi.ports[0]);
+       if (!ind)
+               return -EADDRNOTAVAIL;
+       for_primary_ifa(ind) {
+               laddr->sin_addr.s_addr = ifa->ifa_address;
+               raddr->sin_addr.s_addr = ifa->ifa_address;
+               found = 1;
+               break;
+       }
+       endfor_ifa(ind);
+       in_dev_put(ind);
+       return found ? 0 : -EADDRNOTAVAIL;
+}
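
pick_local_ipaddrs() walks the port's primary addresses and uses the first one for both ends of a loopback connection to INADDR_ANY. A rough userspace analogue, using getifaddrs(3) in place of the in-kernel for_primary_ifa() walk:

#include <stdio.h>
#include <ifaddrs.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int main(void)
{
        struct ifaddrs *list, *ifa;
        char buf[INET_ADDRSTRLEN];

        if (getifaddrs(&list))
                return 1;
        for (ifa = list; ifa; ifa = ifa->ifa_next) {
                if (!ifa->ifa_addr || ifa->ifa_addr->sa_family != AF_INET)
                        continue;
                inet_ntop(AF_INET,
                          &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr,
                          buf, sizeof(buf));
                printf("%s: %s\n", ifa->ifa_name, buf);
                break; /* first usable address wins, like found = 1 above */
        }
        freeifaddrs(list);
        return 0;
}
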
+
+static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
+                     unsigned char banned_flags)
+{
+       struct inet6_dev *idev;
+       int err = -EADDRNOTAVAIL;
+
+       rcu_read_lock();
+       idev = __in6_dev_get(dev);
+       if (idev != NULL) {
+               struct inet6_ifaddr *ifp;
+
+               read_lock_bh(&idev->lock);
+               list_for_each_entry(ifp, &idev->addr_list, if_list) {
+                       if (ifp->scope == IFA_LINK &&
+                           !(ifp->flags & banned_flags)) {
+                               memcpy(addr, &ifp->addr, 16);
+                               err = 0;
+                               break;
+                       }
+               }
+               read_unlock_bh(&idev->lock);
+       }
+       rcu_read_unlock();
+       return err;
+}
+
+static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
+{
+       struct in6_addr uninitialized_var(addr);
+       struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->local_addr;
+       struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->remote_addr;
+
+       if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
+               memcpy(la6->sin6_addr.s6_addr, &addr, 16);
+               memcpy(ra6->sin6_addr.s6_addr, &addr, 16);
+               return 0;
+       }
+       return -EADDRNOTAVAIL;
+}
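
pick_local_ip6addrs() wants a usable link-local address: scope IFA_LINK and not still tentative from duplicate address detection. Link-local here just means the fe80::/10 prefix, which can be tested directly:

#include <stdio.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int main(void)
{
        struct in6_addr a;

        inet_pton(AF_INET6, "fe80::1", &a);
        printf("link-local: %d\n", IN6_IS_ADDR_LINKLOCAL(&a) != 0); /* 1 */
        return 0;
}
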
+
 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 {
        struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
        struct c4iw_ep *ep;
-       struct rtable *rt;
        int err = 0;
+       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+       struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr;
+       struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
+                                     &cm_id->remote_addr;
+       __u8 *ra;
+       int iptype;
 
        if ((conn_param->ord > c4iw_max_read_depth) ||
            (conn_param->ird > c4iw_max_read_depth)) {
@@ -2545,7 +2755,11 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        ep->com.dev = dev;
        ep->com.cm_id = cm_id;
        ep->com.qp = get_qhp(dev, conn_param->qpn);
-       BUG_ON(!ep->com.qp);
+       if (!ep->com.qp) {
+               PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
+               err = -EINVAL;
+               goto fail2;
+       }
        ref_qp(ep);
        PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
             ep->com.qp, cm_id);
@@ -2561,27 +2775,56 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        }
        insert_handle(dev, &dev->atid_idr, ep, ep->atid);
 
-       PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__,
-            ntohl(cm_id->local_addr.sin_addr.s_addr),
-            ntohs(cm_id->local_addr.sin_port),
-            ntohl(cm_id->remote_addr.sin_addr.s_addr),
-            ntohs(cm_id->remote_addr.sin_port));
+       if (cm_id->remote_addr.ss_family == AF_INET) {
+               iptype = 4;
+               ra = (__u8 *)&raddr->sin_addr;
 
-       /* find a route */
-       rt = find_route(dev,
-                       cm_id->local_addr.sin_addr.s_addr,
-                       cm_id->remote_addr.sin_addr.s_addr,
-                       cm_id->local_addr.sin_port,
-                       cm_id->remote_addr.sin_port, 0);
-       if (!rt) {
+               /*
+                * Handle loopback requests to INADDR_ANY.
+                */
+               if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) {
+                       err = pick_local_ipaddrs(dev, cm_id);
+                       if (err)
+                               goto fail2;
+               }
+
+               /* find a route */
+               PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
+                    __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
+                    ra, ntohs(raddr->sin_port));
+               ep->dst = find_route(dev, laddr->sin_addr.s_addr,
+                                    raddr->sin_addr.s_addr, laddr->sin_port,
+                                    raddr->sin_port, 0);
+       } else {
+               iptype = 6;
+               ra = (__u8 *)&raddr6->sin6_addr;
+
+               /*
+                * Handle loopback requests to INADDR_ANY.
+                */
+               if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
+                       err = pick_local_ip6addrs(dev, cm_id);
+                       if (err)
+                               goto fail2;
+               }
+
+               /* find a route */
+               PDBG("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
+                    __func__, laddr6->sin6_addr.s6_addr,
+                    ntohs(laddr6->sin6_port),
+                    raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
+               ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr,
+                                     raddr6->sin6_addr.s6_addr,
+                                     laddr6->sin6_port, raddr6->sin6_port, 0,
+                                     raddr6->sin6_scope_id);
+       }
+       if (!ep->dst) {
                printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
                err = -EHOSTUNREACH;
                goto fail3;
        }
-       ep->dst = &rt->dst;
 
-       err = import_ep(ep, cm_id->remote_addr.sin_addr.s_addr,
-                       ep->dst, ep->com.dev, true);
+       err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true);
        if (err) {
                printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
                goto fail4;
@@ -2593,8 +2836,10 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
        state_set(&ep->com, CONNECTING);
        ep->tos = 0;
-       ep->com.local_addr = cm_id->local_addr;
-       ep->com.remote_addr = cm_id->remote_addr;
+       memcpy(&ep->com.local_addr, &cm_id->local_addr,
+              sizeof(ep->com.local_addr));
+       memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
+              sizeof(ep->com.remote_addr));
 
        /* send connect request to rnic */
        err = send_connect(ep);
@@ -2614,6 +2859,60 @@ out:
        return err;
 }
 
+static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
+{
+       int err;
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+
+       c4iw_init_wr_wait(&ep->com.wr_wait);
+       err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
+                                  ep->stid, &sin6->sin6_addr,
+                                  sin6->sin6_port,
+                                  ep->com.dev->rdev.lldi.rxq_ids[0]);
+       if (!err)
+               err = c4iw_wait_for_reply(&ep->com.dev->rdev,
+                                         &ep->com.wr_wait,
+                                         0, 0, __func__);
+       if (err)
+               pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
+                      err, ep->stid,
+                      sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
+       return err;
+}
+
+static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
+{
+       int err;
+       struct sockaddr_in *sin = (struct sockaddr_in *)&ep->com.local_addr;
+
+       if (dev->rdev.lldi.enable_fw_ofld_conn) {
+               do {
+                       err = cxgb4_create_server_filter(
+                               ep->com.dev->rdev.lldi.ports[0], ep->stid,
+                               sin->sin_addr.s_addr, sin->sin_port, 0,
+                               ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
+                       if (err == -EBUSY) {
+                               set_current_state(TASK_UNINTERRUPTIBLE);
+                               schedule_timeout(usecs_to_jiffies(100));
+                       }
+               } while (err == -EBUSY);
+       } else {
+               c4iw_init_wr_wait(&ep->com.wr_wait);
+               err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
+                               ep->stid, sin->sin_addr.s_addr, sin->sin_port,
+                               0, ep->com.dev->rdev.lldi.rxq_ids[0]);
+               if (!err)
+                       err = c4iw_wait_for_reply(&ep->com.dev->rdev,
+                                                 &ep->com.wr_wait,
+                                                 0, 0, __func__);
+       }
+       if (err)
+               pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n"
+                      , err, ep->stid,
+                      &sin->sin_addr, ntohs(sin->sin_port));
+       return err;
+}
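
The -EBUSY loop in create_server4() is a plain poll-until-free retry: the server filter slot can be transiently busy, so the code backs off briefly and tries again. The same shape in userspace, with try_op() as a made-up operation that fails a few times before succeeding:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

/* Made-up operation that is busy for the first three attempts. */
static int try_op(void)
{
        static int busy_left = 3;

        return busy_left-- > 0 ? -EBUSY : 0;
}

int main(void)
{
        int err;

        do {
                err = try_op();
                if (err == -EBUSY)
                        usleep(100); /* brief back-off, as in the driver */
        } while (err == -EBUSY);
        printf("err %d\n", err); /* 0 after the fourth attempt */
        return 0;
}
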
+
 int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
 {
        int err = 0;
@@ -2633,15 +2932,18 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
        ep->com.cm_id = cm_id;
        ep->com.dev = dev;
        ep->backlog = backlog;
-       ep->com.local_addr = cm_id->local_addr;
+       memcpy(&ep->com.local_addr, &cm_id->local_addr,
+              sizeof(ep->com.local_addr));
 
        /*
         * Allocate a server TID.
         */
        if (dev->rdev.lldi.enable_fw_ofld_conn)
-               ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, PF_INET, ep);
+               ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
+                                            cm_id->local_addr.ss_family, ep);
        else
-               ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep);
+               ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids,
+                                           cm_id->local_addr.ss_family, ep);
 
        if (ep->stid == -1) {
                printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
@@ -2650,43 +2952,16 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
        }
        insert_handle(dev, &dev->stid_idr, ep, ep->stid);
        state_set(&ep->com, LISTEN);
-       if (dev->rdev.lldi.enable_fw_ofld_conn) {
-               do {
-                       err = cxgb4_create_server_filter(
-                               ep->com.dev->rdev.lldi.ports[0], ep->stid,
-                               ep->com.local_addr.sin_addr.s_addr,
-                               ep->com.local_addr.sin_port,
-                               0,
-                               ep->com.dev->rdev.lldi.rxq_ids[0],
-                               0,
-                               0);
-                       if (err == -EBUSY) {
-                               set_current_state(TASK_UNINTERRUPTIBLE);
-                               schedule_timeout(usecs_to_jiffies(100));
-                       }
-               } while (err == -EBUSY);
-       } else {
-               c4iw_init_wr_wait(&ep->com.wr_wait);
-               err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
-                               ep->stid, ep->com.local_addr.sin_addr.s_addr,
-                               ep->com.local_addr.sin_port,
-                               0,
-                               ep->com.dev->rdev.lldi.rxq_ids[0]);
-               if (!err)
-                       err = c4iw_wait_for_reply(&ep->com.dev->rdev,
-                                                 &ep->com.wr_wait,
-                                                 0, 0, __func__);
-       }
+       if (ep->com.local_addr.ss_family == AF_INET)
+               err = create_server4(dev, ep);
+       else
+               err = create_server6(dev, ep);
        if (!err) {
                cm_id->provider_data = ep;
                goto out;
        }
-       pr_err("%s cxgb4_create_server/filter failed err %d " \
-              "stid %d laddr %08x lport %d\n", \
-              __func__, err, ep->stid,
-              ntohl(ep->com.local_addr.sin_addr.s_addr),
-              ntohs(ep->com.local_addr.sin_port));
-       cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET);
+       cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
+                       ep->com.local_addr.ss_family);
 fail2:
        cm_id->rem_ref(cm_id);
        c4iw_put_ep(&ep->com);
@@ -2704,20 +2979,24 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
 
        might_sleep();
        state_set(&ep->com, DEAD);
-       if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn) {
+       if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn &&
+           ep->com.local_addr.ss_family == AF_INET) {
                err = cxgb4_remove_server_filter(
                        ep->com.dev->rdev.lldi.ports[0], ep->stid,
                        ep->com.dev->rdev.lldi.rxq_ids[0], 0);
        } else {
                c4iw_init_wr_wait(&ep->com.wr_wait);
-               err = listen_stop(ep);
+               err = cxgb4_remove_server(
+                               ep->com.dev->rdev.lldi.ports[0], ep->stid,
+                               ep->com.dev->rdev.lldi.rxq_ids[0], 0);
                if (err)
                        goto done;
                err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait,
                                          0, 0, __func__);
        }
        remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
-       cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET);
+       cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
+                       ep->com.local_addr.ss_family);
 done:
        cm_id->rem_ref(cm_id);
        c4iw_put_ep(&ep->com);
@@ -3021,7 +3300,6 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
        struct cpl_pass_accept_req *req = (void *)(rss + 1);
        struct l2t_entry *e;
        struct dst_entry *dst;
-       struct rtable *rt;
        struct c4iw_ep *lep;
        u16 window;
        struct port_info *pi;
@@ -3079,14 +3357,13 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
             ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
             ntohs(tcph->source), iph->tos);
 
-       rt = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
-                       iph->tos);
-       if (!rt) {
+       dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
+                        iph->tos);
+       if (!dst) {
                pr_err("%s - failed to find dst entry!\n",
                       __func__);
                goto reject;
        }
-       dst = &rt->dst;
        neigh = dst_neigh_lookup_skb(dst, skb);
 
        if (!neigh) {
@@ -3103,10 +3380,11 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
                tx_chan = cxgb4_port_chan(pdev);
                dev_put(pdev);
        } else {
+               pdev = get_real_dev(neigh->dev);
                e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
-                                       neigh->dev, 0);
-               pi = (struct port_info *)netdev_priv(neigh->dev);
-               tx_chan = cxgb4_port_chan(neigh->dev);
+                                       pdev, 0);
+               pi = (struct port_info *)netdev_priv(pdev);
+               tx_chan = cxgb4_port_chan(pdev);
        }
        if (!e) {
                pr_err("%s - failed to allocate l2t entry!\n",
index 0f1607c8325a5bc8a03e3a5d5471d39ed5741e8b..88de3aa9c5b0205952299a7823ad0bfc071d3a5d 100644 (file)
@@ -225,43 +225,186 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
        t4_swcq_produce(cq);
 }
 
-int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count)
+static void advance_oldest_read(struct t4_wq *wq);
+
+int c4iw_flush_sq(struct c4iw_qp *qhp)
 {
        int flushed = 0;
-       struct t4_swsqe *swsqe = &wq->sq.sw_sq[wq->sq.cidx + count];
-       int in_use = wq->sq.in_use - count;
-
-       BUG_ON(in_use < 0);
-       while (in_use--) {
-               swsqe->signaled = 0;
-               insert_sq_cqe(wq, cq, swsqe);
-               swsqe++;
-               if (swsqe == (wq->sq.sw_sq + wq->sq.size))
-                       swsqe = wq->sq.sw_sq;
-               flushed++;
+       struct t4_wq *wq = &qhp->wq;
+       struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
+       struct t4_cq *cq = &chp->cq;
+       int idx;
+       struct t4_swsqe *swsqe;
+       int error = (qhp->attr.state != C4IW_QP_STATE_CLOSING &&
+                       qhp->attr.state != C4IW_QP_STATE_IDLE);
+
+       if (wq->sq.flush_cidx == -1)
+               wq->sq.flush_cidx = wq->sq.cidx;
+       idx = wq->sq.flush_cidx;
+       BUG_ON(idx >= wq->sq.size);
+       while (idx != wq->sq.pidx) {
+               if (error) {
+                       swsqe = &wq->sq.sw_sq[idx];
+                       BUG_ON(swsqe->flushed);
+                       swsqe->flushed = 1;
+                       insert_sq_cqe(wq, cq, swsqe);
+                       if (wq->sq.oldest_read == swsqe) {
+                               BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
+                               advance_oldest_read(wq);
+                       }
+                       flushed++;
+               } else {
+                       t4_sq_consume(wq);
+               }
+               if (++idx == wq->sq.size)
+                       idx = 0;
        }
+       wq->sq.flush_cidx += flushed;
+       if (wq->sq.flush_cidx >= wq->sq.size)
+               wq->sq.flush_cidx -= wq->sq.size;
        return flushed;
 }
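
c4iw_flush_sq() now walks the software SQ from flush_cidx up to the producer index, wrapping at sq.size, and either inserts a flush CQE (error states) or consumes the entry (orderly close). The walk itself is ordinary modular ring arithmetic:

#include <stdio.h>

int main(void)
{
        int size = 8, cidx = 6, pidx = 2; /* producer has wrapped */
        int idx = cidx, count = 0;

        while (idx != pidx) {
                count++; /* sw_sq[idx] is flushed or consumed here */
                if (++idx == size)
                        idx = 0;
        }
        printf("entries walked: %d\n", count); /* 4: slots 6, 7, 0, 1 */
        return 0;
}
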
 
+static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
+{
+       struct t4_swsqe *swsqe;
+       int cidx;
+
+       if (wq->sq.flush_cidx == -1)
+               wq->sq.flush_cidx = wq->sq.cidx;
+       cidx = wq->sq.flush_cidx;
+       BUG_ON(cidx >= wq->sq.size);
+
+       while (cidx != wq->sq.pidx) {
+               swsqe = &wq->sq.sw_sq[cidx];
+               if (!swsqe->signaled) {
+                       if (++cidx == wq->sq.size)
+                               cidx = 0;
+               } else if (swsqe->complete) {
+
+                       BUG_ON(swsqe->flushed);
+
+                       /*
+                        * Insert this completed cqe into the swcq.
+                        */
+                       PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
+                                       __func__, cidx, cq->sw_pidx);
+                       swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
+                       cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
+                       t4_swcq_produce(cq);
+                       swsqe->flushed = 1;
+                       if (++cidx == wq->sq.size)
+                               cidx = 0;
+                       wq->sq.flush_cidx = cidx;
+               } else
+                       break;
+       }
+}
+
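
flush_completed_wrs() encodes the ordering rule for unsignaled work: skip unsignaled entries, move signaled entries whose completion has already arrived into the SW CQ, and stop at the first signaled entry that is still outstanding. A table-driven sketch of that walk over made-up entries:

#include <stdio.h>

struct swsqe { int signaled, complete; };

int main(void)
{
        struct swsqe sq[] = {
                { 0, 0 }, { 1, 1 }, { 0, 0 }, { 1, 0 }, { 1, 1 },
        };
        int n = sizeof(sq) / sizeof(sq[0]), i;

        for (i = 0; i < n; i++) {
                if (!sq[i].signaled)
                        continue;      /* unsignaled: skip over it */
                if (!sq[i].complete)
                        break;         /* still outstanding: stop here */
                printf("move sw_sq[%d] into the swcq\n", i); /* only 1 */
        }
        return 0;
}
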
+static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
+               struct t4_cqe *read_cqe)
+{
+       read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
+       read_cqe->len = htonl(wq->sq.oldest_read->read_len);
+       read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
+                       V_CQE_SWCQE(SW_CQE(hw_cqe)) |
+                       V_CQE_OPCODE(FW_RI_READ_REQ) |
+                       V_CQE_TYPE(1));
+       read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
+}
+
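+/*
+ * Advance wq->sq.oldest_read to the next read wr in the SWSQ,
+ * or NULL if there is none.
+ */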
+static void advance_oldest_read(struct t4_wq *wq)
+{
+       u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;
+
+       if (rptr == wq->sq.size)
+               rptr = 0;
+       while (rptr != wq->sq.pidx) {
+               wq->sq.oldest_read = &wq->sq.sw_sq[rptr];
+
+               if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
+                       return;
+               if (++rptr == wq->sq.size)
+                       rptr = 0;
+       }
+       wq->sq.oldest_read = NULL;
+}
+
 /*
  * Move all CQEs from the HWCQ into the SWCQ.
+ * Deal with out-of-order completions and with completions that
+ * complete prior unsignaled WRs.
  */
-void c4iw_flush_hw_cq(struct t4_cq *cq)
+void c4iw_flush_hw_cq(struct c4iw_cq *chp)
 {
-       struct t4_cqe *cqe = NULL, *swcqe;
+       struct t4_cqe *hw_cqe, *swcqe, read_cqe;
+       struct c4iw_qp *qhp;
+       struct t4_swsqe *swsqe;
        int ret;
 
-       PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
-       ret = t4_next_hw_cqe(cq, &cqe);
+       PDBG("%s  cqid 0x%x\n", __func__, chp->cq.cqid);
+       ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
+
+       /*
+        * This logic is similar to poll_cq(), but not quite the same
+        * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
+        * also do any translation magic that poll_cq() normally does.
+        */
        while (!ret) {
-               PDBG("%s flushing hwcq cidx 0x%x swcq pidx 0x%x\n",
-                    __func__, cq->cidx, cq->sw_pidx);
-               swcqe = &cq->sw_queue[cq->sw_pidx];
-               *swcqe = *cqe;
-               swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
-               t4_swcq_produce(cq);
-               t4_hwcq_consume(cq);
-               ret = t4_next_hw_cqe(cq, &cqe);
+               qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));
+
+               /*
+                * drop CQEs with no associated QP
+                */
+               if (qhp == NULL)
+                       goto next_cqe;
+
+               if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
+                       goto next_cqe;
+
+               if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {
+
+                       /*
+                        * drop peer2peer RTR reads.
+                        */
+                       if (CQE_WRID_STAG(hw_cqe) == 1)
+                               goto next_cqe;
+
+                       /*
+                        * Eat completions for unsignaled read WRs.
+                        */
+                       if (!qhp->wq.sq.oldest_read->signaled) {
+                               advance_oldest_read(&qhp->wq);
+                               goto next_cqe;
+                       }
+
+                       /*
+                        * Don't write to the HWCQ, create a new read req CQE
+                        * in local memory and move it into the swcq.
+                        */
+                       create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
+                       hw_cqe = &read_cqe;
+                       advance_oldest_read(&qhp->wq);
+               }
+
+               /*
+                * If it's an SQ completion, then do the magic to move all
+                * the unsignaled and now in-order completions into the swcq.
+                */
+               if (SQ_TYPE(hw_cqe)) {
+                       swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
+                       swsqe->cqe = *hw_cqe;
+                       swsqe->complete = 1;
+                       flush_completed_wrs(&qhp->wq, &chp->cq);
+               } else {
+                       swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
+                       *swcqe = *hw_cqe;
+                       swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
+                       t4_swcq_produce(&chp->cq);
+               }
+next_cqe:
+               t4_hwcq_consume(&chp->cq);
+               ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
        }
 }
 
@@ -281,25 +424,6 @@ static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
        return 1;
 }
 
-void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
-{
-       struct t4_cqe *cqe;
-       u32 ptr;
-
-       *count = 0;
-       ptr = cq->sw_cidx;
-       while (ptr != cq->sw_pidx) {
-               cqe = &cq->sw_queue[ptr];
-               if ((SQ_TYPE(cqe) || ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) &&
-                                     wq->sq.oldest_read)) &&
-                   (CQE_QPID(cqe) == wq->sq.qid))
-                       (*count)++;
-               if (++ptr == cq->size)
-                       ptr = 0;
-       }
-       PDBG("%s cq %p count %d\n", __func__, cq, *count);
-}
-
 void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
 {
        struct t4_cqe *cqe;
@@ -319,70 +443,6 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
        PDBG("%s cq %p count %d\n", __func__, cq, *count);
 }
 
-static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
-{
-       struct t4_swsqe *swsqe;
-       u16 ptr = wq->sq.cidx;
-       int count = wq->sq.in_use;
-       int unsignaled = 0;
-
-       swsqe = &wq->sq.sw_sq[ptr];
-       while (count--)
-               if (!swsqe->signaled) {
-                       if (++ptr == wq->sq.size)
-                               ptr = 0;
-                       swsqe = &wq->sq.sw_sq[ptr];
-                       unsignaled++;
-               } else if (swsqe->complete) {
-
-                       /*
-                        * Insert this completed cqe into the swcq.
-                        */
-                       PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
-                            __func__, ptr, cq->sw_pidx);
-                       swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
-                       cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
-                       t4_swcq_produce(cq);
-                       swsqe->signaled = 0;
-                       wq->sq.in_use -= unsignaled;
-                       break;
-               } else
-                       break;
-}
-
-static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
-                               struct t4_cqe *read_cqe)
-{
-       read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
-       read_cqe->len = cpu_to_be32(wq->sq.oldest_read->read_len);
-       read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
-                                V_CQE_SWCQE(SW_CQE(hw_cqe)) |
-                                V_CQE_OPCODE(FW_RI_READ_REQ) |
-                                V_CQE_TYPE(1));
-       read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
-}
-
-/*
- * Return a ptr to the next read wr in the SWSQ or NULL.
- */
-static void advance_oldest_read(struct t4_wq *wq)
-{
-
-       u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;
-
-       if (rptr == wq->sq.size)
-               rptr = 0;
-       while (rptr != wq->sq.pidx) {
-               wq->sq.oldest_read = &wq->sq.sw_sq[rptr];
-
-               if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
-                       return;
-               if (++rptr == wq->sq.size)
-                       rptr = 0;
-       }
-       wq->sq.oldest_read = NULL;
-}
-
 /*
  * poll_cq
  *
@@ -426,6 +486,22 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
                goto skip_cqe;
        }
 
+       /*
+        * Skip HW CQEs if the wq is flushed.
+        */
+       if (wq->flushed && !SW_CQE(hw_cqe)) {
+               ret = -EAGAIN;
+               goto skip_cqe;
+       }
+
+       /*
+        * skip TERMINATE cqes...
+        */
+       if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
+               ret = -EAGAIN;
+               goto skip_cqe;
+       }
+
        /*
         * Gotta tweak READ completions:
         *      1) the cqe doesn't contain the sq_wptr from the wr.
@@ -440,13 +516,22 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
                 * was generated by the kernel driver as part of peer-2-peer
                 * connection setup.  So ignore the completion.
                 */
-               if (!wq->sq.oldest_read) {
+               if (CQE_WRID_STAG(hw_cqe) == 1) {
                        if (CQE_STATUS(hw_cqe))
                                t4_set_wq_in_error(wq);
                        ret = -EAGAIN;
                        goto skip_cqe;
                }
 
+               /*
+                * Eat completions for unsignaled read WRs.
+                */
+               if (!wq->sq.oldest_read->signaled) {
+                       advance_oldest_read(wq);
+                       ret = -EAGAIN;
+                       goto skip_cqe;
+               }
+
                /*
                 * Don't write to the HWCQ, so create a new read req CQE
                 * in local memory.
@@ -457,14 +542,8 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
        }
 
        if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
-               *cqe_flushed = t4_wq_in_error(wq);
+               *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
                t4_set_wq_in_error(wq);
-               goto proc_cqe;
-       }
-
-       if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
-               ret = -EAGAIN;
-               goto skip_cqe;
        }
 
        /*
@@ -523,7 +602,24 @@ proc_cqe:
         * completion.
         */
        if (SQ_TYPE(hw_cqe)) {
-               wq->sq.cidx = CQE_WRID_SQ_IDX(hw_cqe);
+               int idx = CQE_WRID_SQ_IDX(hw_cqe);
+               BUG_ON(idx >= wq->sq.size);
+
+               /*
+                * Account for any unsignaled completions completed by
+                * this signaled completion.  In this case, cidx points
+                * to the first unsignaled one, and idx points to the
+                * signaled one.  So adjust in_use based on this delta.
+                * If this is not completing any unsignaled WRs, then
+                * the delta will be 0.  Handle wrapping also!
+                */
+               if (idx < wq->sq.cidx)
+                       wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
+               else
+                       wq->sq.in_use -= idx - wq->sq.cidx;
+               BUG_ON(wq->sq.in_use < 0 || wq->sq.in_use > wq->sq.size);
+
+               wq->sq.cidx = (uint16_t)idx;
                PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
                *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
                t4_sq_consume(wq);
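
The delta arithmetic in the comment above is easiest to check with numbers: with a ring of size 8, cidx at 6, and a signaled completion landing at idx 1, the unsignaled WRs in slots 6, 7 and 0 are retired in one step:

#include <stdio.h>

int main(void)
{
        int size = 8, cidx = 6, idx = 1, in_use = 5;

        if (idx < cidx)
                in_use -= size + idx - cidx; /* 8 + 1 - 6 = 3 slots: 6, 7, 0 */
        else
                in_use -= idx - cidx;
        /* cidx would now be set to idx; t4_sq_consume() then retires
         * the signaled WR itself, taking in_use from 2 to 1. */
        printf("in_use now %d\n", in_use); /* 2 */
        return 0;
}
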
@@ -532,6 +628,7 @@ proc_cqe:
                *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
                BUG_ON(t4_rq_empty(wq));
                t4_rq_consume(wq);
+               goto skip_cqe;
        }
 
 flush_wq:
index ae656016e1ae968bdf4fff69e3c1d39de3a87e50..33d2cc6ab56220bed4a49af1fb0f22e68955508d 100644 (file)
@@ -103,18 +103,43 @@ static int dump_qp(int id, void *p, void *data)
        if (space == 0)
                return 1;
 
-       if (qp->ep)
-               cc = snprintf(qpd->buf + qpd->pos, space,
-                            "qp sq id %u rq id %u state %u onchip %u "
-                            "ep tid %u state %u %pI4:%u->%pI4:%u\n",
-                            qp->wq.sq.qid, qp->wq.rq.qid, (int)qp->attr.state,
-                            qp->wq.sq.flags & T4_SQ_ONCHIP,
-                            qp->ep->hwtid, (int)qp->ep->com.state,
-                            &qp->ep->com.local_addr.sin_addr.s_addr,
-                            ntohs(qp->ep->com.local_addr.sin_port),
-                            &qp->ep->com.remote_addr.sin_addr.s_addr,
-                            ntohs(qp->ep->com.remote_addr.sin_port));
-       else
+       if (qp->ep) {
+               if (qp->ep->com.local_addr.ss_family == AF_INET) {
+                       struct sockaddr_in *lsin = (struct sockaddr_in *)
+                               &qp->ep->com.local_addr;
+                       struct sockaddr_in *rsin = (struct sockaddr_in *)
+                               &qp->ep->com.remote_addr;
+
+                       cc = snprintf(qpd->buf + qpd->pos, space,
+                                     "rc qp sq id %u rq id %u state %u "
+                                     "onchip %u ep tid %u state %u "
+                                     "%pI4:%u->%pI4:%u\n",
+                                     qp->wq.sq.qid, qp->wq.rq.qid,
+                                     (int)qp->attr.state,
+                                     qp->wq.sq.flags & T4_SQ_ONCHIP,
+                                     qp->ep->hwtid, (int)qp->ep->com.state,
+                                     &lsin->sin_addr, ntohs(lsin->sin_port),
+                                     &rsin->sin_addr, ntohs(rsin->sin_port));
+               } else {
+                       struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
+                               &qp->ep->com.local_addr;
+                       struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
+                               &qp->ep->com.remote_addr;
+
+                       cc = snprintf(qpd->buf + qpd->pos, space,
+                                     "rc qp sq id %u rq id %u state %u "
+                                     "onchip %u ep tid %u state %u "
+                                     "%pI6:%u->%pI6:%u\n",
+                                     qp->wq.sq.qid, qp->wq.rq.qid,
+                                     (int)qp->attr.state,
+                                     qp->wq.sq.flags & T4_SQ_ONCHIP,
+                                     qp->ep->hwtid, (int)qp->ep->com.state,
+                                     &lsin6->sin6_addr,
+                                     ntohs(lsin6->sin6_port),
+                                     &rsin6->sin6_addr,
+                                     ntohs(rsin6->sin6_port));
+               }
+       } else
                cc = snprintf(qpd->buf + qpd->pos, space,
                             "qp sq id %u rq id %u state %u onchip %u\n",
                              qp->wq.sq.qid, qp->wq.rq.qid,
@@ -351,15 +376,37 @@ static int dump_ep(int id, void *p, void *data)
        if (space == 0)
                return 1;
 
-       cc = snprintf(epd->buf + epd->pos, space,
-                       "ep %p cm_id %p qp %p state %d flags 0x%lx history 0x%lx "
-                       "hwtid %d atid %d %pI4:%d <-> %pI4:%d\n",
-                       ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state,
-                       ep->com.flags, ep->com.history, ep->hwtid, ep->atid,
-                       &ep->com.local_addr.sin_addr.s_addr,
-                       ntohs(ep->com.local_addr.sin_port),
-                       &ep->com.remote_addr.sin_addr.s_addr,
-                       ntohs(ep->com.remote_addr.sin_port));
+       if (ep->com.local_addr.ss_family == AF_INET) {
+               struct sockaddr_in *lsin = (struct sockaddr_in *)
+                       &ep->com.local_addr;
+               struct sockaddr_in *rsin = (struct sockaddr_in *)
+                       &ep->com.remote_addr;
+
+               cc = snprintf(epd->buf + epd->pos, space,
+                             "ep %p cm_id %p qp %p state %d flags 0x%lx "
+                             "history 0x%lx hwtid %d atid %d "
+                             "%pI4:%d <-> %pI4:%d\n",
+                             ep, ep->com.cm_id, ep->com.qp,
+                             (int)ep->com.state, ep->com.flags,
+                             ep->com.history, ep->hwtid, ep->atid,
+                             &lsin->sin_addr, ntohs(lsin->sin_port),
+                             &rsin->sin_addr, ntohs(rsin->sin_port));
+       } else {
+               struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
+                       &ep->com.local_addr;
+               struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
+                       &ep->com.remote_addr;
+
+               cc = snprintf(epd->buf + epd->pos, space,
+                             "ep %p cm_id %p qp %p state %d flags 0x%lx "
+                             "history 0x%lx hwtid %d atid %d "
+                             "%pI6:%d <-> %pI6:%d\n",
+                             ep, ep->com.cm_id, ep->com.qp,
+                             (int)ep->com.state, ep->com.flags,
+                             ep->com.history, ep->hwtid, ep->atid,
+                             &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
+                             &rsin6->sin6_addr, ntohs(rsin6->sin6_port));
+       }
        if (cc < space)
                epd->pos += cc;
        return 0;
@@ -376,12 +423,27 @@ static int dump_listen_ep(int id, void *p, void *data)
        if (space == 0)
                return 1;
 
-       cc = snprintf(epd->buf + epd->pos, space,
-                       "ep %p cm_id %p state %d flags 0x%lx stid %d backlog %d "
-                       "%pI4:%d\n", ep, ep->com.cm_id, (int)ep->com.state,
-                       ep->com.flags, ep->stid, ep->backlog,
-                       &ep->com.local_addr.sin_addr.s_addr,
-                       ntohs(ep->com.local_addr.sin_port));
+       if (ep->com.local_addr.ss_family == AF_INET) {
+               struct sockaddr_in *lsin = (struct sockaddr_in *)
+                       &ep->com.local_addr;
+
+               cc = snprintf(epd->buf + epd->pos, space,
+                             "ep %p cm_id %p state %d flags 0x%lx stid %d "
+                             "backlog %d %pI4:%d\n",
+                             ep, ep->com.cm_id, (int)ep->com.state,
+                             ep->com.flags, ep->stid, ep->backlog,
+                             &lsin->sin_addr, ntohs(lsin->sin_port));
+       } else {
+               struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
+                       &ep->com.local_addr;
+
+               cc = snprintf(epd->buf + epd->pos, space,
+                             "ep %p cm_id %p state %d flags 0x%lx stid %d "
+                             "backlog %d %pI6:%d\n",
+                             ep, ep->com.cm_id, (int)ep->com.state,
+                             ep->com.flags, ep->stid, ep->backlog,
+                             &lsin6->sin6_addr, ntohs(lsin6->sin6_port));
+       }
        if (cc < space)
                epd->pos += cc;
        return 0;
index 1a840b2211dd385f35c0567bbb0f9a61df43626c..d61d0a18f784c9d1b032be277a87524f89449e41 100644 (file)
@@ -44,16 +44,6 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp,
        struct c4iw_qp_attributes attrs;
        unsigned long flag;
 
-       if ((qhp->attr.state == C4IW_QP_STATE_ERROR) ||
-           (qhp->attr.state == C4IW_QP_STATE_TERMINATE)) {
-               pr_err("%s AE after RTS - qpid 0x%x opcode %d status 0x%x "\
-                      "type %d wrid.hi 0x%x wrid.lo 0x%x\n",
-                      __func__, CQE_QPID(err_cqe), CQE_OPCODE(err_cqe),
-                      CQE_STATUS(err_cqe), CQE_TYPE(err_cqe),
-                      CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe));
-               return;
-       }
-
        printk(KERN_ERR MOD "AE qpid 0x%x opcode %d status 0x%x "
               "type %d wrid.hi 0x%x wrid.lo 0x%x\n",
               CQE_QPID(err_cqe), CQE_OPCODE(err_cqe),
index 485183ad34cd7ab7b2db8c2e310974fb319bd616..23eaeabab93b50d483e279de2adb7175c7c29dfc 100644 (file)
@@ -752,8 +752,8 @@ struct c4iw_ep_common {
        enum c4iw_ep_state state;
        struct kref kref;
        struct mutex mutex;
-       struct sockaddr_in local_addr;
-       struct sockaddr_in remote_addr;
+       struct sockaddr_storage local_addr;
+       struct sockaddr_storage remote_addr;
        struct c4iw_wr_wait wr_wait;
        unsigned long flags;
        unsigned long history;
@@ -917,12 +917,11 @@ void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size);
 u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size);
 void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size);
 int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb);
-void c4iw_flush_hw_cq(struct t4_cq *cq);
+void c4iw_flush_hw_cq(struct c4iw_cq *chp);
 void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
-void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp);
 int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count);
-int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count);
+int c4iw_flush_sq(struct c4iw_qp *qhp);
 int c4iw_ev_handler(struct c4iw_dev *rnicp, u32 qid);
 u16 c4iw_rqes_posted(struct c4iw_qp *qhp);
 int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe);
index a4975e1654a639960b214114648593564fcb0b27..582936708e6e492dfca46b88b5db98f16159c0db 100644 (file)
@@ -737,6 +737,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                swsqe->idx = qhp->wq.sq.pidx;
                swsqe->complete = 0;
                swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED);
+               swsqe->flushed = 0;
                swsqe->wr_id = wr->wr_id;
 
                init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
@@ -1006,7 +1007,15 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
        /* locking hierarchy: cq lock first, then qp lock. */
        spin_lock_irqsave(&rchp->lock, flag);
        spin_lock(&qhp->lock);
-       c4iw_flush_hw_cq(&rchp->cq);
+
+       if (qhp->wq.flushed) {
+               spin_unlock(&qhp->lock);
+               spin_unlock_irqrestore(&rchp->lock, flag);
+               return;
+       }
+       qhp->wq.flushed = 1;
+
+       c4iw_flush_hw_cq(rchp);
        c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
        flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
        spin_unlock(&qhp->lock);
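
The "cq lock first, then qp lock" comment above states the usual deadlock-avoidance rule: every path that needs both locks takes them in one global order. A minimal pthread rendering of the rule, assuming nothing about the driver itself:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t qp_lock = PTHREAD_MUTEX_INITIALIZER;

static void flush_one(int id)
{
        pthread_mutex_lock(&cq_lock); /* always CQ before QP, on every path */
        pthread_mutex_lock(&qp_lock);
        printf("flushing qp %d\n", id);
        pthread_mutex_unlock(&qp_lock);
        pthread_mutex_unlock(&cq_lock);
}

int main(void)
{
        flush_one(1);
        return 0;
}
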
@@ -1020,9 +1029,9 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
        /* locking hierarchy: cq lock first, then qp lock. */
        spin_lock_irqsave(&schp->lock, flag);
        spin_lock(&qhp->lock);
-       c4iw_flush_hw_cq(&schp->cq);
-       c4iw_count_scqes(&schp->cq, &qhp->wq, &count);
-       flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count);
+       if (schp != rchp)
+               c4iw_flush_hw_cq(schp);
+       flushed = c4iw_flush_sq(qhp);
        spin_unlock(&qhp->lock);
        spin_unlock_irqrestore(&schp->lock, flag);
        if (flushed) {
@@ -1037,11 +1046,11 @@ static void flush_qp(struct c4iw_qp *qhp)
        struct c4iw_cq *rchp, *schp;
        unsigned long flag;
 
-       rchp = get_chp(qhp->rhp, qhp->attr.rcq);
-       schp = get_chp(qhp->rhp, qhp->attr.scq);
+       rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
+       schp = to_c4iw_cq(qhp->ibqp.send_cq);
 
+       t4_set_wq_in_error(&qhp->wq);
        if (qhp->ibqp.uobject) {
-               t4_set_wq_in_error(&qhp->wq);
                t4_set_cq_in_error(&rchp->cq);
                spin_lock_irqsave(&rchp->comp_handler_lock, flag);
                (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
@@ -1330,8 +1339,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
                                disconnect = 1;
                                c4iw_get_ep(&qhp->ep->com);
                        }
-                       if (qhp->ibqp.uobject)
-                               t4_set_wq_in_error(&qhp->wq);
+                       t4_set_wq_in_error(&qhp->wq);
                        ret = rdma_fini(rhp, qhp, ep);
                        if (ret)
                                goto err;
@@ -1340,18 +1348,21 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
                        set_state(qhp, C4IW_QP_STATE_TERMINATE);
                        qhp->attr.layer_etype = attrs->layer_etype;
                        qhp->attr.ecode = attrs->ecode;
-                       if (qhp->ibqp.uobject)
-                               t4_set_wq_in_error(&qhp->wq);
+                       t4_set_wq_in_error(&qhp->wq);
                        ep = qhp->ep;
+                       disconnect = 1;
                        if (!internal)
                                terminate = 1;
-                       disconnect = 1;
+                       else {
+                               ret = rdma_fini(rhp, qhp, ep);
+                               if (ret)
+                                       goto err;
+                       }
                        c4iw_get_ep(&qhp->ep->com);
                        break;
                case C4IW_QP_STATE_ERROR:
                        set_state(qhp, C4IW_QP_STATE_ERROR);
-                       if (qhp->ibqp.uobject)
-                               t4_set_wq_in_error(&qhp->wq);
+                       t4_set_wq_in_error(&qhp->wq);
                        if (!internal) {
                                abort = 1;
                                disconnect = 1;
@@ -1552,12 +1563,12 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 
        ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
 
-
        qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
        if (!qhp)
                return ERR_PTR(-ENOMEM);
        qhp->wq.sq.size = sqsize;
        qhp->wq.sq.memsize = (sqsize + 1) * sizeof *qhp->wq.sq.queue;
+       qhp->wq.sq.flush_cidx = -1;
        qhp->wq.rq.size = rqsize;
        qhp->wq.rq.memsize = (rqsize + 1) * sizeof *qhp->wq.rq.queue;
 
index ebcb03bd1b72ed6003bd7ae85e88dd8ce9c5595d..e73ace739183a9864ab8fee70e374cdc9ce0d178 100644 (file)
@@ -36,9 +36,9 @@
 #include "t4_msg.h"
 #include "t4fw_ri_api.h"
 
-#define T4_MAX_NUM_QP (1<<16)
-#define T4_MAX_NUM_CQ (1<<15)
-#define T4_MAX_NUM_PD (1<<15)
+#define T4_MAX_NUM_QP 65536
+#define T4_MAX_NUM_CQ 65536
+#define T4_MAX_NUM_PD 65536
 #define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1)
 #define T4_MAX_EQ_SIZE (65520 - T4_EQ_STATUS_ENTRIES)
 #define T4_MAX_IQ_SIZE (65520 - 1)
@@ -47,7 +47,7 @@
 #define T4_MAX_QP_DEPTH (T4_MAX_RQ_SIZE - 1)
 #define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 1)
 #define T4_MAX_NUM_STAG (1<<15)
-#define T4_MAX_MR_SIZE (~0ULL - 1)
+#define T4_MAX_MR_SIZE (~0ULL)
 #define T4_PAGESIZE_MASK 0xffff000  /* 4KB-128MB */
 #define T4_STAG_UNSET 0xffffffff
 #define T4_FW_MAJ 0
@@ -269,6 +269,7 @@ struct t4_swsqe {
        int                     complete;
        int                     signaled;
        u16                     idx;
+       int                     flushed;
 };
 
 static inline pgprot_t t4_pgprot_wc(pgprot_t prot)
@@ -300,6 +301,7 @@ struct t4_sq {
        u16 pidx;
        u16 wq_pidx;
        u16 flags;
+       short flush_cidx;
 };
 
 struct t4_swrqe {
@@ -330,6 +332,7 @@ struct t4_wq {
        void __iomem *db;
        void __iomem *gts;
        struct c4iw_rdev *rdev;
+       int flushed;
 };
 
 static inline int t4_rqes_posted(struct t4_wq *wq)
@@ -412,6 +415,9 @@ static inline void t4_sq_produce(struct t4_wq *wq, u8 len16)
 
 static inline void t4_sq_consume(struct t4_wq *wq)
 {
+       BUG_ON(wq->sq.in_use < 1);
+       if (wq->sq.cidx == wq->sq.flush_cidx)
+               wq->sq.flush_cidx = -1;
        wq->sq.in_use--;
        if (++wq->sq.cidx == wq->sq.size)
                wq->sq.cidx = 0;
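
t4_sq_consume() now pairs the circular cidx advance with the flush_cidx bookmark: once the consumer reaches the recorded flush point, the marker is cleared back to -1. A self-contained sketch of the same bookkeeping (illustrative type, not the t4 layout):

	struct ring_sketch {
		unsigned int size, cidx, in_use;
		int flush_cidx;			/* -1: no recorded flush point */
	};

	static void ring_consume(struct ring_sketch *r)
	{
		if ((int)r->cidx == r->flush_cidx)
			r->flush_cidx = -1;	/* consumer passed the flush mark */
		r->in_use--;
		if (++r->cidx == r->size)
			r->cidx = 0;		/* wrap the circular index */
	}
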
@@ -505,12 +511,18 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se)
 static inline void t4_swcq_produce(struct t4_cq *cq)
 {
        cq->sw_in_use++;
+       if (cq->sw_in_use == cq->size) {
+               PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+               cq->error = 1;
+               BUG_ON(1);
+       }
        if (++cq->sw_pidx == cq->size)
                cq->sw_pidx = 0;
 }
 
 static inline void t4_swcq_consume(struct t4_cq *cq)
 {
+       BUG_ON(cq->sw_in_use < 1);
        cq->sw_in_use--;
        if (++cq->sw_cidx == cq->size)
                cq->sw_cidx = 0;
@@ -519,7 +531,7 @@ static inline void t4_swcq_consume(struct t4_cq *cq)
 static inline void t4_hwcq_consume(struct t4_cq *cq)
 {
        cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts;
-       if (++cq->cidx_inc == (cq->size >> 4)) {
+       if (++cq->cidx_inc == (cq->size >> 4) || cq->cidx_inc == CIDXINC_MASK) {
                u32 val;
 
                val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7) |
@@ -552,6 +564,7 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
                ret = -EOVERFLOW;
                cq->error = 1;
                printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid);
+               BUG_ON(1);
        } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) {
                *cqe = &cq->queue[cq->cidx];
                ret = 0;
@@ -562,6 +575,12 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
 
 static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq)
 {
+       if (cq->sw_in_use == cq->size) {
+               PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+               cq->error = 1;
+               BUG_ON(1);
+               return NULL;
+       }
        if (cq->sw_in_use)
                return &cq->sw_queue[cq->sw_cidx];
        return NULL;
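
The produce and lookup paths above now treat a full software CQ as a hard accounting error rather than silently wrapping. A hedged sketch of that guard (this version checks before the increment, while the driver checks after; the invariant is the same):

	struct swq_sketch { unsigned int size, in_use, pidx; int error; };

	static int swq_produce(struct swq_sketch *q)
	{
		if (q->in_use == q->size) {
			q->error = 1;		/* accounting bug: fail loudly */
			return -1;
		}
		q->in_use++;
		if (++q->pidx == q->size)
			q->pidx = 0;
		return 0;
	}
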
index a188d31785590e9cf59c7f3bf77926a11817e4b7..d6c5a73becf40ecfc0f177422f34de1f14247f32 100644 (file)
@@ -54,6 +54,8 @@
 #define DRV_VERSION    "1.0"
 #define DRV_RELDATE    "April 4, 2008"
 
+#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
+
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
 MODULE_LICENSE("Dual BSD/GPL");
@@ -88,6 +90,25 @@ static void init_query_mad(struct ib_smp *mad)
 
 static union ib_gid zgid;
 
+static int check_flow_steering_support(struct mlx4_dev *dev)
+{
+       int ib_num_ports = 0;
+       int i;
+
+       mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+               ib_num_ports++;
+
+       if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+               if (ib_num_ports || mlx4_is_mfunc(dev)) {
+                       pr_warn("Device managed flow steering is unavailable "
+                               "for IB ports or in multifunction env.\n");
+                       return 0;
+               }
+               return 1;
+       }
+       return 0;
+}
+
 static int mlx4_ib_query_device(struct ib_device *ibdev,
                                struct ib_device_attr *props)
 {
@@ -144,6 +165,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
                        props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
                else
                        props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
+       if (check_flow_steering_support(dev->dev))
+               props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
        }
 
        props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
@@ -798,6 +821,209 @@ struct mlx4_ib_steering {
        union ib_gid gid;
 };
 
+static int parse_flow_attr(struct mlx4_dev *dev,
+                          union ib_flow_spec *ib_spec,
+                          struct _rule_hw *mlx4_spec)
+{
+       enum mlx4_net_trans_rule_id type;
+
+       switch (ib_spec->type) {
+       case IB_FLOW_SPEC_ETH:
+               type = MLX4_NET_TRANS_RULE_ID_ETH;
+               memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
+                      ETH_ALEN);
+               memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
+                      ETH_ALEN);
+               mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
+               mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
+               break;
+
+       case IB_FLOW_SPEC_IPV4:
+               type = MLX4_NET_TRANS_RULE_ID_IPV4;
+               mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
+               mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
+               mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
+               mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
+               break;
+
+       case IB_FLOW_SPEC_TCP:
+       case IB_FLOW_SPEC_UDP:
+               type = ib_spec->type == IB_FLOW_SPEC_TCP ?
+                                       MLX4_NET_TRANS_RULE_ID_TCP :
+                                       MLX4_NET_TRANS_RULE_ID_UDP;
+               mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
+               mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
+               mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
+               mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
+               break;
+
+       default:
+               return -EINVAL;
+       }
+       if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
+           mlx4_hw_rule_sz(dev, type) < 0)
+               return -EINVAL;
+       mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
+       mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
+       return mlx4_hw_rule_sz(dev, type);
+}
+
+static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
+                         int domain,
+                         enum mlx4_net_trans_promisc_mode flow_type,
+                         u64 *reg_id)
+{
+       int ret, i;
+       int size = 0;
+       void *ib_flow;
+       struct mlx4_ib_dev *mdev = to_mdev(qp->device);
+       struct mlx4_cmd_mailbox *mailbox;
+       struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+       size_t rule_size = sizeof(struct mlx4_net_trans_rule_hw_ctrl) +
+                          (sizeof(struct _rule_hw) * flow_attr->num_of_specs);
+
+       static const u16 __mlx4_domain[] = {
+               [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
+               [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
+               [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
+               [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
+       };
+
+       if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
+               pr_err("Invalid priority value %d\n", flow_attr->priority);
+               return -EINVAL;
+       }
+
+       if (domain >= IB_FLOW_DOMAIN_NUM) {
+               pr_err("Invalid domain value %d\n", domain);
+               return -EINVAL;
+       }
+
+       if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
+               return -EINVAL;
+
+       mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
+       if (IS_ERR(mailbox))
+               return PTR_ERR(mailbox);
+       memset(mailbox->buf, 0, rule_size);
+       ctrl = mailbox->buf;
+
+       ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
+                                flow_attr->priority);
+       ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
+       ctrl->port = flow_attr->port;
+       ctrl->qpn = cpu_to_be32(qp->qp_num);
+
+       ib_flow = flow_attr + 1;
+       size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+       for (i = 0; i < flow_attr->num_of_specs; i++) {
+               ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size);
+               if (ret < 0) {
+                       mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+                       return -EINVAL;
+               }
+               ib_flow += ((union ib_flow_spec *) ib_flow)->size;
+               size += ret;
+       }
+
+       ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
+                          MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+                          MLX4_CMD_NATIVE);
+       if (ret == -ENOMEM)
+               pr_err("mcg table is full. Fail to register network rule.\n");
+       else if (ret == -ENXIO)
+               pr_err("Device managed flow steering is disabled. Fail to register network rule.\n");
+       else if (ret)
+               pr_err("Invalid argumant. Fail to register network rule.\n");
+
+       mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+       return ret;
+}
+
+static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
+{
+       int err;
+       err = mlx4_cmd(dev, reg_id, 0, 0,
+                      MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+                      MLX4_CMD_NATIVE);
+       if (err)
+               pr_err("Fail to detach network rule. registration id = 0x%llx\n",
+                      reg_id);
+       return err;
+}
+
+static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
+                                   struct ib_flow_attr *flow_attr,
+                                   int domain)
+{
+       int err = 0, i = 0;
+       struct mlx4_ib_flow *mflow;
+       enum mlx4_net_trans_promisc_mode type[2];
+
+       memset(type, 0, sizeof(type));
+
+       mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
+       if (!mflow) {
+               err = -ENOMEM;
+               goto err_free;
+       }
+
+       switch (flow_attr->type) {
+       case IB_FLOW_ATTR_NORMAL:
+               type[0] = MLX4_FS_REGULAR;
+               break;
+
+       case IB_FLOW_ATTR_ALL_DEFAULT:
+               type[0] = MLX4_FS_ALL_DEFAULT;
+               break;
+
+       case IB_FLOW_ATTR_MC_DEFAULT:
+               type[0] = MLX4_FS_MC_DEFAULT;
+               break;
+
+       case IB_FLOW_ATTR_SNIFFER:
+               type[0] = MLX4_FS_UC_SNIFFER;
+               type[1] = MLX4_FS_MC_SNIFFER;
+               break;
+
+       default:
+               err = -EINVAL;
+               goto err_free;
+       }
+
+       while (i < ARRAY_SIZE(type) && type[i]) {
+               err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
+                                           &mflow->reg_id[i]);
+               if (err)
+                       goto err_free;
+               i++;
+       }
+
+       return &mflow->ibflow;
+
+err_free:
+       kfree(mflow);
+       return ERR_PTR(err);
+}
+
+static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
+{
+       int err, ret = 0;
+       int i = 0;
+       struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
+       struct mlx4_ib_flow *mflow = to_mflow(flow_id);
+
+       while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) {
+               err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]);
+               if (err)
+                       ret = err;
+               i++;
+       }
+
+       kfree(mflow);
+       return ret;
+}
+
 static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
        int err;
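
parse_flow_attr() consumes a header followed by num_of_specs variable-size specs laid out contiguously in memory, advancing the cursor by each spec's own size field. A self-contained sketch of walking that layout (the *_sketch structs are simplified stand-ins, not the uapi definitions):

	#include <stdint.h>
	#include <stdio.h>

	struct attr_sketch { uint32_t num_of_specs; };
	struct spec_sketch { uint16_t type; uint16_t size; /* bytes, incl. header */ };

	static void walk_specs(const struct attr_sketch *attr)
	{
		const uint8_t *cur = (const uint8_t *)(attr + 1);
		uint32_t i;

		for (i = 0; i < attr->num_of_specs; i++) {
			const struct spec_sketch *spec = (const struct spec_sketch *)cur;

			printf("spec %u: type %u, %u bytes\n", i, spec->type, spec->size);
			cur += spec->size;	/* each spec carries its own length */
		}
	}
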
@@ -1461,6 +1687,15 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                        (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
        }
 
+       if (check_flow_steering_support(dev)) {
+               ibdev->ib_dev.create_flow       = mlx4_ib_create_flow;
+               ibdev->ib_dev.destroy_flow      = mlx4_ib_destroy_flow;
+
+               ibdev->ib_dev.uverbs_cmd_mask   |=
+                       (1ull << IB_USER_VERBS_CMD_CREATE_FLOW) |
+                       (1ull << IB_USER_VERBS_CMD_DESTROY_FLOW);
+       }
+
        mlx4_ib_alloc_eqs(dev, ibdev);
 
        spin_lock_init(&iboe->lock);
index f61ec26500c4919628abf3eb5ee570e878a43155..036b663dd26ecfbe3f6089b593483d97db489bcc 100644 (file)
@@ -132,6 +132,12 @@ struct mlx4_ib_fmr {
        struct mlx4_fmr         mfmr;
 };
 
+struct mlx4_ib_flow {
+       struct ib_flow ibflow;
+       /* translating DMFS verbs sniffer rule to FW API requires two reg IDs */
+       u64 reg_id[2];
+};
+
 struct mlx4_ib_wq {
        u64                    *wrid;
        spinlock_t              lock;
@@ -552,6 +558,12 @@ static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
 {
        return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
 }
+
+static inline struct mlx4_ib_flow *to_mflow(struct ib_flow *ibflow)
+{
+       return container_of(ibflow, struct mlx4_ib_flow, ibflow);
+}
+
 static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
 {
        return container_of(ibqp, struct mlx4_ib_qp, ibqp);
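
to_mflow() is the usual container_of idiom: recover the driver wrapper from a pointer to its embedded core object. A standalone illustration (the macro and stubs below mirror, not reuse, the kernel definitions):

	#include <stddef.h>

	#define container_of_sketch(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct ibflow_stub { int handle; };

	struct mflow_stub {
		struct ibflow_stub ibflow;	/* embedded core object */
		unsigned long long reg_id[2];	/* driver-private state */
	};

	static struct mflow_stub *to_mflow_sketch(struct ibflow_stub *f)
	{
		return container_of_sketch(f, struct mflow_stub, ibflow);
	}
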
index 24b9f1a0107b9174b7035d12b560f69fa90633c1..6b29249aa85a9ed59b24df4c5fc42636371b9181 100644 (file)
@@ -2998,6 +2998,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        u8 *start_ptr = &start_addr;
        u8 **start_buff = &start_ptr;
        u16 buff_len = 0;
+       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
 
        ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
        if (!ibqp)
@@ -3062,8 +3064,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        /* setup our first outgoing iWarp send WQE (the IETF frame response) */
        wqe = &nesqp->hwqp.sq_vbase[0];
 
-       if (cm_id->remote_addr.sin_addr.s_addr !=
-           cm_id->local_addr.sin_addr.s_addr) {
+       if (raddr->sin_addr.s_addr != laddr->sin_addr.s_addr) {
                u64temp = (unsigned long)nesqp;
                nesibdev = nesvnic->nesibdev;
                nespd = nesqp->nespd;
@@ -3132,13 +3133,10 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
        nes_cm_init_tsa_conn(nesqp, cm_node);
 
-       nesqp->nesqp_context->tcpPorts[0] =
-               cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
-       nesqp->nesqp_context->tcpPorts[1] =
-               cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
+       nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port));
+       nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port));
 
-       nesqp->nesqp_context->ip0 =
-                       cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
+       nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr));
 
        nesqp->nesqp_context->misc2 |= cpu_to_le32(
                (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3162,9 +3160,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        memset(&nes_quad, 0, sizeof(nes_quad));
        nes_quad.DstIpAdrIndex =
                cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
-       nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
-       nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
-       nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
+       nes_quad.SrcIpadr = raddr->sin_addr.s_addr;
+       nes_quad.TcpPorts[0] = raddr->sin_port;
+       nes_quad.TcpPorts[1] = laddr->sin_port;
 
        /* Produce hash key */
        crc_value = get_crc_value(&nes_quad);
@@ -3180,10 +3178,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = "
                  "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + "
                  "private data length=%u.\n", nesqp->hwqp.qp_id,
-                 ntohl(cm_id->remote_addr.sin_addr.s_addr),
-                 ntohs(cm_id->remote_addr.sin_port),
-                 ntohl(cm_id->local_addr.sin_addr.s_addr),
-                 ntohs(cm_id->local_addr.sin_port),
+                 ntohl(raddr->sin_addr.s_addr), ntohs(raddr->sin_port),
+                 ntohl(laddr->sin_addr.s_addr), ntohs(laddr->sin_port),
                  le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
                  le32_to_cpu(nesqp->nesqp_context->snd_nxt),
                  buff_len);
@@ -3263,7 +3259,11 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        struct nes_cm_node *cm_node;
        struct nes_cm_info cm_info;
        int apbvt_set = 0;
+       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
 
+       if (cm_id->remote_addr.ss_family != AF_INET)
+               return -ENOSYS;
        ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
        if (!ibqp)
                return -EINVAL;
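
nes remains an IPv4-only iWARP driver, so connect and listen now reject other address families up front before touching any state. A trivial standalone sketch of the guard (hypothetical helper name):

	#include <errno.h>
	#include <sys/socket.h>

	static int check_family_sketch(const struct sockaddr_storage *ss)
	{
		if (ss->ss_family != AF_INET)
			return -ENOSYS;		/* IPv4 only, as nes does */
		return 0;
	}
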
@@ -3277,16 +3277,14 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        if (!nesdev)
                return -EINVAL;
 
-       if (!(cm_id->local_addr.sin_port) || !(cm_id->remote_addr.sin_port))
+       if (!laddr->sin_port || !raddr->sin_port)
                return -EINVAL;
 
        nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
                  "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
-                 ntohl(nesvnic->local_ipaddr),
-                 ntohl(cm_id->remote_addr.sin_addr.s_addr),
-                 ntohs(cm_id->remote_addr.sin_port),
-                 ntohl(cm_id->local_addr.sin_addr.s_addr),
-                 ntohs(cm_id->local_addr.sin_port));
+                 ntohl(nesvnic->local_ipaddr), ntohl(raddr->sin_addr.s_addr),
+                 ntohs(raddr->sin_port), ntohl(laddr->sin_addr.s_addr),
+                 ntohs(laddr->sin_port));
 
        atomic_inc(&cm_connects);
        nesqp->active_conn = 1;
@@ -3306,18 +3304,18 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
                  conn_param->private_data_len);
 
-       if (cm_id->local_addr.sin_addr.s_addr !=
-           cm_id->remote_addr.sin_addr.s_addr) {
-               nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
-                                PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+       if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) {
+               nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
+                                PCI_FUNC(nesdev->pcidev->devfn),
+                                NES_MANAGE_APBVT_ADD);
                apbvt_set = 1;
        }
 
        /* set up the connection params for the node */
-       cm_info.loc_addr = htonl(cm_id->local_addr.sin_addr.s_addr);
-       cm_info.loc_port = htons(cm_id->local_addr.sin_port);
-       cm_info.rem_addr = htonl(cm_id->remote_addr.sin_addr.s_addr);
-       cm_info.rem_port = htons(cm_id->remote_addr.sin_port);
+       cm_info.loc_addr = htonl(laddr->sin_addr.s_addr);
+       cm_info.loc_port = htons(laddr->sin_port);
+       cm_info.rem_addr = htonl(raddr->sin_addr.s_addr);
+       cm_info.rem_port = htons(raddr->sin_port);
        cm_info.cm_id = cm_id;
        cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
 
@@ -3329,7 +3327,7 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                                          &cm_info);
        if (!cm_node) {
                if (apbvt_set)
-                       nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
+                       nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
                                         PCI_FUNC(nesdev->pcidev->devfn),
                                         NES_MANAGE_APBVT_DEL);
 
@@ -3355,10 +3353,13 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
        struct nes_cm_listener *cm_node;
        struct nes_cm_info cm_info;
        int err;
+       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
 
        nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n",
-                       cm_id, ntohs(cm_id->local_addr.sin_port));
+                 cm_id, ntohs(laddr->sin_port));
 
+       if (cm_id->local_addr.ss_family != AF_INET)
+               return -ENOSYS;
        nesvnic = to_nesvnic(cm_id->device);
        if (!nesvnic)
                return -EINVAL;
@@ -3367,11 +3368,11 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
                        nesvnic, nesvnic->netdev, nesvnic->netdev->name);
 
        nes_debug(NES_DBG_CM, "nesvnic->local_ipaddr=0x%08x, sin_addr.s_addr=0x%08x\n",
-                       nesvnic->local_ipaddr, cm_id->local_addr.sin_addr.s_addr);
+                       nesvnic->local_ipaddr, laddr->sin_addr.s_addr);
 
        /* setup listen params in our api call struct */
        cm_info.loc_addr = nesvnic->local_ipaddr;
-       cm_info.loc_port = cm_id->local_addr.sin_port;
+       cm_info.loc_port = laddr->sin_port;
        cm_info.backlog = backlog;
        cm_info.cm_id = cm_id;
 
@@ -3388,8 +3389,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
        cm_id->provider_data = cm_node;
 
        if (!cm_node->reused_node) {
-               err = nes_manage_apbvt(nesvnic,
-                                      ntohs(cm_id->local_addr.sin_port),
+               err = nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
                                       PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
                                       NES_MANAGE_APBVT_ADD);
                if (err) {
@@ -3487,6 +3487,9 @@ static void cm_event_connected(struct nes_cm_event *event)
        struct nes_v4_quad nes_quad;
        u32 crc_value;
        int ret;
+       struct sockaddr_in *laddr;
+       struct sockaddr_in *raddr;
+       struct sockaddr_in *cm_event_laddr;
 
        /* get all our handles */
        cm_node = event->cm_node;
@@ -3496,27 +3499,24 @@ static void cm_event_connected(struct nes_cm_event *event)
        nesvnic = to_nesvnic(nesqp->ibqp.device);
        nesdev = nesvnic->nesdev;
        nesadapter = nesdev->nesadapter;
+       laddr = (struct sockaddr_in *)&cm_id->local_addr;
+       raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+       cm_event_laddr = (struct sockaddr_in *)&cm_event.local_addr;
 
        if (nesqp->destroyed)
                return;
        atomic_inc(&cm_connecteds);
        nes_debug(NES_DBG_CM, "QP%u attempting to connect to  0x%08X:0x%04X on"
                  " local port 0x%04X. jiffies = %lu.\n",
-                 nesqp->hwqp.qp_id,
-                 ntohl(cm_id->remote_addr.sin_addr.s_addr),
-                 ntohs(cm_id->remote_addr.sin_port),
-                 ntohs(cm_id->local_addr.sin_port),
-                 jiffies);
+                 nesqp->hwqp.qp_id, ntohl(raddr->sin_addr.s_addr),
+                 ntohs(raddr->sin_port), ntohs(laddr->sin_port), jiffies);
 
        nes_cm_init_tsa_conn(nesqp, cm_node);
 
        /* set the QP tsa context */
-       nesqp->nesqp_context->tcpPorts[0] =
-               cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
-       nesqp->nesqp_context->tcpPorts[1] =
-               cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
-       nesqp->nesqp_context->ip0 =
-                       cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
+       nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port));
+       nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port));
+       nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr));
 
        nesqp->nesqp_context->misc2 |= cpu_to_le32(
                        (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3544,9 +3544,9 @@ static void cm_event_connected(struct nes_cm_event *event)
 
        nes_quad.DstIpAdrIndex =
                cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
-       nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
-       nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
-       nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
+       nes_quad.SrcIpadr = raddr->sin_addr.s_addr;
+       nes_quad.TcpPorts[0] = raddr->sin_port;
+       nes_quad.TcpPorts[1] = laddr->sin_port;
 
        /* Produce hash key */
        crc_value = get_crc_value(&nes_quad);
@@ -3565,8 +3565,8 @@ static void cm_event_connected(struct nes_cm_event *event)
        cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
        cm_event.status = 0;
        cm_event.provider_data = cm_id->provider_data;
-       cm_event.local_addr.sin_family = AF_INET;
-       cm_event.local_addr.sin_port = cm_id->local_addr.sin_port;
+       cm_event_laddr->sin_family = AF_INET;
+       cm_event_laddr->sin_port = laddr->sin_port;
        cm_event.remote_addr = cm_id->remote_addr;
 
        cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
@@ -3574,7 +3574,7 @@ static void cm_event_connected(struct nes_cm_event *event)
        cm_event.ird = cm_node->ird_size;
        cm_event.ord = cm_node->ord_size;
 
-       cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr;
+       cm_event_laddr->sin_addr.s_addr = event->cm_info.rem_addr;
        ret = cm_id->event_handler(cm_id, &cm_event);
        nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
 
@@ -3627,9 +3627,16 @@ static void cm_event_connect_error(struct nes_cm_event *event)
        cm_event.private_data = NULL;
        cm_event.private_data_len = 0;
 
-       nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, "
-                 "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr,
-                 cm_event.remote_addr.sin_addr.s_addr);
+#ifdef CONFIG_INFINIBAND_NES_DEBUG
+       {
+               struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
+                                                    &cm_event.local_addr;
+               struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
+                                                    &cm_event.remote_addr;
+               nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remote_addr=%08x\n",
+                         cm_event_laddr->sin_addr.s_addr, cm_event_raddr->sin_addr.s_addr);
+       }
+#endif
 
        ret = cm_id->event_handler(cm_id, &cm_event);
        nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
@@ -3709,6 +3716,10 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
        struct iw_cm_event cm_event;
        int ret;
        struct nes_cm_node *cm_node;
+       struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
+                                            &cm_event.local_addr;
+       struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
+                                            &cm_event.remote_addr;
 
        cm_node = event->cm_node;
        if (!cm_node)
@@ -3723,13 +3734,13 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
        cm_event.status = 0;
        cm_event.provider_data = (void *)cm_node;
 
-       cm_event.local_addr.sin_family = AF_INET;
-       cm_event.local_addr.sin_port = htons(event->cm_info.loc_port);
-       cm_event.local_addr.sin_addr.s_addr = htonl(event->cm_info.loc_addr);
+       cm_event_laddr->sin_family = AF_INET;
+       cm_event_laddr->sin_port = htons(event->cm_info.loc_port);
+       cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
 
-       cm_event.remote_addr.sin_family = AF_INET;
-       cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port);
-       cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr);
+       cm_event_raddr->sin_family = AF_INET;
+       cm_event_raddr->sin_port = htons(event->cm_info.rem_port);
+       cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
        cm_event.private_data = cm_node->mpa_frame_buf;
        cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
        cm_event.ird = cm_node->ird_size;
@@ -3749,6 +3760,10 @@ static void cm_event_mpa_reject(struct nes_cm_event *event)
        struct iw_cm_event cm_event;
        struct nes_cm_node *cm_node;
        int ret;
+       struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
+                                            &cm_event.local_addr;
+       struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
+                                            &cm_event.remote_addr;
 
        cm_node = event->cm_node;
        if (!cm_node)
@@ -3763,21 +3778,21 @@ static void cm_event_mpa_reject(struct nes_cm_event *event)
        cm_event.status = -ECONNREFUSED;
        cm_event.provider_data = cm_id->provider_data;
 
-       cm_event.local_addr.sin_family = AF_INET;
-       cm_event.local_addr.sin_port = htons(event->cm_info.loc_port);
-       cm_event.local_addr.sin_addr.s_addr = htonl(event->cm_info.loc_addr);
+       cm_event_laddr->sin_family = AF_INET;
+       cm_event_laddr->sin_port = htons(event->cm_info.loc_port);
+       cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
 
-       cm_event.remote_addr.sin_family = AF_INET;
-       cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port);
-       cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr);
+       cm_event_raddr->sin_family = AF_INET;
+       cm_event_raddr->sin_port = htons(event->cm_info.rem_port);
+       cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
 
        cm_event.private_data = cm_node->mpa_frame_buf;
        cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
 
        nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, "
                  "remove_addr=%08x\n",
-                 cm_event.local_addr.sin_addr.s_addr,
-                 cm_event.remote_addr.sin_addr.s_addr);
+                 cm_event_laddr->sin_addr.s_addr,
+                 cm_event_raddr->sin_addr.s_addr);
 
        ret = cm_id->event_handler(cm_id, &cm_event);
        if (ret)
index d540180a8e420865a71387a630a509b203959747..adc11d14f8783faa9afcac1ee6b6faaadcb8d267 100644 (file)
@@ -56,10 +56,12 @@ struct ocrdma_dev_attr {
        u16 max_qp;
        u16 max_wqe;
        u16 max_rqe;
+       u16 max_srq;
        u32 max_inline_data;
        int max_send_sge;
        int max_recv_sge;
        int max_srq_sge;
+       int max_rdma_sge;
        int max_mr;
        u64 max_mr_size;
        u32 max_num_mr_pbl;
@@ -130,8 +132,7 @@ struct ocrdma_dev {
        struct ocrdma_cq **cq_tbl;
        struct ocrdma_qp **qp_tbl;
 
-       struct ocrdma_eq meq;
-       struct ocrdma_eq *qp_eq_tbl;
+       struct ocrdma_eq *eq_tbl;
        int eq_cnt;
        u16 base_eqid;
        u16 max_eq;
@@ -168,11 +169,12 @@ struct ocrdma_dev {
        struct list_head entry;
        struct rcu_head rcu;
        int id;
+       u64 stag_arr[OCRDMA_MAX_STAG];
+       u16 pvid;
 };
 
 struct ocrdma_cq {
        struct ib_cq ibcq;
-       struct ocrdma_dev *dev;
        struct ocrdma_cqe *va;
        u32 phase;
        u32 getp;       /* pointer to pending wrs to
@@ -214,7 +216,6 @@ struct ocrdma_pd {
 
 struct ocrdma_ah {
        struct ib_ah ibah;
-       struct ocrdma_dev *dev;
        struct ocrdma_av *av;
        u16 sgid_index;
        u32 id;
@@ -234,7 +235,6 @@ struct ocrdma_qp_hwq_info {
 
 struct ocrdma_srq {
        struct ib_srq ibsrq;
-       struct ocrdma_dev *dev;
        u8 __iomem *db;
        struct ocrdma_qp_hwq_info rq;
        u64 *rqe_wr_id_tbl;
@@ -290,10 +290,11 @@ struct ocrdma_qp {
        u32 qkey;
        bool dpp_enabled;
        u8 *ird_q_va;
+       bool signaled;
+       u16 db_cache;
 };
 
 struct ocrdma_hw_mr {
-       struct ocrdma_dev *dev;
        u32 lkey;
        u8 fr_mr;
        u8 remote_atomic;
@@ -317,15 +318,16 @@ struct ocrdma_mr {
        struct ib_mr ibmr;
        struct ib_umem *umem;
        struct ocrdma_hw_mr hwmr;
-       struct ocrdma_pd *pd;
 };
 
 struct ocrdma_ucontext {
        struct ib_ucontext ibucontext;
-       struct ocrdma_dev *dev;
 
        struct list_head mm_head;
        struct mutex mm_list_lock; /* protects list entries of mm type */
+       struct ocrdma_pd *cntxt_pd;
+       int pd_in_use;
+
        struct {
                u32 *va;
                dma_addr_t pa;
@@ -386,14 +388,14 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq)
 static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp)
 {
        return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY &&
-                qp->id < 64) ? 24 : 16);
+                qp->id < 128) ? 24 : 16);
 }
 
 static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe)
 {
        int cqe_valid;
        cqe_valid = le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID;
-       return ((cqe_valid == cq->phase) ? 1 : 0);
+       return (cqe_valid == cq->phase);
 }
 
 static inline int is_cqe_for_sq(struct ocrdma_cqe *cqe)
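
is_cqe_valid() compares the CQE's valid bit against the consumer's current phase, which flips on every wrap of the ring. A hedged standalone sketch of phase-bit polling (illustrative types, not the ocrdma CQE layout):

	#include <stddef.h>
	#include <stdint.h>

	struct cqe_sketch { uint32_t flags; };	/* bit 0: phase/valid tag */

	struct cq_sketch {
		struct cqe_sketch *ring;
		uint32_t size, getp, phase;
	};

	static struct cqe_sketch *poll_one(struct cq_sketch *cq)
	{
		struct cqe_sketch *cqe = &cq->ring[cq->getp];

		if ((cqe->flags & 1) != cq->phase)
			return NULL;		/* producer hasn't reached us this lap */
		if (++cq->getp == cq->size) {
			cq->getp = 0;
			cq->phase ^= 1;		/* expected tag flips on every wrap */
		}
		return cqe;
	}
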
index 517ab20b727c51feac5314a64e717adfdedacde4..fbac8eb44036b02ff7c1233f76e951a0e0adfe8a 100644 (file)
@@ -28,6 +28,9 @@
 #ifndef __OCRDMA_ABI_H__
 #define __OCRDMA_ABI_H__
 
+#define OCRDMA_ABI_VERSION 1
+/* user kernel communication data structures. */
+
 struct ocrdma_alloc_ucontext_resp {
        u32 dev_id;
        u32 wqe_size;
@@ -35,16 +38,16 @@ struct ocrdma_alloc_ucontext_resp {
        u32 dpp_wqe_size;
        u64 ah_tbl_page;
        u32 ah_tbl_len;
-       u32 rsvd;
-       u8 fw_ver[32];
        u32 rqe_size;
+       u8 fw_ver[32];
+       /* for future use/new features in progress */
        u64 rsvd1;
-} __packed;
+       u64 rsvd2;
+};
 
-/* user kernel communication data structures. */
 struct ocrdma_alloc_pd_ureq {
        u64 rsvd1;
-} __packed;
+};
 
 struct ocrdma_alloc_pd_uresp {
        u32 id;
@@ -52,12 +55,12 @@ struct ocrdma_alloc_pd_uresp {
        u32 dpp_page_addr_hi;
        u32 dpp_page_addr_lo;
        u64 rsvd1;
-} __packed;
+};
 
 struct ocrdma_create_cq_ureq {
        u32 dpp_cq;
-       u32 rsvd;
-} __packed;
+       u32 rsvd; /* pad */
+};
 
 #define MAX_CQ_PAGES 8
 struct ocrdma_create_cq_uresp {
@@ -69,9 +72,10 @@ struct ocrdma_create_cq_uresp {
        u64 db_page_addr;
        u32 db_page_size;
        u32 phase_change;
+       /* for future use/new features in progress */
        u64 rsvd1;
        u64 rsvd2;
-} __packed;
+};
 
 #define MAX_QP_PAGES 8
 #define MAX_UD_AV_PAGES 8
@@ -80,14 +84,14 @@ struct ocrdma_create_qp_ureq {
        u8 enable_dpp_cq;
        u8 rsvd;
        u16 dpp_cq_id;
-       u32 rsvd1;
+       u32 rsvd1;      /* pad */
 };
 
 struct ocrdma_create_qp_uresp {
        u16 qp_id;
        u16 sq_dbid;
        u16 rq_dbid;
-       u16 resv0;
+       u16 resv0;      /* pad */
        u32 sq_page_size;
        u32 rq_page_size;
        u32 num_sq_pages;
@@ -98,19 +102,19 @@ struct ocrdma_create_qp_uresp {
        u32 db_page_size;
        u32 dpp_credit;
        u32 dpp_offset;
-       u32 rsvd1;
        u32 num_wqe_allocated;
        u32 num_rqe_allocated;
        u32 db_sq_offset;
        u32 db_rq_offset;
        u32 db_shift;
+       u64 rsvd1;
        u64 rsvd2;
        u64 rsvd3;
 } __packed;
 
 struct ocrdma_create_srq_uresp {
        u16 rq_dbid;
-       u16 resv0;
+       u16 resv0;      /* pad */
        u32 resv1;
 
        u32 rq_page_size;
@@ -126,6 +130,6 @@ struct ocrdma_create_srq_uresp {
 
        u64 rsvd2;
        u64 rsvd3;
-} __packed;
+};
 
 #endif                         /* __OCRDMA_ABI_H__ */
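
Dropping __packed from these ABI structs is safe only because every field is naturally aligned and the explicit pad fields keep the layout fixed on both sides of the user/kernel boundary. A build-time check can pin that assumption down (sketch, hypothetical struct name):

	#include <stdint.h>

	struct cq_ureq_sketch {
		uint32_t dpp_cq;
		uint32_t rsvd;	/* explicit pad: sizeof stays 8 without packing */
	};

	_Static_assert(sizeof(struct cq_ureq_sketch) == 8,
		       "ABI layout must not rely on compiler packing");
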
index f4c587c68f648055546deafce7f8708798f4d109..ee499d94225701780bed453abecc1a9ed6e98cff 100644 (file)
 #include "ocrdma_ah.h"
 #include "ocrdma_hw.h"
 
-static inline int set_av_attr(struct ocrdma_ah *ah,
+static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
                                struct ib_ah_attr *attr, int pdid)
 {
        int status = 0;
        u16 vlan_tag; bool vlan_enabled = false;
-       struct ocrdma_dev *dev = ah->dev;
        struct ocrdma_eth_vlan eth;
        struct ocrdma_grh grh;
        int eth_sz;
@@ -51,6 +50,8 @@ static inline int set_av_attr(struct ocrdma_ah *ah,
        ah->sgid_index = attr->grh.sgid_index;
 
        vlan_tag = rdma_get_vlan_id(&attr->grh.dgid);
+       if (!vlan_tag || (vlan_tag > 0xFFF))
+               vlan_tag = dev->pvid;
        if (vlan_tag && (vlan_tag < 0x1000)) {
                eth.eth_type = cpu_to_be16(0x8100);
                eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
@@ -92,7 +93,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
        int status;
        struct ocrdma_ah *ah;
        struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
-       struct ocrdma_dev *dev = pd->dev;
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
 
        if (!(attr->ah_flags & IB_AH_GRH))
                return ERR_PTR(-EINVAL);
@@ -100,12 +101,11 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
        ah = kzalloc(sizeof *ah, GFP_ATOMIC);
        if (!ah)
                return ERR_PTR(-ENOMEM);
-       ah->dev = pd->dev;
 
        status = ocrdma_alloc_av(dev, ah);
        if (status)
                goto av_err;
-       status = set_av_attr(ah, attr, pd->id);
+       status = set_av_attr(dev, ah, attr, pd->id);
        if (status)
                goto av_conf_err;
 
@@ -126,7 +126,9 @@ av_err:
 int ocrdma_destroy_ah(struct ib_ah *ibah)
 {
        struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
-       ocrdma_free_av(ah->dev, ah);
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device);
+
+       ocrdma_free_av(dev, ah);
        kfree(ah);
        return 0;
 }
index 0965278dd2ed7e805f7c1a8e1d9eae03b9c3765b..4ed8235d2d36d818360cce7e2d43010e0840ab62 100644 (file)
@@ -94,7 +94,7 @@ enum cqe_status {
 
 static inline void *ocrdma_get_eqe(struct ocrdma_eq *eq)
 {
-       return (u8 *)eq->q.va + (eq->q.tail * sizeof(struct ocrdma_eqe));
+       return eq->q.va + (eq->q.tail * sizeof(struct ocrdma_eqe));
 }
 
 static inline void ocrdma_eq_inc_tail(struct ocrdma_eq *eq)
@@ -105,8 +105,7 @@ static inline void ocrdma_eq_inc_tail(struct ocrdma_eq *eq)
 static inline void *ocrdma_get_mcqe(struct ocrdma_dev *dev)
 {
        struct ocrdma_mcqe *cqe = (struct ocrdma_mcqe *)
-           ((u8 *) dev->mq.cq.va +
-            (dev->mq.cq.tail * sizeof(struct ocrdma_mcqe)));
+           (dev->mq.cq.va + (dev->mq.cq.tail * sizeof(struct ocrdma_mcqe)));
 
        if (!(le32_to_cpu(cqe->valid_ae_cmpl_cons) & OCRDMA_MCQE_VALID_MASK))
                return NULL;
@@ -120,9 +119,7 @@ static inline void ocrdma_mcq_inc_tail(struct ocrdma_dev *dev)
 
 static inline struct ocrdma_mqe *ocrdma_get_mqe(struct ocrdma_dev *dev)
 {
-       return (struct ocrdma_mqe *)((u8 *) dev->mq.sq.va +
-                                    (dev->mq.sq.head *
-                                     sizeof(struct ocrdma_mqe)));
+       return dev->mq.sq.va + (dev->mq.sq.head * sizeof(struct ocrdma_mqe));
 }
 
 static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev)
@@ -132,8 +129,7 @@ static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev)
 
 static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev)
 {
-       return (void *)((u8 *) dev->mq.sq.va +
-                       (dev->mqe_ctx.tag * sizeof(struct ocrdma_mqe)));
+       return dev->mq.sq.va + (dev->mqe_ctx.tag * sizeof(struct ocrdma_mqe));
 }
 
 enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps)
@@ -181,7 +177,7 @@ static enum ocrdma_qp_state get_ocrdma_qp_state(enum ib_qp_state qps)
 
 static int ocrdma_get_mbx_errno(u32 status)
 {
-       int err_num = -EFAULT;
+       int err_num;
        u8 mbox_status = (status & OCRDMA_MBX_RSP_STATUS_MASK) >>
                                        OCRDMA_MBX_RSP_STATUS_SHIFT;
        u8 add_status = (status & OCRDMA_MBX_RSP_ASTATUS_MASK) >>
@@ -260,10 +256,11 @@ static int ocrdma_get_mbx_cqe_errno(u16 cqe_status)
                break;
        case OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_RESOURCES:
        case OCRDMA_MBX_CQE_STATUS_QUEUE_FLUSHING:
-               err_num = -EAGAIN;
+               err_num = -EINVAL;
                break;
        case OCRDMA_MBX_CQE_STATUS_DMA_FAILED:
-               err_num = -EIO;
+       default:
+               err_num = -EINVAL;
                break;
        }
        return err_num;
@@ -367,22 +364,6 @@ static void ocrdma_build_q_pages(struct ocrdma_pa *q_pa, int cnt,
        }
 }
 
-static void ocrdma_assign_eq_vect_gen2(struct ocrdma_dev *dev,
-                                      struct ocrdma_eq *eq)
-{
-       /* assign vector and update vector id for next EQ */
-       eq->vector = dev->nic_info.msix.start_vector;
-       dev->nic_info.msix.start_vector += 1;
-}
-
-static void ocrdma_free_eq_vect_gen2(struct ocrdma_dev *dev)
-{
-       /* this assumes that EQs are freed in exactly reverse order
-        * as its allocation.
-        */
-       dev->nic_info.msix.start_vector -= 1;
-}
-
 static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q,
                               int queue_type)
 {
@@ -423,11 +404,8 @@ static int ocrdma_mbx_create_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
        memset(cmd, 0, sizeof(*cmd));
        ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_EQ, OCRDMA_SUBSYS_COMMON,
                        sizeof(*cmd));
-       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY)
-               cmd->req.rsvd_version = 0;
-       else
-               cmd->req.rsvd_version = 2;
 
+       cmd->req.rsvd_version = 2;
        cmd->num_pages = 4;
        cmd->valid = OCRDMA_CREATE_EQ_VALID;
        cmd->cnt = 4 << OCRDMA_CREATE_EQ_CNT_SHIFT;
@@ -438,12 +416,7 @@ static int ocrdma_mbx_create_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
                                 NULL);
        if (!status) {
                eq->q.id = rsp->vector_eqid & 0xffff;
-               if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY)
-                       ocrdma_assign_eq_vect_gen2(dev, eq);
-               else {
-                       eq->vector = (rsp->vector_eqid >> 16) & 0xffff;
-                       dev->nic_info.msix.start_vector += 1;
-               }
+               eq->vector = (rsp->vector_eqid >> 16) & 0xffff;
                eq->q.created = true;
        }
        return status;
@@ -486,8 +459,6 @@ static void _ocrdma_destroy_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
 {
        if (eq->q.created) {
                ocrdma_mbx_delete_q(dev, &eq->q, QTYPE_EQ);
-               if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY)
-                       ocrdma_free_eq_vect_gen2(dev);
                ocrdma_free_q(dev, &eq->q);
        }
 }
@@ -506,13 +477,12 @@ static void ocrdma_destroy_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
        _ocrdma_destroy_eq(dev, eq);
 }
 
-static void ocrdma_destroy_qp_eqs(struct ocrdma_dev *dev)
+static void ocrdma_destroy_eqs(struct ocrdma_dev *dev)
 {
        int i;
 
-       /* deallocate the data path eqs */
        for (i = 0; i < dev->eq_cnt; i++)
-               ocrdma_destroy_eq(dev, &dev->qp_eq_tbl[i]);
+               ocrdma_destroy_eq(dev, &dev->eq_tbl[i]);
 }
 
 static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev,
@@ -527,16 +497,21 @@ static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev,
        ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_CQ,
                        OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
 
-       cmd->pgsz_pgcnt = PAGES_4K_SPANNED(cq->va, cq->size);
+       cmd->req.rsvd_version = OCRDMA_CREATE_CQ_VER2;
+       cmd->pgsz_pgcnt = (cq->size / OCRDMA_MIN_Q_PAGE_SIZE) <<
+               OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT;
+       cmd->pgsz_pgcnt |= PAGES_4K_SPANNED(cq->va, cq->size);
+
        cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
-       cmd->eqn = (eq->id << OCRDMA_CREATE_CQ_EQID_SHIFT);
+       cmd->eqn = eq->id;
+       cmd->cqe_count = cq->size / sizeof(struct ocrdma_mcqe);
 
-       ocrdma_build_q_pages(&cmd->pa[0], cmd->pgsz_pgcnt,
+       ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE,
                             cq->dma, PAGE_SIZE_4K);
        status = be_roce_mcc_cmd(dev->nic_info.netdev,
                                 cmd, sizeof(*cmd), NULL, NULL);
        if (!status) {
-               cq->id = (rsp->cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK);
+               cq->id = (u16) (rsp->cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK);
                cq->created = true;
        }
        return status;
@@ -569,7 +544,10 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
        cmd->cqid_pages = num_pages;
        cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
        cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
-       cmd->async_event_bitmap = Bit(20);
+
+       cmd->async_event_bitmap = Bit(OCRDMA_ASYNC_GRP5_EVE_CODE);
+       cmd->async_event_bitmap |= Bit(OCRDMA_ASYNC_RDMA_EVE_CODE);
+
        cmd->async_cqid_ringsize = cq->id;
        cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
                                OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
@@ -596,7 +574,7 @@ static int ocrdma_create_mq(struct ocrdma_dev *dev)
        if (status)
                goto alloc_err;
 
-       status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->meq.q);
+       status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->eq_tbl[0].q);
        if (status)
                goto mbx_cq_free;
 
@@ -653,7 +631,7 @@ static void ocrdma_process_qpcat_error(struct ocrdma_dev *dev,
 
        if (qp == NULL)
                BUG();
-       ocrdma_qp_state_machine(qp, new_ib_qps, &old_ib_qps);
+       ocrdma_qp_state_change(qp, new_ib_qps, &old_ib_qps);
 }
 
 static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
@@ -746,11 +724,35 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
                        qp->srq->ibsrq.event_handler(&ib_evt,
                                                     qp->srq->ibsrq.
                                                     srq_context);
-       } else if (dev_event)
+       } else if (dev_event) {
                ib_dispatch_event(&ib_evt);
+       }
 
 }
 
+static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
+                                       struct ocrdma_ae_mcqe *cqe)
+{
+       struct ocrdma_ae_pvid_mcqe *evt;
+       int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >>
+                       OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT;
+
+       switch (type) {
+       case OCRDMA_ASYNC_EVENT_PVID_STATE:
+               evt = (struct ocrdma_ae_pvid_mcqe *)cqe;
+               if ((evt->tag_enabled & OCRDMA_AE_PVID_MCQE_ENABLED_MASK) >>
+                       OCRDMA_AE_PVID_MCQE_ENABLED_SHIFT)
+                       dev->pvid = ((evt->tag_enabled &
+                                       OCRDMA_AE_PVID_MCQE_TAG_MASK) >>
+                                       OCRDMA_AE_PVID_MCQE_TAG_SHIFT);
+               break;
+       default:
+               /* Not an event of interest. */
+               break;
+       }
+}
+
 static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
 {
        /* async CQE processing */
@@ -758,8 +760,10 @@ static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
        u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >>
                        OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;
 
-       if (evt_code == OCRDMA_ASYNC_EVE_CODE)
+       if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE)
                ocrdma_dispatch_ibevent(dev, cqe);
+       else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE)
+               ocrdma_process_grp5_aync(dev, cqe);
        else
                pr_err("%s(%d) invalid evt code=0x%x\n", __func__,
                       dev->id, evt_code);
@@ -957,9 +961,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
        rsp = ocrdma_get_mqe_rsp(dev);
        ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe)));
        if (cqe_status || ext_status) {
-               pr_err
-                   ("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n",
-                    __func__,
+               pr_err("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n",
+                      __func__,
                     (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
                     OCRDMA_MBX_RSP_OPCODE_SHIFT, cqe_status, ext_status);
                status = ocrdma_get_mbx_cqe_errno(cqe_status);
@@ -991,9 +994,15 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
        attr->max_srq_sge = (rsp->max_srq_rqe_sge &
                              OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >>
            OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET;
+       attr->max_rdma_sge = (rsp->max_write_send_sge &
+                             OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >>
+           OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT;
        attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp &
                                OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >>
            OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT;
+       attr->max_srq =
+               (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
+               OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
        attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp &
                                OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >>
            OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT;
@@ -1013,6 +1022,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
        attr->max_num_mr_pbl = rsp->max_num_mr_pbl;
        attr->max_cqe = rsp->max_cq_cqes_per_cq &
                        OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_MASK;
+       attr->max_cq = (rsp->max_cq_cqes_per_cq &
+                       OCRDMA_MBX_QUERY_CFG_MAX_CQ_MASK) >>
+                       OCRDMA_MBX_QUERY_CFG_MAX_CQ_OFFSET;
        attr->wqe_size = ((rsp->wqe_rqe_stride_max_dpp_cqs &
                OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_MASK) >>
                OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET) *
@@ -1045,7 +1057,6 @@ static int ocrdma_check_fw_config(struct ocrdma_dev *dev,
                return -EINVAL;
        dev->base_eqid = conf->base_eqid;
        dev->max_eq = conf->max_eq;
-       dev->attr.max_cq = OCRDMA_MAX_CQ - 1;
        return 0;
 }
 
@@ -1118,6 +1129,34 @@ mbx_err:
        return status;
 }
 
+int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
+{
+       int status = -ENOMEM;
+       struct ocrdma_get_link_speed_rsp *rsp;
+       struct ocrdma_mqe *cmd;
+
+       cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1,
+                                 sizeof(*cmd));
+       if (!cmd)
+               return status;
+       ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0],
+                       OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1,
+                       OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+
+       ((struct ocrdma_mbx_hdr *)cmd->u.cmd)->rsvd_version = 0x1;
+
+       status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+       if (status)
+               goto mbx_err;
+
+       rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
+       *lnk_speed = rsp->phys_port_speed;
+
+mbx_err:
+       kfree(cmd);
+       return status;
+}
+
 int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
 {
        int status = -ENOMEM;
@@ -1296,19 +1335,19 @@ static u16 ocrdma_bind_eq(struct ocrdma_dev *dev)
        u16 eq_id;
 
        mutex_lock(&dev->dev_lock);
-       cq_cnt = dev->qp_eq_tbl[0].cq_cnt;
-       eq_id = dev->qp_eq_tbl[0].q.id;
+       cq_cnt = dev->eq_tbl[0].cq_cnt;
+       eq_id = dev->eq_tbl[0].q.id;
        /* find the EQ which has the least number of
         * CQs associated with it.
         */
        for (i = 0; i < dev->eq_cnt; i++) {
-               if (dev->qp_eq_tbl[i].cq_cnt < cq_cnt) {
-                       cq_cnt = dev->qp_eq_tbl[i].cq_cnt;
-                       eq_id = dev->qp_eq_tbl[i].q.id;
+               if (dev->eq_tbl[i].cq_cnt < cq_cnt) {
+                       cq_cnt = dev->eq_tbl[i].cq_cnt;
+                       eq_id = dev->eq_tbl[i].q.id;
                        selected_eq = i;
                }
        }
-       dev->qp_eq_tbl[selected_eq].cq_cnt += 1;
+       dev->eq_tbl[selected_eq].cq_cnt += 1;
        mutex_unlock(&dev->dev_lock);
        return eq_id;
 }
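
ocrdma_bind_eq() spreads completion queues across event queues by scanning the table for the EQ with the fewest bound CQs and then charging it for the new binding. The same selection in a self-contained form (illustrative types):

	struct eq_sketch { unsigned int id, cq_cnt; };

	static unsigned int bind_least_loaded(struct eq_sketch *tbl, int n)
	{
		int i, sel = 0;

		for (i = 1; i < n; i++)
			if (tbl[i].cq_cnt < tbl[sel].cq_cnt)
				sel = i;	/* fewest CQs bound so far */
		tbl[sel].cq_cnt++;		/* charge the chosen EQ */
		return tbl[sel].id;
	}
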
@@ -1319,16 +1358,16 @@ static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id)
 
        mutex_lock(&dev->dev_lock);
        for (i = 0; i < dev->eq_cnt; i++) {
-               if (dev->qp_eq_tbl[i].q.id != eq_id)
+               if (dev->eq_tbl[i].q.id != eq_id)
                        continue;
-               dev->qp_eq_tbl[i].cq_cnt -= 1;
+               dev->eq_tbl[i].cq_cnt -= 1;
                break;
        }
        mutex_unlock(&dev->dev_lock);
 }
 
 int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
-                        int entries, int dpp_cq)
+                        int entries, int dpp_cq, u16 pd_id)
 {
        int status = -ENOMEM; int max_hw_cqe;
        struct pci_dev *pdev = dev->nic_info.pdev;
@@ -1336,8 +1375,6 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
        struct ocrdma_create_cq_rsp *rsp;
        u32 hw_pages, cqe_size, page_size, cqe_count;
 
-       if (dpp_cq)
-               return -EINVAL;
        if (entries > dev->attr.max_cqe) {
                pr_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n",
                       __func__, dev->id, dev->attr.max_cqe, entries);
@@ -1377,15 +1414,13 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
        cmd->cmd.pgsz_pgcnt |= hw_pages;
        cmd->cmd.ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
 
-       if (dev->eq_cnt < 0)
-               goto eq_err;
        cq->eqn = ocrdma_bind_eq(dev);
-       cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER2;
+       cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3;
        cqe_count = cq->len / cqe_size;
-       if (cqe_count > 1024)
+       if (cqe_count > 1024) {
                /* Set cnt to 3 to indicate more than 1024 cq entries */
                cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT);
-       else {
+       } else {
                u8 count = 0;
                switch (cqe_count) {
                case 256:
@@ -1416,6 +1451,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
                cq->phase_change = true;
        }
 
+       cmd->cmd.pd_id = pd_id; /* valid only for v3 */
        ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size);
        status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
        if (status)
@@ -1427,7 +1463,6 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
        return 0;
 mbx_err:
        ocrdma_unbind_eq(dev, cq->eqn);
-eq_err:
        dma_free_coherent(&pdev->dev, cq->len, cq->va, cq->pa);
 mem_err:
        kfree(cmd);
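
The two-bit count field set above encodes the CQ depth rather than storing it: the visible comment pins 0x3 to "more than 1024 entries", and the elided switch presumably maps 256/512/1024 to 0/1/2. A sketch of that assumed encoding:

    #include <stdio.h>

    /* Assumed encoding: 256->0, 512->1, 1024->2, >1024->3. Only the 3 case
     * is visible in the patch; the rest is inferred. Returns -1 for depths
     * the two-bit field cannot describe. */
    static int encode_cqe_count(unsigned int cqe_count)
    {
        if (cqe_count > 1024)
            return 3;
        switch (cqe_count) {
        case 256:  return 0;
        case 512:  return 1;
        case 1024: return 2;
        default:   return -1;
        }
    }

    int main(void)
    {
        printf("%d %d %d\n", encode_cqe_count(256),
               encode_cqe_count(1024), encode_cqe_count(4096)); /* 0 2 3 */
        return 0;
    }
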
@@ -1524,6 +1559,7 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
                return -ENOMEM;
        cmd->num_pbl_pdid =
            pdid | (hwmr->num_pbls << OCRDMA_REG_NSMR_NUM_PBL_SHIFT);
+       cmd->fr_mr = hwmr->fr_mr;
 
        cmd->flags_hpage_pbe_sz |= (hwmr->remote_wr <<
                                    OCRDMA_REG_NSMR_REMOTE_WR_SHIFT);
@@ -1678,8 +1714,16 @@ void ocrdma_flush_qp(struct ocrdma_qp *qp)
        spin_unlock_irqrestore(&qp->dev->flush_q_lock, flags);
 }
 
-int ocrdma_qp_state_machine(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
-                           enum ib_qp_state *old_ib_state)
+static void ocrdma_init_hwq_ptr(struct ocrdma_qp *qp)
+{
+       qp->sq.head = 0;
+       qp->sq.tail = 0;
+       qp->rq.head = 0;
+       qp->rq.tail = 0;
+}
+
+int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
+                          enum ib_qp_state *old_ib_state)
 {
        unsigned long flags;
        int status = 0;
@@ -1696,96 +1740,15 @@ int ocrdma_qp_state_machine(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
                return 1;
        }
 
-       switch (qp->state) {
-       case OCRDMA_QPS_RST:
-               switch (new_state) {
-               case OCRDMA_QPS_RST:
-               case OCRDMA_QPS_INIT:
-                       break;
-               default:
-                       status = -EINVAL;
-                       break;
-               };
-               break;
-       case OCRDMA_QPS_INIT:
-               /* qps: INIT->XXX */
-               switch (new_state) {
-               case OCRDMA_QPS_INIT:
-               case OCRDMA_QPS_RTR:
-                       break;
-               case OCRDMA_QPS_ERR:
-                       ocrdma_flush_qp(qp);
-                       break;
-               default:
-                       status = -EINVAL;
-                       break;
-               };
-               break;
-       case OCRDMA_QPS_RTR:
-               /* qps: RTS->XXX */
-               switch (new_state) {
-               case OCRDMA_QPS_RTS:
-                       break;
-               case OCRDMA_QPS_ERR:
-                       ocrdma_flush_qp(qp);
-                       break;
-               default:
-                       status = -EINVAL;
-                       break;
-               };
-               break;
-       case OCRDMA_QPS_RTS:
-               /* qps: RTS->XXX */
-               switch (new_state) {
-               case OCRDMA_QPS_SQD:
-               case OCRDMA_QPS_SQE:
-                       break;
-               case OCRDMA_QPS_ERR:
-                       ocrdma_flush_qp(qp);
-                       break;
-               default:
-                       status = -EINVAL;
-                       break;
-               };
-               break;
-       case OCRDMA_QPS_SQD:
-               /* qps: SQD->XXX */
-               switch (new_state) {
-               case OCRDMA_QPS_RTS:
-               case OCRDMA_QPS_SQE:
-               case OCRDMA_QPS_ERR:
-                       break;
-               default:
-                       status = -EINVAL;
-                       break;
-               };
-               break;
-       case OCRDMA_QPS_SQE:
-               switch (new_state) {
-               case OCRDMA_QPS_RTS:
-               case OCRDMA_QPS_ERR:
-                       break;
-               default:
-                       status = -EINVAL;
-                       break;
-               };
-               break;
-       case OCRDMA_QPS_ERR:
-               /* qps: ERR->XXX */
-               switch (new_state) {
-               case OCRDMA_QPS_RST:
-                       break;
-               default:
-                       status = -EINVAL;
-                       break;
-               };
-               break;
-       default:
-               status = -EINVAL;
-               break;
-       };
-       if (!status)
-               qp->state = new_state;
+
+       if (new_state == OCRDMA_QPS_INIT) {
+               ocrdma_init_hwq_ptr(qp);
+               ocrdma_del_flush_qp(qp);
+       } else if (new_state == OCRDMA_QPS_ERR) {
+               ocrdma_flush_qp(qp);
+       }
+
+       qp->state = new_state;
 
        spin_unlock_irqrestore(&qp->q_lock, flags);
        return status;
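
With the per-state switch gone, ocrdma_qp_state_change keeps only the side effects the hardware queues need; legality of the transition itself is left to the IB core's checks during ib_modify_qp(). A compact model of the remaining hook:

    #include <stdio.h>

    enum qps { QPS_RST, QPS_INIT, QPS_RTR, QPS_RTS, QPS_ERR };

    struct qp {
        enum qps state;
        unsigned sq_head, sq_tail, rq_head, rq_tail;
    };

    /* Mirror of the new logic: INIT resets the queue pointers, ERR flushes;
     * every other transition just records the new state. */
    static void qp_state_change(struct qp *qp, enum qps new_state)
    {
        if (new_state == QPS_INIT) {
            qp->sq_head = qp->sq_tail = 0;
            qp->rq_head = qp->rq_tail = 0;
            /* the kernel also drops the QP from the flush lists here */
        } else if (new_state == QPS_ERR) {
            printf("flush qp\n"); /* stands in for ocrdma_flush_qp() */
        }
        qp->state = new_state;
    }

    int main(void)
    {
        struct qp qp = { QPS_RST, 3, 1, 7, 2 };
        qp_state_change(&qp, QPS_INIT);
        printf("state=%d sq_head=%u\n", qp.state, qp.sq_head); /* 1 0 */
        return 0;
    }
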
@@ -1819,10 +1782,9 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
        u32 max_wqe_allocated;
        u32 max_sges = attrs->cap.max_send_sge;
 
-       max_wqe_allocated = attrs->cap.max_send_wr;
-       /* need to allocate one extra to for GEN1 family */
-       if (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY)
-               max_wqe_allocated += 1;
+       /* QP1 may exceed 127 */
+       max_wqe_allocated = min_t(int, attrs->cap.max_send_wr + 1,
+                               dev->attr.max_wqe);
 
        status = ocrdma_build_q_conf(&max_wqe_allocated,
                dev->attr.wqe_size, &hw_pages, &hw_page_size);
@@ -1934,6 +1896,8 @@ static int ocrdma_set_create_qp_ird_cmd(struct ocrdma_create_qp_req *cmd,
        dma_addr_t pa = 0;
        int ird_page_size = dev->attr.ird_page_size;
        int ird_q_len = dev->attr.num_ird_pages * ird_page_size;
+       struct ocrdma_hdr_wqe *rqe;
+       int i = 0;
 
        if (dev->attr.ird == 0)
                return 0;
@@ -1945,6 +1909,15 @@ static int ocrdma_set_create_qp_ird_cmd(struct ocrdma_create_qp_req *cmd,
        memset(qp->ird_q_va, 0, ird_q_len);
        ocrdma_build_q_pages(&cmd->ird_addr[0], dev->attr.num_ird_pages,
                             pa, ird_page_size);
+       for (; i < ird_q_len / dev->attr.rqe_size; i++) {
+               rqe = (struct ocrdma_hdr_wqe *)(qp->ird_q_va +
+                       (i * dev->attr.rqe_size));
+               rqe->cw = 0;
+               rqe->cw |= 2;
+               rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
+               rqe->cw |= (8 << OCRDMA_WQE_SIZE_SHIFT);
+               rqe->cw |= (8 << OCRDMA_WQE_NXT_WQE_SIZE_SHIFT);
+       }
        return 0;
 }
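
The loop added above pre-stamps every IRD-queue entry with one fixed control word. The OCRDMA_WQE_*_SHIFT constants live in ocrdma_sli.h and their numeric values are not shown in this hunk, so the sketch below uses placeholder shifts purely to illustrate the packing:

    #include <stdint.h>
    #include <stdio.h>

    /* Placeholder shifts; the real OCRDMA_WQE_*_SHIFT values come from
     * ocrdma_sli.h and may differ. */
    #define WQE_TYPE_SHIFT         16
    #define WQE_SIZE_SHIFT         8
    #define WQE_NXT_WQE_SIZE_SHIFT 24
    #define TYPE_LKEY              1  /* placeholder for OCRDMA_TYPE_LKEY */

    static uint32_t build_ird_cw(void)
    {
        uint32_t cw = 0;
        cw |= 2;                           /* opcode bits, as in the patch */
        cw |= TYPE_LKEY << WQE_TYPE_SHIFT;
        cw |= 8 << WQE_SIZE_SHIFT;         /* sizes in hardware-defined units */
        cw |= 8 << WQE_NXT_WQE_SIZE_SHIFT;
        return cw;
    }

    int main(void)
    {
        printf("cw=0x%08x\n", build_ird_cw());
        return 0;
    }
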
 
@@ -2057,9 +2030,10 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
        qp->rq_cq = cq;
 
        if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp &&
-           (attrs->cap.max_inline_data <= dev->attr.max_inline_data))
+           (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) {
                ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
                                             dpp_cq_id);
+       }
 
        status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
        if (status)
@@ -2108,38 +2082,48 @@ int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid,
        struct in6_addr in6;
 
        memcpy(&in6, dgid, sizeof in6);
-       if (rdma_is_multicast_addr(&in6))
+       if (rdma_is_multicast_addr(&in6)) {
                rdma_get_mcast_mac(&in6, mac_addr);
-       else if (rdma_link_local_addr(&in6))
+       } else if (rdma_link_local_addr(&in6)) {
                rdma_get_ll_mac(&in6, mac_addr);
-       else {
+       } else {
                pr_err("%s() fail to resolve mac_addr.\n", __func__);
                return -EINVAL;
        }
        return 0;
 }
 
-static void ocrdma_set_av_params(struct ocrdma_qp *qp,
+static int ocrdma_set_av_params(struct ocrdma_qp *qp,
                                struct ocrdma_modify_qp *cmd,
                                struct ib_qp_attr *attrs)
 {
+       int status;
        struct ib_ah_attr *ah_attr = &attrs->ah_attr;
-       union ib_gid sgid;
+       union ib_gid sgid, zgid;
        u32 vlan_id;
        u8 mac_addr[6];
+
        if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
-               return;
+               return -EINVAL;
        cmd->params.tclass_sq_psn |=
            (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
        cmd->params.rnt_rc_sl_fl |=
            (ah_attr->grh.flow_label & OCRDMA_QP_PARAMS_FLOW_LABEL_MASK);
+       cmd->params.rnt_rc_sl_fl |= (ah_attr->sl << OCRDMA_QP_PARAMS_SL_SHIFT);
        cmd->params.hop_lmt_rq_psn |=
            (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
        cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
        memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
               sizeof(cmd->params.dgid));
-       ocrdma_query_gid(&qp->dev->ibdev, 1,
+       status = ocrdma_query_gid(&qp->dev->ibdev, 1,
                         ah_attr->grh.sgid_index, &sgid);
+       if (status)
+               return status;
+
+       memset(&zgid, 0, sizeof(zgid));
+       if (!memcmp(&sgid, &zgid, sizeof(zgid)))
+               return -EINVAL;
+
        qp->sgid_idx = ah_attr->grh.sgid_index;
        memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
        ocrdma_resolve_dgid(qp->dev, &ah_attr->grh.dgid, &mac_addr[0]);
@@ -2155,6 +2139,7 @@ static void ocrdma_set_av_params(struct ocrdma_qp *qp,
                    vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
                cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
        }
+       return 0;
 }
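
The new sgid validation above is a plain memcmp against a zeroed template; the equivalent standalone form:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    union gid { uint8_t raw[16]; };

    static int gid_is_zero(const union gid *g)
    {
        union gid z;
        memset(&z, 0, sizeof(z));
        return memcmp(g, &z, sizeof(z)) == 0;
    }

    int main(void)
    {
        union gid g;
        memset(&g, 0, sizeof(g));
        printf("%d\n", gid_is_zero(&g)); /* 1: would be rejected with -EINVAL */
        g.raw[15] = 1;
        printf("%d\n", gid_is_zero(&g)); /* 0: usable sgid */
        return 0;
    }
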
 
 static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
@@ -2163,8 +2148,6 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
                                enum ib_qp_state old_qps)
 {
        int status = 0;
-       struct net_device *netdev = qp->dev->nic_info.netdev;
-       int eth_mtu = iboe_get_mtu(netdev->mtu);
 
        if (attr_mask & IB_QP_PKEY_INDEX) {
                cmd->params.path_mtu_pkey_indx |= (attrs->pkey_index &
@@ -2176,9 +2159,11 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
                cmd->params.qkey = attrs->qkey;
                cmd->flags |= OCRDMA_QP_PARA_QKEY_VALID;
        }
-       if (attr_mask & IB_QP_AV)
-               ocrdma_set_av_params(qp, cmd, attrs);
-       else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) {
+       if (attr_mask & IB_QP_AV) {
+               status = ocrdma_set_av_params(qp, cmd, attrs);
+               if (status)
+                       return status;
+       } else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) {
                /* set the default mac address for UD, GSI QPs */
                cmd->params.dmac_b0_to_b3 = qp->dev->nic_info.mac_addr[0] |
                        (qp->dev->nic_info.mac_addr[1] << 8) |
@@ -2199,8 +2184,8 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
                cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID;
        }
        if (attr_mask & IB_QP_PATH_MTU) {
-               if (ib_mtu_enum_to_int(eth_mtu) <
-                   ib_mtu_enum_to_int(attrs->path_mtu)) {
+               if (attrs->path_mtu < IB_MTU_256 ||
+                   attrs->path_mtu > IB_MTU_4096) {
                        status = -EINVAL;
                        goto pmtu_err;
                }
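
The path-MTU check no longer consults the netdev MTU; it only range-checks the IB enum, whose values are fixed by include/rdma/ib_verbs.h (IB_MTU_256 = 1 through IB_MTU_4096 = 5), so the test reduces to:

    #include <stdio.h>

    /* Values as defined in include/rdma/ib_verbs.h. */
    enum ib_mtu {
        IB_MTU_256  = 1,
        IB_MTU_512  = 2,
        IB_MTU_1024 = 3,
        IB_MTU_2048 = 4,
        IB_MTU_4096 = 5
    };

    static int path_mtu_valid(enum ib_mtu mtu)
    {
        return mtu >= IB_MTU_256 && mtu <= IB_MTU_4096;
    }

    int main(void)
    {
        printf("%d %d\n", path_mtu_valid(IB_MTU_2048),
               path_mtu_valid((enum ib_mtu)0)); /* 1 0 */
        return 0;
    }
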
@@ -2283,10 +2268,12 @@ int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
                     OCRDMA_QP_PARAMS_STATE_SHIFT) &
                    OCRDMA_QP_PARAMS_STATE_MASK;
                cmd->flags |= OCRDMA_QP_PARA_QPS_VALID;
-       } else
+       } else {
                cmd->params.max_sge_recv_flags |=
                    (qp->state << OCRDMA_QP_PARAMS_STATE_SHIFT) &
                    OCRDMA_QP_PARAMS_STATE_MASK;
+       }
+
        status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask, old_qps);
        if (status)
                goto mbx_err;
@@ -2324,7 +2311,7 @@ mbx_err:
        return status;
 }
 
-int ocrdma_mbx_create_srq(struct ocrdma_srq *srq,
+int ocrdma_mbx_create_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
                          struct ib_srq_init_attr *srq_attr,
                          struct ocrdma_pd *pd)
 {
@@ -2334,7 +2321,6 @@ int ocrdma_mbx_create_srq(struct ocrdma_srq *srq,
        struct ocrdma_create_srq_rsp *rsp;
        struct ocrdma_create_srq *cmd;
        dma_addr_t pa;
-       struct ocrdma_dev *dev = srq->dev;
        struct pci_dev *pdev = dev->nic_info.pdev;
        u32 max_rqe_allocated;
 
@@ -2404,13 +2390,16 @@ int ocrdma_mbx_modify_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr)
 {
        int status = -ENOMEM;
        struct ocrdma_modify_srq *cmd;
-       cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd));
+       struct ocrdma_pd *pd = srq->pd;
+       struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
+
+       cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_MODIFY_SRQ, sizeof(*cmd));
        if (!cmd)
                return status;
        cmd->id = srq->id;
        cmd->limit_max_rqe |= srq_attr->srq_limit <<
            OCRDMA_MODIFY_SRQ_LIMIT_SHIFT;
-       status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd);
+       status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
        kfree(cmd);
        return status;
 }
@@ -2419,11 +2408,13 @@ int ocrdma_mbx_query_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr)
 {
        int status = -ENOMEM;
        struct ocrdma_query_srq *cmd;
-       cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd));
+       struct ocrdma_dev *dev = get_ocrdma_dev(srq->ibsrq.device);
+
+       cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_SRQ, sizeof(*cmd));
        if (!cmd)
                return status;
        cmd->id = srq->rq.dbid;
-       status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd);
+       status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
        if (status == 0) {
                struct ocrdma_query_srq_rsp *rsp =
                    (struct ocrdma_query_srq_rsp *)cmd;
@@ -2448,7 +2439,7 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
        if (!cmd)
                return status;
        cmd->id = srq->id;
-       status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd);
+       status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
        if (srq->rq.va)
                dma_free_coherent(&pdev->dev, srq->rq.len,
                                  srq->rq.va, srq->rq.pa);
@@ -2490,38 +2481,7 @@ int ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
        return 0;
 }
 
-static int ocrdma_create_mq_eq(struct ocrdma_dev *dev)
-{
-       int status;
-       int irq;
-       unsigned long flags = 0;
-       int num_eq = 0;
-
-       if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX)
-               flags = IRQF_SHARED;
-       else {
-               num_eq = dev->nic_info.msix.num_vectors -
-                               dev->nic_info.msix.start_vector;
-               /* minimum two vectors/eq are required for rdma to work.
-                * one for control path and one for data path.
-                */
-               if (num_eq < 2)
-                       return -EBUSY;
-       }
-
-       status = ocrdma_create_eq(dev, &dev->meq, OCRDMA_EQ_LEN);
-       if (status)
-               return status;
-       sprintf(dev->meq.irq_name, "ocrdma_mq%d", dev->id);
-       irq = ocrdma_get_irq(dev, &dev->meq);
-       status = request_irq(irq, ocrdma_irq_handler, flags, dev->meq.irq_name,
-                            &dev->meq);
-       if (status)
-               _ocrdma_destroy_eq(dev, &dev->meq);
-       return status;
-}
-
-static int ocrdma_create_qp_eqs(struct ocrdma_dev *dev)
+static int ocrdma_create_eqs(struct ocrdma_dev *dev)
 {
        int num_eq, i, status = 0;
        int irq;
@@ -2532,49 +2492,47 @@ static int ocrdma_create_qp_eqs(struct ocrdma_dev *dev)
        if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) {
                num_eq = 1;
                flags = IRQF_SHARED;
-       } else
+       } else {
                num_eq = min_t(u32, num_eq, num_online_cpus());
-       dev->qp_eq_tbl = kzalloc(sizeof(struct ocrdma_eq) * num_eq, GFP_KERNEL);
-       if (!dev->qp_eq_tbl)
+       }
+
+       if (!num_eq)
+               return -EINVAL;
+
+       dev->eq_tbl = kzalloc(sizeof(struct ocrdma_eq) * num_eq, GFP_KERNEL);
+       if (!dev->eq_tbl)
                return -ENOMEM;
 
        for (i = 0; i < num_eq; i++) {
-               status = ocrdma_create_eq(dev, &dev->qp_eq_tbl[i],
+               status = ocrdma_create_eq(dev, &dev->eq_tbl[i],
                                          OCRDMA_EQ_LEN);
                if (status) {
                        status = -EINVAL;
                        break;
                }
-               sprintf(dev->qp_eq_tbl[i].irq_name, "ocrdma_qp%d-%d",
+               sprintf(dev->eq_tbl[i].irq_name, "ocrdma%d-%d",
                        dev->id, i);
-               irq = ocrdma_get_irq(dev, &dev->qp_eq_tbl[i]);
+               irq = ocrdma_get_irq(dev, &dev->eq_tbl[i]);
                status = request_irq(irq, ocrdma_irq_handler, flags,
-                                    dev->qp_eq_tbl[i].irq_name,
-                                    &dev->qp_eq_tbl[i]);
-               if (status) {
-                       _ocrdma_destroy_eq(dev, &dev->qp_eq_tbl[i]);
-                       status = -EINVAL;
-                       break;
-               }
+                                    dev->eq_tbl[i].irq_name,
+                                    &dev->eq_tbl[i]);
+               if (status)
+                       goto done;
                dev->eq_cnt += 1;
        }
        /* one eq is sufficient for data path to work */
-       if (dev->eq_cnt >= 1)
-               return 0;
-       if (status)
-               ocrdma_destroy_qp_eqs(dev);
+       return 0;
+done:
+       ocrdma_destroy_eqs(dev);
        return status;
 }
 
 int ocrdma_init_hw(struct ocrdma_dev *dev)
 {
        int status;
-       /* set up control path eq */
-       status = ocrdma_create_mq_eq(dev);
-       if (status)
-               return status;
-       /* set up data path eq */
-       status = ocrdma_create_qp_eqs(dev);
+
+       /* create the EQs */
+       status = ocrdma_create_eqs(dev);
        if (status)
                goto qpeq_err;
        status = ocrdma_create_mq(dev);
@@ -2597,9 +2555,8 @@ int ocrdma_init_hw(struct ocrdma_dev *dev)
 conf_err:
        ocrdma_destroy_mq(dev);
 mq_err:
-       ocrdma_destroy_qp_eqs(dev);
+       ocrdma_destroy_eqs(dev);
 qpeq_err:
-       ocrdma_destroy_eq(dev, &dev->meq);
        pr_err("%s() status=%d\n", __func__, status);
        return status;
 }
@@ -2608,10 +2565,9 @@ void ocrdma_cleanup_hw(struct ocrdma_dev *dev)
 {
        ocrdma_mbx_delete_ah_tbl(dev);
 
-       /* cleanup the data path eqs */
-       ocrdma_destroy_qp_eqs(dev);
+       /* cleanup the eqs */
+       ocrdma_destroy_eqs(dev);
 
        /* cleanup the control path */
        ocrdma_destroy_mq(dev);
-       ocrdma_destroy_eq(dev, &dev->meq);
 }
index be5db77404dbae1c9660997d05d85e0ac16c1027..f2a89d4cc7c4b876808f1f8dacf25b96cf5ac00f 100644 (file)
@@ -78,6 +78,11 @@ static inline void ocrdma_copy_le32_to_cpu(void *dst, void *src, u32 len)
 #endif
 }
 
+static inline u64 ocrdma_get_db_addr(struct ocrdma_dev *dev, u32 pdid)
+{
+       return dev->nic_info.unmapped_db + (pdid * dev->nic_info.db_page_size);
+}
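
ocrdma_get_db_addr centralizes the doorbell arithmetic: each protection domain owns one doorbell page at a fixed stride from the unmapped base. The same computation, standalone and with example numbers only:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t get_db_addr(uint64_t unmapped_db, uint32_t db_page_size,
                                uint32_t pdid)
    {
        return unmapped_db + (uint64_t)pdid * db_page_size;
    }

    int main(void)
    {
        /* Illustrative values: a 4 KB doorbell page per PD. */
        uint64_t base = 0xd0000000ull;
        printf("pd 0 -> 0x%llx\n", (unsigned long long)get_db_addr(base, 4096, 0));
        printf("pd 3 -> 0x%llx\n", (unsigned long long)get_db_addr(base, 4096, 3));
        return 0;
    }
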
+
 int ocrdma_init_hw(struct ocrdma_dev *);
 void ocrdma_cleanup_hw(struct ocrdma_dev *);
 
@@ -86,6 +91,7 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed,
                       bool solicited, u16 cqe_popped);
 
 /* verbs specific mailbox commands */
+int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed);
 int ocrdma_query_config(struct ocrdma_dev *,
                        struct ocrdma_mbx_query_config *config);
 int ocrdma_resolve_dgid(struct ocrdma_dev *, union ib_gid *dgid, u8 *mac_addr);
@@ -100,7 +106,7 @@ int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *, int fmr, u32 lkey);
 int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr,
                        u32 pd_id, int acc);
 int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *,
-                               int entries, int dpp_cq);
+                               int entries, int dpp_cq, u16 pd_id);
 int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *);
 
 int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs,
@@ -112,8 +118,7 @@ int ocrdma_mbx_modify_qp(struct ocrdma_dev *, struct ocrdma_qp *,
 int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *,
                        struct ocrdma_qp_params *param);
 int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *);
-
-int ocrdma_mbx_create_srq(struct ocrdma_srq *,
+int ocrdma_mbx_create_srq(struct ocrdma_dev *, struct ocrdma_srq *,
                          struct ib_srq_init_attr *,
                          struct ocrdma_pd *);
 int ocrdma_mbx_modify_srq(struct ocrdma_srq *, struct ib_srq_attr *);
@@ -123,7 +128,7 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *, struct ocrdma_srq *);
 int ocrdma_alloc_av(struct ocrdma_dev *, struct ocrdma_ah *);
 int ocrdma_free_av(struct ocrdma_dev *, struct ocrdma_ah *);
 
-int ocrdma_qp_state_machine(struct ocrdma_qp *, enum ib_qp_state new_state,
+int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state,
                            enum ib_qp_state *old_ib_state);
 bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
 bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
index ded416f1adea559d1f1db591a6641237e551b195..56e004940f1806c9203e788985395ea4ea5bdddb 100644 (file)
@@ -39,6 +39,7 @@
 #include "ocrdma_ah.h"
 #include "be_roce.h"
 #include "ocrdma_hw.h"
+#include "ocrdma_abi.h"
 
 MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION);
 MODULE_DESCRIPTION("Emulex RoCE HCA Driver");
@@ -265,6 +266,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
        memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
               sizeof(OCRDMA_NODE_DESC));
        dev->ibdev.owner = THIS_MODULE;
+       dev->ibdev.uverbs_abi_ver = OCRDMA_ABI_VERSION;
        dev->ibdev.uverbs_cmd_mask =
            OCRDMA_UVERBS(GET_CONTEXT) |
            OCRDMA_UVERBS(QUERY_DEVICE) |
@@ -326,9 +328,14 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
        dev->ibdev.req_notify_cq = ocrdma_arm_cq;
 
        dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
+       dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr;
        dev->ibdev.dereg_mr = ocrdma_dereg_mr;
        dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
 
+       dev->ibdev.alloc_fast_reg_mr = ocrdma_alloc_frmr;
+       dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list;
+       dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list;
+
        /* mandatory to support user space verbs consumer. */
        dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
        dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
index 36b062da2aea4218d14b77ded42fbc9f05fea383..9f9570ec3c2ee300f828e8275820ca7c25b9e71d 100644 (file)
@@ -70,6 +70,7 @@ enum {
 
 #define OCRDMA_SUBSYS_COMMON 1
 enum {
+       OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1 = 5,
        OCRDMA_CMD_CREATE_CQ            = 12,
        OCRDMA_CMD_CREATE_EQ            = 13,
        OCRDMA_CMD_CREATE_MQ            = 21,
@@ -91,15 +92,15 @@ enum {
 
 #define OCRDMA_MAX_QP    2048
 #define OCRDMA_MAX_CQ    2048
+#define OCRDMA_MAX_STAG  8192
 
 enum {
        OCRDMA_DB_RQ_OFFSET             = 0xE0,
-       OCRDMA_DB_GEN2_RQ1_OFFSET       = 0x100,
-       OCRDMA_DB_GEN2_RQ2_OFFSET       = 0xC0,
+       OCRDMA_DB_GEN2_RQ_OFFSET        = 0x100,
        OCRDMA_DB_SQ_OFFSET             = 0x60,
        OCRDMA_DB_GEN2_SQ_OFFSET        = 0x1C0,
        OCRDMA_DB_SRQ_OFFSET            = OCRDMA_DB_RQ_OFFSET,
-       OCRDMA_DB_GEN2_SRQ_OFFSET       = OCRDMA_DB_GEN2_RQ1_OFFSET,
+       OCRDMA_DB_GEN2_SRQ_OFFSET       = OCRDMA_DB_GEN2_RQ_OFFSET,
        OCRDMA_DB_CQ_OFFSET             = 0x120,
        OCRDMA_DB_EQ_OFFSET             = OCRDMA_DB_CQ_OFFSET,
        OCRDMA_DB_MQ_OFFSET             = 0x140
@@ -143,8 +144,11 @@ enum {
 # 2: 16K Bytes
 # 3: 32K Bytes
 # 4: 64K Bytes
+# 5: 128K Bytes
+# 6: 256K Bytes
+# 7: 512K Bytes
 */
-#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (5)
+#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (8)
 #define OCRDMA_Q_PAGE_BASE_SIZE (OCRDMA_MIN_Q_PAGE_SIZE * OCRDMA_MAX_Q_PAGES)
 
 #define MAX_OCRDMA_QP_PAGES      (8)
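
Raising OCRDMA_MAX_Q_PAGE_SIZE_CNT from 5 to 8 extends the doubling series the comment above enumerates. Assuming the 4 KB minimum implied by the "# 2: 16K Bytes" entry, index i encodes a queue page size of OCRDMA_MIN_Q_PAGE_SIZE << i:

    #include <stdio.h>

    #define MIN_Q_PAGE_SIZE (4 * 1024) /* assumed OCRDMA_MIN_Q_PAGE_SIZE */

    int main(void)
    {
        /* Index i encodes a queue page size of MIN << i; the patch extends
         * the valid range from indices 0..4 to 0..7 (4K .. 512K). */
        for (int i = 0; i < 8; i++)
            printf("%d: %dK\n", i, (MIN_Q_PAGE_SIZE << i) / 1024);
        return 0;
    }
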
@@ -177,7 +181,7 @@ struct ocrdma_mbx_hdr {
        u32 timeout;            /* in seconds */
        u32 cmd_len;
        u32 rsvd_version;
-} __packed;
+};
 
 enum {
        OCRDMA_MBX_RSP_OPCODE_SHIFT     = 0,
@@ -197,7 +201,7 @@ struct ocrdma_mbx_rsp {
        u32 status;
        u32 rsp_len;
        u32 add_rsp_len;
-} __packed;
+};
 
 enum {
        OCRDMA_MQE_EMBEDDED     = 1,
@@ -208,7 +212,7 @@ struct ocrdma_mqe_sge {
        u32 pa_lo;
        u32 pa_hi;
        u32 len;
-} __packed;
+};
 
 enum {
        OCRDMA_MQE_HDR_EMB_SHIFT        = 0,
@@ -225,12 +229,12 @@ struct ocrdma_mqe_hdr {
        u32 tag_lo;
        u32 tag_hi;
        u32 rsvd3;
-} __packed;
+};
 
 struct ocrdma_mqe_emb_cmd {
        struct ocrdma_mbx_hdr mch;
        u8 pyld[220];
-} __packed;
+};
 
 struct ocrdma_mqe {
        struct ocrdma_mqe_hdr hdr;
@@ -242,7 +246,7 @@ struct ocrdma_mqe {
                u8 cmd[236];
                struct ocrdma_mbx_rsp rsp;
        } u;
-} __packed;
+};
 
 #define OCRDMA_EQ_LEN       4096
 #define OCRDMA_MQ_CQ_LEN    256
@@ -259,12 +263,12 @@ struct ocrdma_mqe {
 struct ocrdma_delete_q_req {
        struct ocrdma_mbx_hdr req;
        u32 id;
-} __packed;
+};
 
 struct ocrdma_pa {
        u32 lo;
        u32 hi;
-} __packed;
+};
 
 #define MAX_OCRDMA_EQ_PAGES (8)
 struct ocrdma_create_eq_req {
@@ -275,7 +279,7 @@ struct ocrdma_create_eq_req {
        u32 delay;
        u32 rsvd;
        struct ocrdma_pa pa[MAX_OCRDMA_EQ_PAGES];
-} __packed;
+};
 
 enum {
        OCRDMA_CREATE_EQ_VALID  = Bit(29),
@@ -310,7 +314,7 @@ struct ocrdma_mcqe {
        u32 tag_lo;
        u32 tag_hi;
        u32 valid_ae_cmpl_cons;
-} __packed;
+};
 
 enum {
        OCRDMA_AE_MCQE_QPVALID          = Bit(31),
@@ -332,7 +336,21 @@ struct ocrdma_ae_mcqe {
        u32 cqvalid_cqid;
        u32 evt_tag;
        u32 valid_ae_event;
-} __packed;
+};
+
+enum {
+       OCRDMA_AE_PVID_MCQE_ENABLED_SHIFT = 0,
+       OCRDMA_AE_PVID_MCQE_ENABLED_MASK  = 0xFF,
+       OCRDMA_AE_PVID_MCQE_TAG_SHIFT = 16,
+       OCRDMA_AE_PVID_MCQE_TAG_MASK = 0xFFFF << OCRDMA_AE_PVID_MCQE_TAG_SHIFT
+};
+
+struct ocrdma_ae_pvid_mcqe {
+       u32 tag_enabled;
+       u32 event_tag;
+       u32 rsvd1;
+       u32 rsvd2;
+};
 
 enum {
        OCRDMA_AE_MPA_MCQE_REQ_ID_SHIFT         = 16,
@@ -356,7 +374,7 @@ struct ocrdma_ae_mpa_mcqe {
        u32 w1;
        u32 w2;
        u32 valid_ae_event;
-} __packed;
+};
 
 enum {
        OCRDMA_AE_QP_MCQE_NEW_QP_STATE_SHIFT    = 0,
@@ -382,9 +400,11 @@ struct ocrdma_ae_qp_mcqe {
        u32 w1;
        u32 w2;
        u32 valid_ae_event;
-} __packed;
+};
 
-#define OCRDMA_ASYNC_EVE_CODE 0x14
+#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
+#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
+#define OCRDMA_ASYNC_EVENT_PVID_STATE 0x3
 
 enum OCRDMA_ASYNC_EVENT_TYPE {
        OCRDMA_CQ_ERROR                 = 0x00,
@@ -487,7 +507,8 @@ struct ocrdma_mbx_query_config {
        u32 max_ird_ord_per_qp;
        u32 max_shared_ird_ord;
        u32 max_mr;
-       u64 max_mr_size;
+       u32 max_mr_size_lo;
+       u32 max_mr_size_hi;
        u32 max_num_mr_pbl;
        u32 max_mw;
        u32 max_fmr;
@@ -502,14 +523,14 @@ struct ocrdma_mbx_query_config {
        u32 max_wqes_rqes_per_q;
        u32 max_cq_cqes_per_cq;
        u32 max_srq_rqe_sge;
-} __packed;
+};
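
Splitting max_mr_size into explicit _lo/_hi words matches the mailbox's 32-bit layout; a consumer would presumably reassemble the 64-bit value as below (the matching reader in ocrdma_get_attr is not part of this hunk):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t join_u32(uint32_t lo, uint32_t hi)
    {
        return ((uint64_t)hi << 32) | lo;
    }

    int main(void)
    {
        /* e.g. a large max MR size reported as two mailbox words */
        printf("0x%llx\n",
               (unsigned long long)join_u32(0x00000000u, 0x0000ffffu));
        return 0;
    }
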
 
 struct ocrdma_fw_ver_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
 
        u8 running_ver[32];
-} __packed;
+};
 
 struct ocrdma_fw_conf_rsp {
        struct ocrdma_mqe_hdr hdr;
@@ -535,14 +556,41 @@ struct ocrdma_fw_conf_rsp {
        u32 base_eqid;
        u32 max_eq;
 
-} __packed;
+};
 
 enum {
        OCRDMA_FN_MODE_RDMA     = 0x4
 };
 
+struct ocrdma_get_link_speed_rsp {
+       struct ocrdma_mqe_hdr hdr;
+       struct ocrdma_mbx_rsp rsp;
+
+       u8 pt_port_num;
+       u8 link_duplex;
+       u8 phys_port_speed;
+       u8 phys_port_fault;
+       u16 rsvd1;
+       u16 qos_lnk_speed;
+       u8 logical_lnk_status;
+       u8 rsvd2[3];
+};
+
+enum {
+       OCRDMA_PHYS_LINK_SPEED_ZERO = 0x0,
+       OCRDMA_PHYS_LINK_SPEED_10MBPS = 0x1,
+       OCRDMA_PHYS_LINK_SPEED_100MBPS = 0x2,
+       OCRDMA_PHYS_LINK_SPEED_1GBPS = 0x3,
+       OCRDMA_PHYS_LINK_SPEED_10GBPS = 0x4,
+       OCRDMA_PHYS_LINK_SPEED_20GBPS = 0x5,
+       OCRDMA_PHYS_LINK_SPEED_25GBPS = 0x6,
+       OCRDMA_PHYS_LINK_SPEED_40GBPS = 0x7,
+       OCRDMA_PHYS_LINK_SPEED_100GBPS = 0x8
+};
+
 enum {
        OCRDMA_CREATE_CQ_VER2                   = 2,
+       OCRDMA_CREATE_CQ_VER3                   = 3,
 
        OCRDMA_CREATE_CQ_PAGE_CNT_MASK          = 0xFFFF,
        OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT        = 16,
@@ -576,7 +624,8 @@ struct ocrdma_create_cq_cmd {
        u32 pgsz_pgcnt;
        u32 ev_cnt_flags;
        u32 eqn;
-       u32 cqe_count;
+       u16 cqe_count;
+       u16 pd_id;
        u32 rsvd6;
        struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES];
 };
@@ -584,7 +633,7 @@ struct ocrdma_create_cq_cmd {
 struct ocrdma_create_cq {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_create_cq_cmd cmd;
-} __packed;
+};
 
 enum {
        OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF
@@ -593,12 +642,12 @@ enum {
 struct ocrdma_create_cq_cmd_rsp {
        struct ocrdma_mbx_rsp rsp;
        u32 cq_id;
-} __packed;
+};
 
 struct ocrdma_create_cq_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_create_cq_cmd_rsp rsp;
-} __packed;
+};
 
 enum {
        OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT         = 22,
@@ -617,12 +666,12 @@ struct ocrdma_create_mq_req {
        u32 async_cqid_valid;
        u32 rsvd;
        struct ocrdma_pa pa[8];
-} __packed;
+};
 
 struct ocrdma_create_mq_rsp {
        struct ocrdma_mbx_rsp rsp;
        u32 id;
-} __packed;
+};
 
 enum {
        OCRDMA_DESTROY_CQ_QID_SHIFT                     = 0,
@@ -637,12 +686,12 @@ struct ocrdma_destroy_cq {
        struct ocrdma_mbx_hdr req;
 
        u32 bypass_flush_qid;
-} __packed;
+};
 
 struct ocrdma_destroy_cq_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
 
 enum {
        OCRDMA_QPT_GSI  = 1,
@@ -766,7 +815,7 @@ struct ocrdma_create_qp_req {
        u32 dpp_credits_cqid;
        u32 rpir_lkey;
        struct ocrdma_pa ird_addr[MAX_OCRDMA_IRD_PAGES];
-} __packed;
+};
 
 enum {
        OCRDMA_CREATE_QP_RSP_QP_ID_SHIFT                = 0,
@@ -820,18 +869,18 @@ struct ocrdma_create_qp_rsp {
        u32 max_ord_ird;
        u32 sq_rq_id;
        u32 dpp_response;
-} __packed;
+};
 
 struct ocrdma_destroy_qp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_hdr req;
        u32 qp_id;
-} __packed;
+};
 
 struct ocrdma_destroy_qp_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
 
 enum {
        OCRDMA_MODIFY_QP_ID_SHIFT       = 0,
@@ -975,7 +1024,7 @@ struct ocrdma_qp_params {
        u32 dmac_b0_to_b3;
        u32 vlan_dmac_b4_to_b5;
        u32 qkey;
-} __packed;
+};
 
 
 struct ocrdma_modify_qp {
@@ -986,7 +1035,7 @@ struct ocrdma_modify_qp {
        u32 flags;
        u32 rdma_flags;
        u32 num_outstanding_atomic_rd;
-} __packed;
+};
 
 enum {
        OCRDMA_MODIFY_QP_RSP_MAX_RQE_SHIFT      = 0,
@@ -1007,7 +1056,7 @@ struct ocrdma_modify_qp_rsp {
 
        u32 max_wqe_rqe;
        u32 max_ord_ird;
-} __packed;
+};
 
 struct ocrdma_query_qp {
        struct ocrdma_mqe_hdr hdr;
@@ -1016,13 +1065,13 @@ struct ocrdma_query_qp {
 #define OCRDMA_QUERY_UP_QP_ID_SHIFT 0
 #define OCRDMA_QUERY_UP_QP_ID_MASK   0xFFFFFF
        u32 qp_id;
-} __packed;
+};
 
 struct ocrdma_query_qp_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
        struct ocrdma_qp_params params;
-} __packed;
+};
 
 enum {
        OCRDMA_CREATE_SRQ_PD_ID_SHIFT           = 0,
@@ -1051,7 +1100,7 @@ struct ocrdma_create_srq {
        u32 max_sge_rqe;
        u32 pages_rqe_sz;
        struct ocrdma_pa rq_addr[MAX_OCRDMA_SRQ_PAGES];
-} __packed;
+};
 
 enum {
        OCRDMA_CREATE_SRQ_RSP_SRQ_ID_SHIFT                      = 0,
@@ -1070,7 +1119,7 @@ struct ocrdma_create_srq_rsp {
 
        u32 id;
        u32 max_sge_rqe_allocated;
-} __packed;
+};
 
 enum {
        OCRDMA_MODIFY_SRQ_ID_SHIFT      = 0,
@@ -1089,7 +1138,7 @@ struct ocrdma_modify_srq {
 
        u32 id;
        u32 limit_max_rqe;
-} __packed;
+};
 
 enum {
        OCRDMA_QUERY_SRQ_ID_SHIFT       = 0,
@@ -1101,7 +1150,7 @@ struct ocrdma_query_srq {
        struct ocrdma_mbx_rsp req;
 
        u32 id;
-} __packed;
+};
 
 enum {
        OCRDMA_QUERY_SRQ_RSP_PD_ID_SHIFT        = 0,
@@ -1123,7 +1172,7 @@ struct ocrdma_query_srq_rsp {
 
        u32 max_rqe_pdid;
        u32 srq_lmt_max_sge;
-} __packed;
+};
 
 enum {
        OCRDMA_DESTROY_SRQ_ID_SHIFT     = 0,
@@ -1135,7 +1184,7 @@ struct ocrdma_destroy_srq {
        struct ocrdma_mbx_rsp req;
 
        u32 id;
-} __packed;
+};
 
 enum {
        OCRDMA_ALLOC_PD_ENABLE_DPP      = BIT(16),
@@ -1147,7 +1196,7 @@ struct ocrdma_alloc_pd {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_hdr req;
        u32 enable_dpp_rsvd;
-} __packed;
+};
 
 enum {
        OCRDMA_ALLOC_PD_RSP_DPP                 = Bit(16),
@@ -1159,18 +1208,18 @@ struct ocrdma_alloc_pd_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
        u32 dpp_page_pdid;
-} __packed;
+};
 
 struct ocrdma_dealloc_pd {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_hdr req;
        u32 id;
-} __packed;
+};
 
 struct ocrdma_dealloc_pd_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
 
 enum {
        OCRDMA_ADDR_CHECK_ENABLE        = 1,
@@ -1206,7 +1255,7 @@ struct ocrdma_alloc_lkey {
 
        u32 pdid;
        u32 pbl_sz_flags;
-} __packed;
+};
 
 struct ocrdma_alloc_lkey_rsp {
        struct ocrdma_mqe_hdr hdr;
@@ -1214,7 +1263,7 @@ struct ocrdma_alloc_lkey_rsp {
 
        u32 lrkey;
        u32 num_pbl_rsvd;
-} __packed;
+};
 
 struct ocrdma_dealloc_lkey {
        struct ocrdma_mqe_hdr hdr;
@@ -1222,12 +1271,12 @@ struct ocrdma_dealloc_lkey {
 
        u32 lkey;
        u32 rsvd_frmr;
-} __packed;
+};
 
 struct ocrdma_dealloc_lkey_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
 
 #define MAX_OCRDMA_NSMR_PBL    (u32)22
 #define MAX_OCRDMA_PBL_SIZE     65536
@@ -1273,7 +1322,7 @@ struct ocrdma_reg_nsmr {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_hdr cmd;
 
-       u32 lrkey_key_index;
+       u32 fr_mr;
        u32 num_pbl_pdid;
        u32 flags_hpage_pbe_sz;
        u32 totlen_low;
@@ -1283,7 +1332,7 @@ struct ocrdma_reg_nsmr {
        u32 va_loaddr;
        u32 va_hiaddr;
        struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL];
-} __packed;
+};
 
 enum {
        OCRDMA_REG_NSMR_CONT_PBL_SHIFT          = 0,
@@ -1305,12 +1354,12 @@ struct ocrdma_reg_nsmr_cont {
        u32 last;
 
        struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL];
-} __packed;
+};
 
 struct ocrdma_pbe {
        u32 pa_hi;
        u32 pa_lo;
-} __packed;
+};
 
 enum {
        OCRDMA_REG_NSMR_RSP_NUM_PBL_SHIFT       = 16,
@@ -1322,7 +1371,7 @@ struct ocrdma_reg_nsmr_rsp {
 
        u32 lrkey;
        u32 num_pbl;
-} __packed;
+};
 
 enum {
        OCRDMA_REG_NSMR_CONT_RSP_LRKEY_INDEX_SHIFT      = 0,
@@ -1342,7 +1391,7 @@ struct ocrdma_reg_nsmr_cont_rsp {
 
        u32 lrkey_key_index;
        u32 num_pbl;
-} __packed;
+};
 
 enum {
        OCRDMA_ALLOC_MW_PD_ID_SHIFT     = 0,
@@ -1354,7 +1403,7 @@ struct ocrdma_alloc_mw {
        struct ocrdma_mbx_hdr req;
 
        u32 pdid;
-} __packed;
+};
 
 enum {
        OCRDMA_ALLOC_MW_RSP_LRKEY_INDEX_SHIFT   = 0,
@@ -1366,7 +1415,7 @@ struct ocrdma_alloc_mw_rsp {
        struct ocrdma_mbx_rsp rsp;
 
        u32 lrkey_index;
-} __packed;
+};
 
 struct ocrdma_attach_mcast {
        struct ocrdma_mqe_hdr hdr;
@@ -1375,12 +1424,12 @@ struct ocrdma_attach_mcast {
        u8 mgid[16];
        u32 mac_b0_to_b3;
        u32 vlan_mac_b4_to_b5;
-} __packed;
+};
 
 struct ocrdma_attach_mcast_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
 
 struct ocrdma_detach_mcast {
        struct ocrdma_mqe_hdr hdr;
@@ -1389,12 +1438,12 @@ struct ocrdma_detach_mcast {
        u8 mgid[16];
        u32 mac_b0_to_b3;
        u32 vlan_mac_b4_to_b5;
-} __packed;
+};
 
 struct ocrdma_detach_mcast_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
 
 enum {
        OCRDMA_CREATE_AH_NUM_PAGES_SHIFT        = 19,
@@ -1418,24 +1467,24 @@ struct ocrdma_create_ah_tbl {
 
        u32 ah_conf;
        struct ocrdma_pa tbl_addr[8];
-} __packed;
+};
 
 struct ocrdma_create_ah_tbl_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
        u32 ahid;
-} __packed;
+};
 
 struct ocrdma_delete_ah_tbl {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_hdr req;
        u32 ahid;
-} __packed;
+};
 
 struct ocrdma_delete_ah_tbl_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
-} __packed;
+};
 
 enum {
        OCRDMA_EQE_VALID_SHIFT          = 0,
@@ -1448,7 +1497,7 @@ enum {
 
 struct ocrdma_eqe {
        u32 id_valid;
-} __packed;
+};
 
 enum OCRDMA_CQE_STATUS {
        OCRDMA_CQE_SUCCESS = 0,
@@ -1532,14 +1581,14 @@ struct ocrdma_cqe {
                } cmn;
        };
        u32 flags_status_srcqpn;        /* w3 */
-} __packed;
+};
 
 struct ocrdma_sge {
        u32 addr_hi;
        u32 addr_lo;
        u32 lrkey;
        u32 len;
-} __packed;
+};
 
 enum {
        OCRDMA_FLAG_SIG         = 0x1,
@@ -1563,6 +1612,7 @@ enum OCRDMA_WQE_OPCODE {
        OCRDMA_SEND             = 0x00,
        OCRDMA_CMP_SWP          = 0x14,
        OCRDMA_BIND_MW          = 0x10,
+       OCRDMA_FR_MR            = 0x11,
        OCRDMA_RESV1            = 0x0A,
        OCRDMA_LKEY_INV         = 0x15,
        OCRDMA_FETCH_ADD        = 0x13,
@@ -1600,14 +1650,26 @@ struct ocrdma_hdr_wqe {
                u32 lkey;
        };
        u32 total_len;
-} __packed;
+};
 
 struct ocrdma_ewqe_ud_hdr {
        u32 rsvd_dest_qpn;
        u32 qkey;
        u32 rsvd_ahid;
        u32 rsvd;
-} __packed;
+};
+
+/* extended wqe followed by hdr_wqe for Fast Memory register */
+struct ocrdma_ewqe_fr {
+       u32 va_hi;
+       u32 va_lo;
+       u32 fbo_hi;
+       u32 fbo_lo;
+       u32 size_sge;
+       u32 num_sges;
+       u32 rsvd;
+       u32 rsvd2;
+};
 
 struct ocrdma_eth_basic {
        u8 dmac[6];
index f36630e4b6be182c735b7aa81d357b4207266c82..6e982bb43c3172d2cc36b844c1b97f8bb0960237 100644 (file)
@@ -75,14 +75,15 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
        attr->vendor_part_id = dev->nic_info.pdev->device;
        attr->hw_ver = 0;
        attr->max_qp = dev->attr.max_qp;
-       attr->max_ah = dev->attr.max_qp;
+       attr->max_ah = OCRDMA_MAX_AH;
        attr->max_qp_wr = dev->attr.max_wqe;
 
        attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
                                        IB_DEVICE_RC_RNR_NAK_GEN |
                                        IB_DEVICE_SHUTDOWN_PORT |
                                        IB_DEVICE_SYS_IMAGE_GUID |
-                                       IB_DEVICE_LOCAL_DMA_LKEY;
+                                       IB_DEVICE_LOCAL_DMA_LKEY |
+                                       IB_DEVICE_MEM_MGT_EXTENSIONS;
        attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge);
        attr->max_sge_rd = 0;
        attr->max_cq = dev->attr.max_cq;
@@ -96,7 +97,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
        attr->max_qp_rd_atom =
            min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
        attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
-       attr->max_srq = (dev->attr.max_qp - 1);
+       attr->max_srq = dev->attr.max_srq;
        attr->max_srq_sge = dev->attr.max_srq_sge;
        attr->max_srq_wr = dev->attr.max_rqe;
        attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
@@ -105,6 +106,45 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
        return 0;
 }
 
+static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
+                                           u8 *ib_speed, u8 *ib_width)
+{
+       int status;
+       u8 speed;
+
+       status = ocrdma_mbx_get_link_speed(dev, &speed);
+       if (status)
+               speed = OCRDMA_PHYS_LINK_SPEED_ZERO;
+
+       switch (speed) {
+       case OCRDMA_PHYS_LINK_SPEED_1GBPS:
+               *ib_speed = IB_SPEED_SDR;
+               *ib_width = IB_WIDTH_1X;
+               break;
+
+       case OCRDMA_PHYS_LINK_SPEED_10GBPS:
+               *ib_speed = IB_SPEED_QDR;
+               *ib_width = IB_WIDTH_1X;
+               break;
+
+       case OCRDMA_PHYS_LINK_SPEED_20GBPS:
+               *ib_speed = IB_SPEED_DDR;
+               *ib_width = IB_WIDTH_4X;
+               break;
+
+       case OCRDMA_PHYS_LINK_SPEED_40GBPS:
+               *ib_speed = IB_SPEED_QDR;
+               *ib_width = IB_WIDTH_4X;
+               break;
+
+       default:
+               /* Unsupported */
+               *ib_speed = IB_SPEED_SDR;
+               *ib_width = IB_WIDTH_1X;
+       }
+}
+
+
 int ocrdma_query_port(struct ib_device *ibdev,
                      u8 port, struct ib_port_attr *props)
 {
@@ -141,8 +181,8 @@ int ocrdma_query_port(struct ib_device *ibdev,
        props->pkey_tbl_len = 1;
        props->bad_pkey_cntr = 0;
        props->qkey_viol_cntr = 0;
-       props->active_width = IB_WIDTH_1X;
-       props->active_speed = 4;
+       get_link_speed_and_width(dev, &props->active_speed,
+                                &props->active_width);
        props->max_msg_sz = 0x80000000;
        props->max_vl_num = 4;
        return 0;
@@ -186,7 +226,7 @@ static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
 
        mutex_lock(&uctx->mm_list_lock);
        list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
-               if (len != mm->key.len || phy_addr != mm->key.phy_addr)
+               if (len != mm->key.len && phy_addr != mm->key.phy_addr)
                        continue;
 
                list_del(&mm->entry);
@@ -204,7 +244,7 @@ static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
 
        mutex_lock(&uctx->mm_list_lock);
        list_for_each_entry(mm, &uctx->mm_head, entry) {
-               if (len != mm->key.len || phy_addr != mm->key.phy_addr)
+               if (len != mm->key.len && phy_addr != mm->key.phy_addr)
                        continue;
 
                found = true;
@@ -214,6 +254,108 @@ static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
        return found;
 }
 
+static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
+                                         struct ocrdma_ucontext *uctx,
+                                         struct ib_udata *udata)
+{
+       struct ocrdma_pd *pd = NULL;
+       int status = 0;
+
+       pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+       if (!pd)
+               return ERR_PTR(-ENOMEM);
+
+       if (udata && uctx) {
+               pd->dpp_enabled =
+                       dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY;
+               pd->num_dpp_qp =
+                       pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
+       }
+
+retry:
+       status = ocrdma_mbx_alloc_pd(dev, pd);
+       if (status) {
+               if (pd->dpp_enabled) {
+                       pd->dpp_enabled = false;
+                       pd->num_dpp_qp = 0;
+                       goto retry;
+               } else {
+                       kfree(pd);
+                       return ERR_PTR(status);
+               }
+       }
+
+       return pd;
+}
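
_ocrdma_alloc_pd tries once with DPP resources and, if the firmware refuses, retries the identical allocation with DPP disabled before giving up. The shape of that fallback, stripped of the mailbox details:

    #include <stdio.h>

    struct pd { int dpp_enabled; int id; };

    /* Stand-in for ocrdma_mbx_alloc_pd: pretend firmware is out of DPP PDs. */
    static int mbx_alloc_pd(struct pd *pd)
    {
        return pd->dpp_enabled ? -1 : 0;
    }

    static int alloc_pd(struct pd *pd, int want_dpp)
    {
        pd->dpp_enabled = want_dpp;
    retry:
        if (mbx_alloc_pd(pd)) {
            if (pd->dpp_enabled) {
                pd->dpp_enabled = 0;   /* fall back to a plain PD */
                goto retry;
            }
            return -1;                 /* hard failure */
        }
        return 0;
    }

    int main(void)
    {
        struct pd pd = { 0, 0 };
        printf("status=%d dpp=%d\n", alloc_pd(&pd, 1), pd.dpp_enabled); /* 0 0 */
        return 0;
    }
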
+
+static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
+                                struct ocrdma_pd *pd)
+{
+       return (uctx->cntxt_pd == pd ? true : false);
+}
+
+static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
+                             struct ocrdma_pd *pd)
+{
+       int status = 0;
+
+       status = ocrdma_mbx_dealloc_pd(dev, pd);
+       kfree(pd);
+       return status;
+}
+
+static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev,
+                                   struct ocrdma_ucontext *uctx,
+                                   struct ib_udata *udata)
+{
+       int status = 0;
+
+       uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata);
+       if (IS_ERR(uctx->cntxt_pd)) {
+               status = PTR_ERR(uctx->cntxt_pd);
+               uctx->cntxt_pd = NULL;
+               goto err;
+       }
+
+       uctx->cntxt_pd->uctx = uctx;
+       uctx->cntxt_pd->ibpd.device = &dev->ibdev;
+err:
+       return status;
+}
+
+static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
+{
+       int status = 0;
+       struct ocrdma_pd *pd = uctx->cntxt_pd;
+       struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
+
+       BUG_ON(uctx->pd_in_use);
+       uctx->cntxt_pd = NULL;
+       status = _ocrdma_dealloc_pd(dev, pd);
+       return status;
+}
+
+static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
+{
+       struct ocrdma_pd *pd = NULL;
+
+       mutex_lock(&uctx->mm_list_lock);
+       if (!uctx->pd_in_use) {
+               uctx->pd_in_use = true;
+               pd = uctx->cntxt_pd;
+       }
+       mutex_unlock(&uctx->mm_list_lock);
+
+       return pd;
+}
+
+static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx)
+{
+       mutex_lock(&uctx->mm_list_lock);
+       uctx->pd_in_use = false;
+       mutex_unlock(&uctx->mm_list_lock);
+}
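
ocrdma_get_ucontext_pd/ocrdma_release_ucontext_pd lend the per-context PD out at most once at a time, reusing mm_list_lock as the guard. The pattern in miniature, with a pthread mutex standing in for the kernel mutex:

    #include <pthread.h>
    #include <stdio.h>

    struct uctx {
        pthread_mutex_t lock;
        int pd_in_use;
        int cntxt_pd; /* toy PD handle */
    };

    static int get_ucontext_pd(struct uctx *u, int *pd)
    {
        int ok = 0;
        pthread_mutex_lock(&u->lock);
        if (!u->pd_in_use) {
            u->pd_in_use = 1;
            *pd = u->cntxt_pd;
            ok = 1;
        }
        pthread_mutex_unlock(&u->lock);
        return ok;
    }

    static void release_ucontext_pd(struct uctx *u)
    {
        pthread_mutex_lock(&u->lock);
        u->pd_in_use = 0;
        pthread_mutex_unlock(&u->lock);
    }

    int main(void)
    {
        struct uctx u = { PTHREAD_MUTEX_INITIALIZER, 0, 42 };
        int pd;
        printf("first take: %d\n", get_ucontext_pd(&u, &pd));  /* 1 */
        printf("second take: %d\n", get_ucontext_pd(&u, &pd)); /* 0: in use */
        release_ucontext_pd(&u);
        return 0;
    }
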
+
 struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
                                          struct ib_udata *udata)
 {
@@ -229,7 +371,6 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return ERR_PTR(-ENOMEM);
-       ctx->dev = dev;
        INIT_LIST_HEAD(&ctx->mm_head);
        mutex_init(&ctx->mm_list_lock);
 
@@ -249,6 +390,11 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
        status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
        if (status)
                goto map_err;
+
+       status = ocrdma_alloc_ucontext_pd(dev, ctx, udata);
+       if (status)
+               goto pd_err;
+
        resp.dev_id = dev->id;
        resp.max_inline_data = dev->attr.max_inline_data;
        resp.wqe_size = dev->attr.wqe_size;
@@ -262,6 +408,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
        return &ctx->ibucontext;
 
 cpy_err:
+pd_err:
        ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
 map_err:
        dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
@@ -272,9 +419,13 @@ map_err:
 
 int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
 {
+       int status = 0;
        struct ocrdma_mm *mm, *tmp;
        struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
-       struct pci_dev *pdev = uctx->dev->nic_info.pdev;
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
+       struct pci_dev *pdev = dev->nic_info.pdev;
+
+       status = ocrdma_dealloc_ucontext_pd(uctx);
 
        ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
        dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
@@ -285,13 +436,13 @@ int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
                kfree(mm);
        }
        kfree(uctx);
-       return 0;
+       return status;
 }
 
 int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
        struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
-       struct ocrdma_dev *dev = ucontext->dev;
+       struct ocrdma_dev *dev = get_ocrdma_dev(context->device);
        unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
        u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
        unsigned long len = (vma->vm_end - vma->vm_start);
@@ -307,7 +458,10 @@ int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
        if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
                dev->nic_info.db_total_size)) &&
                (len <= dev->nic_info.db_page_size)) {
-               /* doorbell mapping */
+               if (vma->vm_flags & VM_READ)
+                       return -EPERM;
+
+               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
                status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
                                            len, vma->vm_page_prot);
        } else if (dev->nic_info.dpp_unmapped_len &&
@@ -315,19 +469,20 @@ int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
                (vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
                        dev->nic_info.dpp_unmapped_len)) &&
                (len <= dev->nic_info.dpp_unmapped_len)) {
-               /* dpp area mapping */
+               if (vma->vm_flags & VM_READ)
+                       return -EPERM;
+
                vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
                status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
                                            len, vma->vm_page_prot);
        } else {
-               /* queue memory mapping */
                status = remap_pfn_range(vma, vma->vm_start,
                                         vma->vm_pgoff, len, vma->vm_page_prot);
        }
        return status;
 }
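
ocrdma_mmap sorts the requested page offset into one of three windows (doorbell, DPP, ordinary queue memory) before picking mapping attributes; with this patch the first two also reject readable mappings. A sketch of the classification with illustrative bounds:

    #include <stdint.h>
    #include <stdio.h>

    enum region { REGION_DB, REGION_DPP, REGION_QUEUE };

    static enum region classify(uint64_t addr, uint64_t len,
                                uint64_t db_base, uint64_t db_total,
                                uint64_t db_page,
                                uint64_t dpp_base, uint64_t dpp_len)
    {
        if (addr >= db_base && addr <= db_base + db_total && len <= db_page)
            return REGION_DB;   /* mapped non-cached, write-only */
        if (dpp_len && addr >= dpp_base && addr <= dpp_base + dpp_len &&
            len <= dpp_len)
            return REGION_DPP;  /* mapped write-combined, write-only */
        return REGION_QUEUE;    /* ordinary cached queue memory */
    }

    int main(void)
    {
        /* Illustrative layout: doorbells at 0x1000, DPP at 0x100000. */
        printf("%d\n", classify(0x1000, 4096, 0x1000, 0x10000, 4096,
                                0x100000, 0x10000)); /* 0: doorbell */
        return 0;
    }
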
 
-static int ocrdma_copy_pd_uresp(struct ocrdma_pd *pd,
+static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
                                struct ib_ucontext *ib_ctx,
                                struct ib_udata *udata)
 {
@@ -341,19 +496,18 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_pd *pd,
        memset(&rsp, 0, sizeof(rsp));
        rsp.id = pd->id;
        rsp.dpp_enabled = pd->dpp_enabled;
-       db_page_addr = pd->dev->nic_info.unmapped_db +
-                       (pd->id * pd->dev->nic_info.db_page_size);
-       db_page_size = pd->dev->nic_info.db_page_size;
+       db_page_addr = ocrdma_get_db_addr(dev, pd->id);
+       db_page_size = dev->nic_info.db_page_size;
 
        status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
        if (status)
                return status;
 
        if (pd->dpp_enabled) {
-               dpp_page_addr = pd->dev->nic_info.dpp_unmapped_addr +
-                               (pd->id * OCRDMA_DPP_PAGE_SIZE);
+               dpp_page_addr = dev->nic_info.dpp_unmapped_addr +
+                               (pd->id * PAGE_SIZE);
                status = ocrdma_add_mmap(uctx, dpp_page_addr,
-                                OCRDMA_DPP_PAGE_SIZE);
+                                PAGE_SIZE);
                if (status)
                        goto dpp_map_err;
                rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
@@ -369,7 +523,7 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_pd *pd,
 
 ucopy_err:
        if (pd->dpp_enabled)
-               ocrdma_del_mmap(pd->uctx, dpp_page_addr, OCRDMA_DPP_PAGE_SIZE);
+               ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE);
 dpp_map_err:
        ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
        return status;
@@ -381,76 +535,75 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
 {
        struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
        struct ocrdma_pd *pd;
+       struct ocrdma_ucontext *uctx = NULL;
        int status;
+       u8 is_uctx_pd = false;
 
-       pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-       if (!pd)
-               return ERR_PTR(-ENOMEM);
-       pd->dev = dev;
        if (udata && context) {
-               pd->dpp_enabled = (dev->nic_info.dev_family ==
-                                       OCRDMA_GEN2_FAMILY) ? true : false;
-               pd->num_dpp_qp =
-                       pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
+               uctx = get_ocrdma_ucontext(context);
+               pd = ocrdma_get_ucontext_pd(uctx);
+               if (pd) {
+                       is_uctx_pd = true;
+                       goto pd_mapping;
+               }
        }
-       status = ocrdma_mbx_alloc_pd(dev, pd);
-       if (status) {
-               kfree(pd);
-               return ERR_PTR(status);
+
+       pd = _ocrdma_alloc_pd(dev, uctx, udata);
+       if (IS_ERR(pd)) {
+               status = PTR_ERR(pd);
+               goto exit;
        }
 
+pd_mapping:
        if (udata && context) {
-               status = ocrdma_copy_pd_uresp(pd, context, udata);
+               status = ocrdma_copy_pd_uresp(dev, pd, context, udata);
                if (status)
                        goto err;
        }
        return &pd->ibpd;
 
 err:
-       ocrdma_dealloc_pd(&pd->ibpd);
+       if (is_uctx_pd) {
+               ocrdma_release_ucontext_pd(uctx);
+       } else {
+               status = ocrdma_mbx_dealloc_pd(dev, pd);
+               kfree(pd);
+       }
+exit:
        return ERR_PTR(status);
 }
 
 int ocrdma_dealloc_pd(struct ib_pd *ibpd)
 {
        struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
-       struct ocrdma_dev *dev = pd->dev;
-       int status;
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+       struct ocrdma_ucontext *uctx = NULL;
+       int status = 0;
        u64 usr_db;
 
-       status = ocrdma_mbx_dealloc_pd(dev, pd);
-       if (pd->uctx) {
+       uctx = pd->uctx;
+       if (uctx) {
                u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
-                   (pd->id * OCRDMA_DPP_PAGE_SIZE);
+                       (pd->id * PAGE_SIZE);
                if (pd->dpp_enabled)
-                       ocrdma_del_mmap(pd->uctx, dpp_db, OCRDMA_DPP_PAGE_SIZE);
-               usr_db = dev->nic_info.unmapped_db +
-                   (pd->id * dev->nic_info.db_page_size);
+                       ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE);
+               usr_db = ocrdma_get_db_addr(dev, pd->id);
                ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
+
+               if (is_ucontext_pd(uctx, pd)) {
+                       ocrdma_release_ucontext_pd(uctx);
+                       return status;
+               }
        }
-       kfree(pd);
+       status = _ocrdma_dealloc_pd(dev, pd);
        return status;
 }
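
The per-context PD introduced above is cached rather than freed: ocrdma_alloc_pd() hands out the ucontext's PD when it is unclaimed, and ocrdma_dealloc_pd() only releases the claim, leaving the mailbox teardown to the context's own destruction. A minimal sketch of the resulting pairing, assuming the helpers added elsewhere in this patch (ocrdma_get_ucontext_pd(), is_ucontext_pd(), ocrdma_release_ucontext_pd(), _ocrdma_alloc_pd(), _ocrdma_dealloc_pd()) behave as their call sites suggest:

	/* illustration only, not the driver's literal code */
	static int pd_lifecycle_sketch(struct ocrdma_dev *dev,
				       struct ocrdma_ucontext *uctx)
	{
		struct ocrdma_pd *pd;

		pd = ocrdma_get_ucontext_pd(uctx);	/* cached PD free? */
		if (!pd)
			pd = _ocrdma_alloc_pd(dev, uctx, NULL);
		if (IS_ERR(pd))
			return PTR_ERR(pd);

		/* ... PD in use ... */

		if (is_ucontext_pd(uctx, pd))
			ocrdma_release_ucontext_pd(uctx); /* just unclaim */
		else
			_ocrdma_dealloc_pd(dev, pd);	  /* really free  */
		return 0;
	}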
 
-static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd,
-                                          int acc, u32 num_pbls,
-                                          u32 addr_check)
+static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
+                           u32 pdid, int acc, u32 num_pbls, u32 addr_check)
 {
        int status;
-       struct ocrdma_mr *mr;
-       struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
-       struct ocrdma_dev *dev = pd->dev;
-
-       if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
-               pr_err("%s(%d) leaving err, invalid access rights\n",
-                      __func__, dev->id);
-               return ERR_PTR(-EINVAL);
-       }
 
-       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-       if (!mr)
-               return ERR_PTR(-ENOMEM);
-       mr->hwmr.dev = dev;
        mr->hwmr.fr_mr = 0;
        mr->hwmr.local_rd = 1;
        mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
@@ -460,25 +613,38 @@ static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd,
        mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
        mr->hwmr.num_pbls = num_pbls;
 
-       status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pd->id, addr_check);
-       if (status) {
-               kfree(mr);
-               return ERR_PTR(-ENOMEM);
-       }
-       mr->pd = pd;
+       status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pdid, addr_check);
+       if (status)
+               return status;
+
        mr->ibmr.lkey = mr->hwmr.lkey;
        if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
                mr->ibmr.rkey = mr->hwmr.lkey;
-       return mr;
+       return 0;
 }
 
 struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
 {
+       int status;
        struct ocrdma_mr *mr;
+       struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
 
-       mr = ocrdma_alloc_lkey(ibpd, acc, 0, OCRDMA_ADDR_CHECK_DISABLE);
-       if (IS_ERR(mr))
-               return ERR_CAST(mr);
+       if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
+               pr_err("%s err, invalid access rights\n", __func__);
+               return ERR_PTR(-EINVAL);
+       }
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
+
+       status = ocrdma_alloc_lkey(dev, mr, pd->id, acc, 0,
+                                  OCRDMA_ADDR_CHECK_DISABLE);
+       if (status) {
+               kfree(mr);
+               return ERR_PTR(status);
+       }
 
        return &mr->ibmr;
 }
@@ -502,7 +668,8 @@ static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev,
        }
 }
 
-static int ocrdma_get_pbl_info(struct ocrdma_mr *mr, u32 num_pbes)
+static int ocrdma_get_pbl_info(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
+                             u32 num_pbes)
 {
        u32 num_pbls = 0;
        u32 idx = 0;
@@ -518,7 +685,7 @@ static int ocrdma_get_pbl_info(struct ocrdma_mr *mr, u32 num_pbes)
                num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64)));
                num_pbls = num_pbls / (pbl_size / sizeof(u64));
                idx++;
-       } while (num_pbls >= mr->hwmr.dev->attr.max_num_mr_pbl);
+       } while (num_pbls >= dev->attr.max_num_mr_pbl);
 
        mr->hwmr.num_pbes = num_pbes;
        mr->hwmr.num_pbls = num_pbls;
@@ -613,13 +780,12 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
                                 u64 usr_addr, int acc, struct ib_udata *udata)
 {
        int status = -ENOMEM;
-       struct ocrdma_dev *dev;
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
        struct ocrdma_mr *mr;
        struct ocrdma_pd *pd;
        u32 num_pbes;
 
        pd = get_ocrdma_pd(ibpd);
-       dev = pd->dev;
 
        if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
                return ERR_PTR(-EINVAL);
@@ -627,14 +793,13 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(status);
-       mr->hwmr.dev = dev;
        mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
        if (IS_ERR(mr->umem)) {
                status = -EFAULT;
                goto umem_err;
        }
        num_pbes = ib_umem_page_count(mr->umem);
-       status = ocrdma_get_pbl_info(mr, num_pbes);
+       status = ocrdma_get_pbl_info(dev, mr, num_pbes);
        if (status)
                goto umem_err;
 
@@ -654,7 +819,6 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
        status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
        if (status)
                goto mbx_err;
-       mr->pd = pd;
        mr->ibmr.lkey = mr->hwmr.lkey;
        if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
                mr->ibmr.rkey = mr->hwmr.lkey;
@@ -671,7 +835,7 @@ umem_err:
 int ocrdma_dereg_mr(struct ib_mr *ib_mr)
 {
        struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
-       struct ocrdma_dev *dev = mr->hwmr.dev;
+       struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device);
        int status;
 
        status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
@@ -686,29 +850,29 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
        return status;
 }
 
-static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata,
+static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
+                               struct ib_udata *udata,
                                struct ib_ucontext *ib_ctx)
 {
        int status;
-       struct ocrdma_ucontext *uctx;
+       struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
        struct ocrdma_create_cq_uresp uresp;
 
        memset(&uresp, 0, sizeof(uresp));
        uresp.cq_id = cq->id;
-       uresp.page_size = cq->len;
+       uresp.page_size = PAGE_ALIGN(cq->len);
        uresp.num_pages = 1;
        uresp.max_hw_cqe = cq->max_hw_cqe;
        uresp.page_addr[0] = cq->pa;
-       uresp.db_page_addr = cq->dev->nic_info.unmapped_db;
-       uresp.db_page_size = cq->dev->nic_info.db_page_size;
+       uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
+       uresp.db_page_size = dev->nic_info.db_page_size;
        uresp.phase_change = cq->phase_change ? 1 : 0;
        status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
        if (status) {
                pr_err("%s(%d) copy error cqid=0x%x.\n",
-                      __func__, cq->dev->id, cq->id);
+                      __func__, dev->id, cq->id);
                goto err;
        }
-       uctx = get_ocrdma_ucontext(ib_ctx);
        status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
        if (status)
                goto err;
@@ -728,6 +892,8 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
 {
        struct ocrdma_cq *cq;
        struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
+       struct ocrdma_ucontext *uctx = NULL;
+       u16 pd_id = 0;
        int status;
        struct ocrdma_create_cq_ureq ureq;
 
@@ -744,15 +910,19 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
        spin_lock_init(&cq->comp_handler_lock);
        INIT_LIST_HEAD(&cq->sq_head);
        INIT_LIST_HEAD(&cq->rq_head);
-       cq->dev = dev;
 
-       status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq);
+       if (ib_ctx) {
+               uctx = get_ocrdma_ucontext(ib_ctx);
+               pd_id = uctx->cntxt_pd->id;
+       }
+
+       status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
        if (status) {
                kfree(cq);
                return ERR_PTR(status);
        }
        if (ib_ctx) {
-               status = ocrdma_copy_cq_uresp(cq, udata, ib_ctx);
+               status = ocrdma_copy_cq_uresp(dev, cq, udata, ib_ctx);
                if (status)
                        goto ctx_err;
        }
@@ -786,13 +956,17 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
 {
        int status;
        struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
-       struct ocrdma_dev *dev = cq->dev;
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
+       int pdid = 0;
 
        status = ocrdma_mbx_destroy_cq(dev, cq);
 
        if (cq->ucontext) {
-               ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, cq->len);
-               ocrdma_del_mmap(cq->ucontext, dev->nic_info.unmapped_db,
+               pdid = cq->ucontext->cntxt_pd->id;
+               ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
+                               PAGE_ALIGN(cq->len));
+               ocrdma_del_mmap(cq->ucontext,
+                               ocrdma_get_db_addr(dev, pdid),
                                dev->nic_info.db_page_size);
        }
        dev->cq_tbl[cq->id] = NULL;
@@ -820,14 +994,17 @@ static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
 static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
                                  struct ib_qp_init_attr *attrs)
 {
-       if (attrs->qp_type != IB_QPT_GSI &&
-           attrs->qp_type != IB_QPT_RC &&
-           attrs->qp_type != IB_QPT_UD) {
+       if ((attrs->qp_type != IB_QPT_GSI) &&
+           (attrs->qp_type != IB_QPT_RC) &&
+           (attrs->qp_type != IB_QPT_UC) &&
+           (attrs->qp_type != IB_QPT_UD)) {
                pr_err("%s(%d) unsupported qp type=0x%x requested\n",
                       __func__, dev->id, attrs->qp_type);
                return -EINVAL;
        }
-       if (attrs->cap.max_send_wr > dev->attr.max_wqe) {
+       /* Skip the check for QP1 to support CM size of 128 */
+       if ((attrs->qp_type != IB_QPT_GSI) &&
+           (attrs->cap.max_send_wr > dev->attr.max_wqe)) {
                pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
                       __func__, dev->id, attrs->cap.max_send_wr);
                pr_err("%s(%d) supported send_wr=0x%x\n",
@@ -878,11 +1055,9 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
        /* verify consumer QPs are not trying to use GSI QP's CQ */
        if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
                if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
-                   (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) ||
-                   (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) ||
-                   (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
+                       (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
                        pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
-                              __func__, dev->id);
+                               __func__, dev->id);
                        return -EINVAL;
                }
        }
@@ -905,13 +1080,13 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
        uresp.qp_id = qp->id;
        uresp.sq_dbid = qp->sq.dbid;
        uresp.num_sq_pages = 1;
-       uresp.sq_page_size = qp->sq.len;
+       uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
        uresp.sq_page_addr[0] = qp->sq.pa;
        uresp.num_wqe_allocated = qp->sq.max_cnt;
        if (!srq) {
                uresp.rq_dbid = qp->rq.dbid;
                uresp.num_rq_pages = 1;
-               uresp.rq_page_size = qp->rq.len;
+               uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
                uresp.rq_page_addr[0] = qp->rq.pa;
                uresp.num_rqe_allocated = qp->rq.max_cnt;
        }
@@ -919,9 +1094,8 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
        uresp.db_page_size = dev->nic_info.db_page_size;
        if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
                uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
-               uresp.db_rq_offset = ((qp->id & 0xFFFF) < 128) ?
-                       OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET;
-               uresp.db_shift = (qp->id < 128) ? 24 : 16;
+               uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
+               uresp.db_shift = 24;
        } else {
                uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET;
                uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
@@ -964,8 +1138,7 @@ static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
                        OCRDMA_DB_GEN2_SQ_OFFSET;
                qp->rq_db = dev->nic_info.db +
                        (pd->id * dev->nic_info.db_page_size) +
-                       ((qp->id < 128) ?
-                       OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET);
+                       OCRDMA_DB_GEN2_RQ_OFFSET;
        } else {
                qp->sq_db = dev->nic_info.db +
                        (pd->id * dev->nic_info.db_page_size) +
@@ -1006,6 +1179,7 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
        qp->sq.max_sges = attrs->cap.max_send_sge;
        qp->rq.max_sges = attrs->cap.max_recv_sge;
        qp->state = OCRDMA_QPS_RST;
+       qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
 }
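
The new qp->signaled flag records whether the consumer asked for IB_SIGNAL_ALL_WR at QP creation; the post-send hunks below then mark every WQE signaled regardless of per-WR flags. For reference, a consumer selects this mode through the standard verbs API; a minimal sketch (pd, scq and rcq are assumed to exist):

	struct ib_qp_init_attr init_attr = {
		.send_cq	= scq,
		.recv_cq	= rcq,
		.qp_type	= IB_QPT_RC,
		.sq_sig_type	= IB_SIGNAL_ALL_WR, /* every send WR completes */
		.cap = {
			.max_send_wr	= 32,
			.max_recv_wr	= 32,
			.max_send_sge	= 1,
			.max_recv_sge	= 1,
		},
	};
	struct ib_qp *qp = ib_create_qp(pd, &init_attr);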
 
 
@@ -1026,7 +1200,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
        int status;
        struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
        struct ocrdma_qp *qp;
-       struct ocrdma_dev *dev = pd->dev;
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
        struct ocrdma_create_qp_ureq ureq;
        u16 dpp_credit_lmt, dpp_offset;
 
@@ -1046,6 +1220,9 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
        }
        qp->dev = dev;
        ocrdma_set_qp_init_params(qp, pd, attrs);
+       if (udata == NULL)
+               qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
+                                       OCRDMA_QP_FAST_REG);
 
        mutex_lock(&dev->dev_lock);
        status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
@@ -1056,8 +1233,6 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
 
        /* user space QP's wr_id table are managed in library */
        if (udata == NULL) {
-               qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
-                                 OCRDMA_QP_FAST_REG);
                status = ocrdma_alloc_wr_id_tbl(qp);
                if (status)
                        goto map_err;
@@ -1093,6 +1268,17 @@ gen_err:
        return ERR_PTR(status);
 }
 
+
+static void ocrdma_flush_rq_db(struct ocrdma_qp *qp)
+{
+       if (qp->db_cache) {
+               u32 val = qp->rq.dbid | (qp->db_cache <<
+                               ocrdma_get_num_posted_shift(qp));
+               iowrite32(val, qp->rq_db);
+               qp->db_cache = 0;
+       }
+}
+
 int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                      int attr_mask)
 {
@@ -1104,13 +1290,16 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        qp = get_ocrdma_qp(ibqp);
        dev = qp->dev;
        if (attr_mask & IB_QP_STATE)
-               status = ocrdma_qp_state_machine(qp, attr->qp_state, &old_qps);
+               status = ocrdma_qp_state_change(qp, attr->qp_state, &old_qps);
        /* if new and previous states are same hw doesn't need to
         * know about it.
         */
        if (status < 0)
                return status;
        status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps);
+       if (!status && attr_mask & IB_QP_STATE && attr->qp_state == IB_QPS_RTR)
+               ocrdma_flush_rq_db(qp);
+
        return status;
 }
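
The RTR hook above pairs with the ocrdma_ring_rq_db() change further down: receive doorbells rung while the QP is still in INIT are only counted in qp->db_cache, and the accumulated count is written out in a single doorbell once the transition to RTR succeeds. A condensed sketch of the two halves (illustration only; the patch increments db_cache once per posted RQE):

	static void rq_db_ring_or_cache(struct ocrdma_qp *qp)
	{
		u32 val = qp->rq.dbid |
			  (1 << ocrdma_get_num_posted_shift(qp));

		if (qp->state == OCRDMA_QPS_INIT)
			qp->db_cache++;		/* HW not ready yet */
		else
			iowrite32(val, qp->rq_db);
	}
	/* on INIT -> RTR, ocrdma_flush_rq_db() replays db_cache as one
	 * doorbell write and resets the counter */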
 
@@ -1215,7 +1404,7 @@ int ocrdma_query_qp(struct ib_qp *ibqp,
        qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
        qp_attr->cap.max_send_sge = qp->sq.max_sges;
        qp_attr->cap.max_recv_sge = qp->rq.max_sges;
-       qp_attr->cap.max_inline_data = dev->attr.max_inline_data;
+       qp_attr->cap.max_inline_data = qp->max_inline_data;
        qp_init_attr->cap = qp_attr->cap;
        memcpy(&qp_attr->ah_attr.grh.dgid, &params.dgid[0],
               sizeof(params.dgid));
@@ -1278,23 +1467,17 @@ static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, int idx)
 
 static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
 {
-       int free_cnt;
-       if (q->head >= q->tail)
-               free_cnt = (q->max_cnt - q->head) + q->tail;
-       else
-               free_cnt = q->tail - q->head;
-       return free_cnt;
+       return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt;
 }
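
The rewritten free count treats the queue as a ring with indices 0..max_wqe_idx and one slot always kept unusable, as in a classic ring buffer. A numeric check, assuming max_cnt == max_wqe_idx + 1 == 8:

	/*   head == 0, tail == 0  ->  (7 - 0 + 0) % 8 == 7 free
	 *   head == 3, tail == 0  ->  (7 - 3 + 0) % 8 == 4 free
	 *   head == 0, tail == 3  ->  (7 - 0 + 3) % 8 == 2 free (wrapped)
	 */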
 
 static int is_hw_sq_empty(struct ocrdma_qp *qp)
 {
-       return (qp->sq.tail == qp->sq.head &&
-               ocrdma_hwq_free_cnt(&qp->sq) ? 1 : 0);
+       return (qp->sq.tail == qp->sq.head);
 }
 
 static int is_hw_rq_empty(struct ocrdma_qp *qp)
 {
-       return (qp->rq.tail == qp->rq.head) ? 1 : 0;
+       return (qp->rq.tail == qp->rq.head);
 }
 
 static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
@@ -1360,17 +1543,18 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
                 */
                discard_cnt += 1;
                cqe->cmn.qpn = 0;
-               if (is_cqe_for_sq(cqe))
+               if (is_cqe_for_sq(cqe)) {
                        ocrdma_hwq_inc_tail(&qp->sq);
-               else {
+               } else {
                        if (qp->srq) {
                                spin_lock_irqsave(&qp->srq->q_lock, flags);
                                ocrdma_hwq_inc_tail(&qp->srq->rq);
                                ocrdma_srq_toggle_bit(qp->srq, cur_getp);
                                spin_unlock_irqrestore(&qp->srq->q_lock, flags);
 
-                       } else
+                       } else {
                                ocrdma_hwq_inc_tail(&qp->rq);
+                       }
                }
 skip_cqe:
                cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
@@ -1378,7 +1562,7 @@ skip_cqe:
        spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
 }
 
-static void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
+void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
 {
        int found = false;
        unsigned long flags;
@@ -1444,9 +1628,11 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp)
        mutex_unlock(&dev->dev_lock);
 
        if (pd->uctx) {
-               ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa, qp->sq.len);
+               ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa,
+                               PAGE_ALIGN(qp->sq.len));
                if (!qp->srq)
-                       ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa, qp->rq.len);
+                       ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa,
+                                       PAGE_ALIGN(qp->rq.len));
        }
 
        ocrdma_del_flush_qp(qp);
@@ -1457,7 +1643,8 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp)
        return status;
 }
 
-static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata)
+static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
+                               struct ib_udata *udata)
 {
        int status;
        struct ocrdma_create_srq_uresp uresp;
@@ -1467,12 +1654,12 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata)
        uresp.num_rq_pages = 1;
        uresp.rq_page_addr[0] = srq->rq.pa;
        uresp.rq_page_size = srq->rq.len;
-       uresp.db_page_addr = srq->dev->nic_info.unmapped_db +
-           (srq->pd->id * srq->dev->nic_info.db_page_size);
-       uresp.db_page_size = srq->dev->nic_info.db_page_size;
+       uresp.db_page_addr = dev->nic_info.unmapped_db +
+           (srq->pd->id * dev->nic_info.db_page_size);
+       uresp.db_page_size = dev->nic_info.db_page_size;
        uresp.num_rqe_allocated = srq->rq.max_cnt;
-       if (srq->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
-               uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET;
+       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+               uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
                uresp.db_shift = 24;
        } else {
                uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
@@ -1495,7 +1682,7 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
 {
        int status = -ENOMEM;
        struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
-       struct ocrdma_dev *dev = pd->dev;
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
        struct ocrdma_srq *srq;
 
        if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
@@ -1508,10 +1695,9 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
                return ERR_PTR(status);
 
        spin_lock_init(&srq->q_lock);
-       srq->dev = dev;
        srq->pd = pd;
        srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
-       status = ocrdma_mbx_create_srq(srq, init_attr, pd);
+       status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd);
        if (status)
                goto err;
 
@@ -1538,7 +1724,7 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
        }
 
        if (udata) {
-               status = ocrdma_copy_srq_uresp(srq, udata);
+               status = ocrdma_copy_srq_uresp(dev, srq, udata);
                if (status)
                        goto arm_err;
        }
@@ -1584,15 +1770,15 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq)
 {
        int status;
        struct ocrdma_srq *srq;
-       struct ocrdma_dev *dev;
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
 
        srq = get_ocrdma_srq(ibsrq);
-       dev = srq->dev;
 
        status = ocrdma_mbx_destroy_srq(dev, srq);
 
        if (srq->pd->uctx)
-               ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len);
+               ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa,
+                               PAGE_ALIGN(srq->rq.len));
 
        kfree(srq->idx_bit_fields);
        kfree(srq->rqe_wr_id_tbl);
@@ -1634,23 +1820,43 @@ static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr,
                memset(sge, 0, sizeof(*sge));
 }
 
+static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
+{
+       uint32_t total_len = 0, i;
+
+       for (i = 0; i < num_sge; i++)
+               total_len += sg_list[i].length;
+       return total_len;
+}
+
+
 static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
                                    struct ocrdma_hdr_wqe *hdr,
                                    struct ocrdma_sge *sge,
                                    struct ib_send_wr *wr, u32 wqe_size)
 {
-       if (wr->send_flags & IB_SEND_INLINE) {
-               if (wr->sg_list[0].length > qp->max_inline_data) {
+       int i;
+       char *dpp_addr;
+
+       if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) {
+               hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge);
+               if (unlikely(hdr->total_len > qp->max_inline_data)) {
                        pr_err("%s() supported_len=0x%x,\n"
                               " unspported len req=0x%x\n", __func__,
-                              qp->max_inline_data, wr->sg_list[0].length);
+                               qp->max_inline_data, hdr->total_len);
                        return -EINVAL;
                }
-               memcpy(sge,
-                      (void *)(unsigned long)wr->sg_list[0].addr,
-                      wr->sg_list[0].length);
-               hdr->total_len = wr->sg_list[0].length;
+               dpp_addr = (char *)sge;
+               for (i = 0; i < wr->num_sge; i++) {
+                       memcpy(dpp_addr,
+                              (void *)(unsigned long)wr->sg_list[i].addr,
+                              wr->sg_list[i].length);
+                       dpp_addr += wr->sg_list[i].length;
+               }
+
                wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
+               if (hdr->total_len == 0)
+                       wqe_size += sizeof(struct ocrdma_sge);
                hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
        } else {
                ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
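
With the gather loop above, an inline send may carry several SGEs; the segments are copied back-to-back into the WQE (and, per the added check, inline is no longer honoured for UD QPs). On the consumer side only the SGE count changes; a minimal verbs sketch, assuming qp, hdr_buf and data_buf exist and the 64 copied bytes fit max_inline_data:

	struct ib_sge sgl[2];
	struct ib_send_wr wr, *bad_wr;
	int ret;

	sgl[0].addr   = (u64)(unsigned long)hdr_buf;  /* host VAs; lkey */
	sgl[0].length = 16;                           /* is unused for  */
	sgl[1].addr   = (u64)(unsigned long)data_buf; /* inline sends   */
	sgl[1].length = 48;

	memset(&wr, 0, sizeof(wr));
	wr.opcode     = IB_WR_SEND;
	wr.send_flags = IB_SEND_INLINE | IB_SEND_SIGNALED;
	wr.sg_list    = sgl;
	wr.num_sge    = 2;

	ret = ib_post_send(qp, &wr, &bad_wr);
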
@@ -1675,8 +1881,9 @@ static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
                ocrdma_build_ud_hdr(qp, hdr, wr);
                sge = (struct ocrdma_sge *)(hdr + 2);
                wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
-       } else
+       } else {
                sge = (struct ocrdma_sge *)(hdr + 1);
+       }
 
        status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
        return status;
@@ -1719,6 +1926,96 @@ static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
        ext_rw->len = hdr->total_len;
 }
 
+static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl,
+                           struct ocrdma_hw_mr *hwmr)
+{
+       int i;
+       u64 buf_addr = 0;
+       int num_pbes;
+       struct ocrdma_pbe *pbe;
+
+       pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+       num_pbes = 0;
+
+       /* go through the OS phy regions & fill hw pbe entries into pbls. */
+       for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
+               /* number of pbes can be more for one OS buf, when
+                * buffers are of different sizes.
+                * split the ib_buf to one or more pbes.
+                */
+               buf_addr = wr->wr.fast_reg.page_list->page_list[i];
+               pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
+               pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
+               num_pbes += 1;
+               pbe++;
+
+               /* if the current pbl is full storing the pbes,
+                * move to the next pbl.
+                */
+               if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
+                       pbl_tbl++;
+                       pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+                       num_pbes = 0;
+               }
+       }
+       return;
+}
+
+static int get_encoded_page_size(int pg_sz)
+{
+       /* Max size is 256M (4096 << 16) */
+       int i = 0;
+       for (; i < 17; i++)
+               if (pg_sz == (4096 << i))
+                       break;
+       return i;
+}
+
+
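get_encoded_page_size() maps a power-of-two page size onto the device's log2-style encoding (4K -> 0, 8K -> 1, ..., 256M -> 16; anything else runs the loop out and returns 17). For in-range power-of-two inputs this is equivalent to the following sketch using the kernel's ilog2():

	/* equivalent only for valid pg_sz; no out-of-range sentinel */
	static int get_encoded_page_size_alt(int pg_sz)
	{
		return ilog2(pg_sz) - ilog2(4096);
	}
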
+static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
+                          struct ib_send_wr *wr)
+{
+       u64 fbo;
+       struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
+       struct ocrdma_mr *mr;
+       u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
+
+       wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
+
+       if ((wr->wr.fast_reg.page_list_len >
+               qp->dev->attr.max_pages_per_frmr) ||
+               (wr->wr.fast_reg.length > 0xffffffffULL))
+               return -EINVAL;
+
+       hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
+       hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
+
+       if (wr->wr.fast_reg.page_list_len == 0)
+               BUG();
+       if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE)
+               hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
+       if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE)
+               hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
+       if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ)
+               hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
+       hdr->lkey = wr->wr.fast_reg.rkey;
+       hdr->total_len = wr->wr.fast_reg.length;
+
+       fbo = wr->wr.fast_reg.iova_start -
+           (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK);
+
+       fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start);
+       fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff);
+       fast_reg->fbo_hi = upper_32_bits(fbo);
+       fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
+       fast_reg->num_sges = wr->wr.fast_reg.page_list_len;
+       fast_reg->size_sge =
+               get_encoded_page_size(1 << wr->wr.fast_reg.page_shift);
+       mr = (struct ocrdma_mr *) (unsigned long) qp->dev->stag_arr[(hdr->lkey >> 8) &
+               (OCRDMA_MAX_STAG - 1)];
+       build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr);
+       return 0;
+}
+
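ocrdma_build_fr() consumes a standard fast-register work request; the stag_arr lookup at the end recovers the driver MR from the key so the PBL can be written at post time. A consumer-side sketch of driving this path, assuming pd, qp and dma_addr exist and with error handling omitted:

	struct ib_send_wr fr_wr, *bad_wr;
	struct ib_mr *frmr;
	struct ib_fast_reg_page_list *frpl;
	int ret;

	frmr = ib_alloc_fast_reg_mr(pd, 16);         /* up to 16 pages */
	frpl = ib_alloc_fast_reg_page_list(pd->device, 16);
	frpl->page_list[0] = dma_addr & PAGE_MASK;   /* map one page   */

	memset(&fr_wr, 0, sizeof(fr_wr));
	fr_wr.opcode                    = IB_WR_FAST_REG_MR;
	fr_wr.wr.fast_reg.iova_start    = dma_addr;
	fr_wr.wr.fast_reg.page_list     = frpl;
	fr_wr.wr.fast_reg.page_list_len = 1;
	fr_wr.wr.fast_reg.page_shift    = PAGE_SHIFT;
	fr_wr.wr.fast_reg.length        = PAGE_SIZE;
	fr_wr.wr.fast_reg.access_flags  = IB_ACCESS_LOCAL_WRITE |
					  IB_ACCESS_REMOTE_READ;
	fr_wr.wr.fast_reg.rkey          = frmr->rkey;

	ret = ib_post_send(qp, &fr_wr, &bad_wr); /* polls as IB_WC_FAST_REG_MR */
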
 static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
 {
        u32 val = qp->sq.dbid | (1 << 16);
@@ -1750,7 +2047,7 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                }
                hdr = ocrdma_hwq_head(&qp->sq);
                hdr->cw = 0;
-               if (wr->send_flags & IB_SEND_SIGNALED)
+               if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
                        hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
                if (wr->send_flags & IB_SEND_FENCE)
                        hdr->cw |=
@@ -1788,10 +2085,14 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                case IB_WR_LOCAL_INV:
                        hdr->cw |=
                            (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT);
-                       hdr->cw |= (sizeof(struct ocrdma_hdr_wqe) /
+                       hdr->cw |= ((sizeof(struct ocrdma_hdr_wqe) +
+                                       sizeof(struct ocrdma_sge)) /
                                OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
                        hdr->lkey = wr->ex.invalidate_rkey;
                        break;
+               case IB_WR_FAST_REG_MR:
+                       status = ocrdma_build_fr(qp, hdr, wr);
+                       break;
                default:
                        status = -EINVAL;
                        break;
@@ -1800,7 +2101,7 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        *bad_wr = wr;
                        break;
                }
-               if (wr->send_flags & IB_SEND_SIGNALED)
+               if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
                        qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1;
                else
                        qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0;
@@ -1824,7 +2125,10 @@ static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
 {
        u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp));
 
-       iowrite32(val, qp->rq_db);
+       if (qp->state != OCRDMA_QPS_INIT)
+               iowrite32(val, qp->rq_db);
+       else
+               qp->db_cache++;
 }
 
 static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
@@ -1958,7 +2262,7 @@ int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 
 static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
 {
-       enum ib_wc_status ibwc_status = IB_WC_GENERAL_ERR;
+       enum ib_wc_status ibwc_status;
 
        switch (status) {
        case OCRDMA_CQE_GENERAL_ERR:
@@ -2055,6 +2359,9 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
        case OCRDMA_SEND:
                ibwc->opcode = IB_WC_SEND;
                break;
+       case OCRDMA_FR_MR:
+               ibwc->opcode = IB_WC_FAST_REG_MR;
+               break;
        case OCRDMA_LKEY_INV:
                ibwc->opcode = IB_WC_LOCAL_INV;
                break;
@@ -2108,7 +2415,7 @@ static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
        ibwc->status = ocrdma_to_ibwc_err(status);
 
        ocrdma_flush_qp(qp);
-       ocrdma_qp_state_machine(qp, IB_QPS_ERR, NULL);
+       ocrdma_qp_state_change(qp, IB_QPS_ERR, NULL);
 
        /* if wqe/rqe pending for which cqe needs to be returned,
         * trigger inflating it.
@@ -2193,7 +2500,8 @@ static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
                ocrdma_update_wc(qp, ibwc, tail);
                *polled = true;
        }
-       wqe_idx = le32_to_cpu(cqe->wq.wqeidx) & OCRDMA_CQE_WQEIDX_MASK;
+       wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) &
+                       OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx;
        if (tail != wqe_idx)
                expand = true; /* Coalesced CQE can't be consumed yet */
 
@@ -2242,7 +2550,8 @@ static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
        u32 wqe_idx;
 
        srq = get_ocrdma_srq(qp->ibqp.srq);
-       wqe_idx = le32_to_cpu(cqe->rq.buftag_qpn) >> OCRDMA_CQE_BUFTAG_SHIFT;
+       wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
+                       OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
        ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
        spin_lock_irqsave(&srq->q_lock, flags);
        ocrdma_srq_toggle_bit(srq, wqe_idx);
@@ -2299,9 +2608,9 @@ static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
                ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
                ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
        }
-       if (qp->ibqp.srq)
+       if (qp->ibqp.srq) {
                ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
-       else {
+       } else {
                ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
                ocrdma_hwq_inc_tail(&qp->rq);
        }
@@ -2314,13 +2623,14 @@ static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
        bool expand = false;
 
        ibwc->wc_flags = 0;
-       if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
+       if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
                status = (le32_to_cpu(cqe->flags_status_srcqpn) &
                                        OCRDMA_CQE_UD_STATUS_MASK) >>
                                        OCRDMA_CQE_UD_STATUS_SHIFT;
-       else
+       } else {
                status = (le32_to_cpu(cqe->flags_status_srcqpn) &
                             OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
+       }
 
        if (status == OCRDMA_CQE_SUCCESS) {
                *polled = true;
@@ -2338,9 +2648,10 @@ static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe,
        if (cq->phase_change) {
                if (cur_getp == 0)
                        cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
-       } else
+       } else {
                /* clear valid bit */
                cqe->flags_status_srcqpn = 0;
+       }
 }
 
 static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
@@ -2351,7 +2662,7 @@ static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
        bool expand = false;
        int polled_hw_cqes = 0;
        struct ocrdma_qp *qp = NULL;
-       struct ocrdma_dev *dev = cq->dev;
+       struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
        struct ocrdma_cqe *cqe;
        u16 cur_getp; bool polled = false; bool stop = false;
 
@@ -2417,8 +2728,9 @@ static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
                } else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
                        ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
                        ocrdma_hwq_inc_tail(&qp->rq);
-               } else
+               } else {
                        return err_cqes;
+               }
                ibwc->byte_len = 0;
                ibwc->status = IB_WC_WR_FLUSH_ERR;
                ibwc = ibwc + 1;
@@ -2431,14 +2743,11 @@ static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
 int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 {
        int cqes_to_poll = num_entries;
-       struct ocrdma_cq *cq = NULL;
-       unsigned long flags;
-       struct ocrdma_dev *dev;
+       struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
        int num_os_cqe = 0, err_cqes = 0;
        struct ocrdma_qp *qp;
-
-       cq = get_ocrdma_cq(ibcq);
-       dev = cq->dev;
+       unsigned long flags;
 
        /* poll cqes from adapter CQ */
        spin_lock_irqsave(&cq->cq_lock, flags);
@@ -2469,16 +2778,14 @@ int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 
 int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
 {
-       struct ocrdma_cq *cq;
-       unsigned long flags;
-       struct ocrdma_dev *dev;
+       struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
        u16 cq_id;
        u16 cur_getp;
        struct ocrdma_cqe *cqe;
+       unsigned long flags;
 
-       cq = get_ocrdma_cq(ibcq);
        cq_id = cq->id;
-       dev = cq->dev;
 
        spin_lock_irqsave(&cq->cq_lock, flags);
        if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
@@ -2500,3 +2807,226 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
        spin_unlock_irqrestore(&cq->cq_lock, flags);
        return 0;
 }
+
+struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
+{
+       int status;
+       struct ocrdma_mr *mr;
+       struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+
+       if (max_page_list_len > dev->attr.max_pages_per_frmr)
+               return ERR_PTR(-EINVAL);
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
+
+       status = ocrdma_get_pbl_info(dev, mr, max_page_list_len);
+       if (status)
+               goto pbl_err;
+       mr->hwmr.fr_mr = 1;
+       mr->hwmr.remote_rd = 0;
+       mr->hwmr.remote_wr = 0;
+       mr->hwmr.local_rd = 0;
+       mr->hwmr.local_wr = 0;
+       mr->hwmr.mw_bind = 0;
+       status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
+       if (status)
+               goto pbl_err;
+       status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, 0);
+       if (status)
+               goto mbx_err;
+       mr->ibmr.rkey = mr->hwmr.lkey;
+       mr->ibmr.lkey = mr->hwmr.lkey;
+       dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (unsigned long) mr;
+       return &mr->ibmr;
+mbx_err:
+       ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+pbl_err:
+       kfree(mr);
+       return ERR_PTR(status);
+}
+
+struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
+                                                         *ibdev,
+                                                         int page_list_len)
+{
+       struct ib_fast_reg_page_list *frmr_list;
+       int size;
+
+       size = sizeof(*frmr_list) + (page_list_len * sizeof(u64));
+       frmr_list = kzalloc(size, GFP_KERNEL);
+       if (!frmr_list)
+               return ERR_PTR(-ENOMEM);
+       frmr_list->page_list = (u64 *)(frmr_list + 1);
+       return frmr_list;
+}
+
+void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list)
+{
+       kfree(page_list);
+}
+
+#define MAX_KERNEL_PBE_SIZE 65536
+static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
+                                   int buf_cnt, u32 *pbe_size)
+{
+       u64 total_size = 0;
+       u64 buf_size = 0;
+       int i;
+       *pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
+       *pbe_size = roundup_pow_of_two(*pbe_size);
+
+       /* find the smallest PBE size that we can have */
+       for (i = 0; i < buf_cnt; i++) {
+               /* first addr may not be page aligned, so ignore checking */
+               if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
+                                (buf_list[i].size & ~PAGE_MASK))) {
+                       return 0;
+               }
+
+               /* if configured PBE size is greater than the chosen one,
+                * reduce the PBE size.
+                */
+               buf_size = roundup(buf_list[i].size, PAGE_SIZE);
+               /* pbe_size has to be a power-of-two multiple of 4K: 1, 2, 4, 8... */
+               buf_size = roundup_pow_of_two(buf_size);
+               if (*pbe_size > buf_size)
+                       *pbe_size = buf_size;
+
+               total_size += buf_size;
+       }
+       *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ?
+           (MAX_KERNEL_PBE_SIZE) : (*pbe_size);
+
+       /* num_pbes = total_size / (*pbe_size);  this is implemented below. */
+
+       return total_size >> ilog2(*pbe_size);
+}
+
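count_kernel_pbes() settles on one uniform PBE size for the whole phys-buf list and derives the PBE count from the rounded total. A worked example, assuming 4K PAGE_SIZE and buffer sizes of 16K, 4K and 8K:

	/* start:  pbe_size = pow2(roundup(16K)) = 16K
	 * i = 1:  4K buffer -> pbe_size shrinks to 4K
	 * i = 2:  8K buffer -> pbe_size stays 4K
	 * total_size = 16K + 4K + 8K = 28K
	 * return 28K >> ilog2(4K) = 7 PBEs
	 * (pbe_size is further clamped to MAX_KERNEL_PBE_SIZE, 64K) */
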
+static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
+                             u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
+                             struct ocrdma_hw_mr *hwmr)
+{
+       int i;
+       int idx;
+       int pbes_per_buf = 0;
+       u64 buf_addr = 0;
+       int num_pbes;
+       struct ocrdma_pbe *pbe;
+       int total_num_pbes = 0;
+
+       if (!hwmr->num_pbes)
+               return;
+
+       pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+       num_pbes = 0;
+
+       /* go through the OS phy regions & fill hw pbe entries into pbls. */
+       for (i = 0; i < ib_buf_cnt; i++) {
+               buf_addr = buf_list[i].addr;
+               pbes_per_buf =
+                   roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
+                   pbe_size;
+               hwmr->len += buf_list[i].size;
+               /* number of pbes can be more for one OS buf, when
+                * buffers are of different sizes.
+                * split the ib_buf to one or more pbes.
+                */
+               for (idx = 0; idx < pbes_per_buf; idx++) {
+                       /* we program always page aligned addresses,
+                        * first unaligned address is taken care by fbo.
+                        */
+                       if (i == 0) {
+                               /* for non zero fbo, assign the
+                                * start of the page.
+                                */
+                               pbe->pa_lo =
+                                   cpu_to_le32((u32) (buf_addr & PAGE_MASK));
+                               pbe->pa_hi =
+                                   cpu_to_le32((u32) upper_32_bits(buf_addr));
+                       } else {
+                               pbe->pa_lo =
+                                   cpu_to_le32((u32) (buf_addr & 0xffffffff));
+                               pbe->pa_hi =
+                                   cpu_to_le32((u32) upper_32_bits(buf_addr));
+                       }
+                       buf_addr += pbe_size;
+                       num_pbes += 1;
+                       total_num_pbes += 1;
+                       pbe++;
+
+                       if (total_num_pbes == hwmr->num_pbes)
+                               goto mr_tbl_done;
+                       /* if the pbl is full storing the pbes,
+                        * move to next pbl.
+                        */
+                       if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
+                               pbl_tbl++;
+                               pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+                               num_pbes = 0;
+                       }
+               }
+       }
+mr_tbl_done:
+       return;
+}
+
+struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
+                                  struct ib_phys_buf *buf_list,
+                                  int buf_cnt, int acc, u64 *iova_start)
+{
+       int status = -ENOMEM;
+       struct ocrdma_mr *mr;
+       struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+       struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+       u32 num_pbes;
+       u32 pbe_size = 0;
+
+       if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
+               return ERR_PTR(-EINVAL);
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(status);
+
+       num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
+       if (num_pbes == 0) {
+               status = -EINVAL;
+               goto pbl_err;
+       }
+       status = ocrdma_get_pbl_info(dev, mr, num_pbes);
+       if (status)
+               goto pbl_err;
+
+       mr->hwmr.pbe_size = pbe_size;
+       mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
+       mr->hwmr.va = *iova_start;
+       mr->hwmr.local_rd = 1;
+       mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+       mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
+       mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
+       mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
+       mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
+
+       status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
+       if (status)
+               goto pbl_err;
+       build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
+                         &mr->hwmr);
+       status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
+       if (status)
+               goto mbx_err;
+
+       mr->ibmr.lkey = mr->hwmr.lkey;
+       if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
+               mr->ibmr.rkey = mr->hwmr.lkey;
+       return &mr->ibmr;
+
+mbx_err:
+       ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+pbl_err:
+       kfree(mr);
+       return ERR_PTR(status);
+}
index 633f03d802746908bcb4f7a58adef800b024ce2f..b8f7853fd36ce7a61e53004105c2b9082af14ebb 100644 (file)
@@ -72,6 +72,7 @@ int ocrdma_query_qp(struct ib_qp *,
                    struct ib_qp_attr *qp_attr,
                    int qp_attr_mask, struct ib_qp_init_attr *);
 int ocrdma_destroy_qp(struct ib_qp *);
+void ocrdma_del_flush_qp(struct ocrdma_qp *qp);
 
 struct ib_srq *ocrdma_create_srq(struct ib_pd *, struct ib_srq_init_attr *,
                                 struct ib_udata *);
@@ -89,5 +90,10 @@ struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *,
                                   int num_phys_buf, int acc, u64 *iova_start);
 struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
                                 u64 virt, int acc, struct ib_udata *);
+struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *pd, int max_page_list_len);
+struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
+                                                       *ibdev,
+                                                       int page_list_len);
+void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list);
 
 #endif                         /* __OCRDMA_VERBS_H__ */
index 3eceb61e3532844555b2656b42f50a4ca42664dc..7a3175400b2a1c6cf05b85e59f39773b5eb4db86 100644 (file)
@@ -817,7 +817,6 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
                if (neigh) {
                        neigh->cm = NULL;
-                       list_del(&neigh->list);
                        ipoib_neigh_free(neigh);
 
                        tx->neigh = NULL;
@@ -1234,7 +1233,6 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 
                if (neigh) {
                        neigh->cm = NULL;
-                       list_del(&neigh->list);
                        ipoib_neigh_free(neigh);
 
                        tx->neigh = NULL;
@@ -1325,7 +1323,6 @@ static void ipoib_cm_tx_start(struct work_struct *work)
                        neigh = p->neigh;
                        if (neigh) {
                                neigh->cm = NULL;
-                               list_del(&neigh->list);
                                ipoib_neigh_free(neigh);
                        }
                        list_del(&p->list);
index c6f71a88c55ca9649b6098cc81d64de229857055..82cec1af902cd24533ef032869cf2052fa9d4f63 100644 (file)
@@ -493,7 +493,6 @@ static void path_rec_completion(int status,
                                                                               path,
                                                                               neigh));
                                if (!ipoib_cm_get(neigh)) {
-                                       list_del(&neigh->list);
                                        ipoib_neigh_free(neigh);
                                        continue;
                                }
@@ -618,7 +617,6 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
                        if (!ipoib_cm_get(neigh))
                                ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
                        if (!ipoib_cm_get(neigh)) {
-                               list_del(&neigh->list);
                                ipoib_neigh_free(neigh);
                                goto err_drop;
                        }
@@ -639,7 +637,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
                neigh->ah  = NULL;
 
                if (!path->query && path_rec_start(dev, path))
-                       goto err_list;
+                       goto err_path;
 
                __skb_queue_tail(&neigh->queue, skb);
        }
@@ -648,9 +646,6 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
        ipoib_neigh_put(neigh);
        return;
 
-err_list:
-       list_del(&neigh->list);
-
 err_path:
        ipoib_neigh_free(neigh);
 err_drop:
@@ -1098,6 +1093,8 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh)
                        rcu_assign_pointer(*np,
                                           rcu_dereference_protected(neigh->hnext,
                                                                     lockdep_is_held(&priv->lock)));
+                       /* remove from parent list */
+                       list_del(&neigh->list);
                        call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
                        return;
                } else {
index 2e84ef859c5b9755d0940ee6bb812915dfcd69f0..705de7b40201a02602d5edbdf657c6a42882ed62 100644 (file)
@@ -347,6 +347,7 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
 {
        struct iscsi_conn *conn = cls_conn->dd_data;
        struct iscsi_iser_conn *iser_conn;
+       struct iscsi_session *session;
        struct iser_conn *ib_conn;
        struct iscsi_endpoint *ep;
        int error;
@@ -365,7 +366,8 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
        }
        ib_conn = ep->dd_data;
 
-       if (iser_alloc_rx_descriptors(ib_conn))
+       session = conn->session;
+       if (iser_alloc_rx_descriptors(ib_conn, session))
                return -ENOMEM;
 
        /* binds the iSER connection retrieved from the previously
@@ -419,12 +421,13 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
        struct iscsi_cls_session *cls_session;
        struct iscsi_session *session;
        struct Scsi_Host *shost;
-       struct iser_conn *ib_conn;
+       struct iser_conn *ib_conn = NULL;
 
        shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
        if (!shost)
                return NULL;
        shost->transportt = iscsi_iser_scsi_transport;
+       shost->cmd_per_lun = qdepth;
        shost->max_lun = iscsi_max_lun;
        shost->max_id = 0;
        shost->max_channel = 0;
@@ -441,12 +444,14 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
                           ep ? ib_conn->device->ib_device->dma_device : NULL))
                goto free_host;
 
-       /*
-        * we do not support setting can_queue cmd_per_lun from userspace yet
-        * because we preallocate so many resources
-        */
+       if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) {
+               iser_info("cmds_max changed from %u to %u\n",
+                         cmds_max, ISER_DEF_XMIT_CMDS_MAX);
+               cmds_max = ISER_DEF_XMIT_CMDS_MAX;
+       }
+
        cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
-                                         ISCSI_DEF_XMIT_CMDS_MAX, 0,
+                                         cmds_max, 0,
                                          sizeof(struct iscsi_iser_task),
                                          initial_cmdsn, 0);
        if (!cls_session)
index 4f069c0d4c04371be4a156cfac59f24e23fbac02..67914027c614d5a393d036fcce1ff3edda021a39 100644 (file)
 
 #define iser_warn(fmt, arg...)                         \
        do {                                            \
-               if (iser_debug_level > 1)               \
+               if (iser_debug_level > 0)               \
                        pr_warn(PFX "%s:" fmt,          \
                                __func__ , ## arg);     \
        } while (0)
 
 #define iser_info(fmt, arg...)                         \
        do {                                            \
-               if (iser_debug_level > 0)               \
+               if (iser_debug_level > 1)               \
                        pr_info(PFX "%s:" fmt,          \
                                __func__ , ## arg);     \
        } while (0)
 
                                        /* support up to 512KB in one RDMA */
 #define ISCSI_ISER_SG_TABLESIZE         (0x80000 >> SHIFT_4K)
-#define ISER_DEF_CMD_PER_LUN           ISCSI_DEF_XMIT_CMDS_MAX
+#define ISER_DEF_XMIT_CMDS_DEFAULT             512
+#if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT
+       #define ISER_DEF_XMIT_CMDS_MAX          ISCSI_DEF_XMIT_CMDS_MAX
+#else
+       #define ISER_DEF_XMIT_CMDS_MAX          ISER_DEF_XMIT_CMDS_DEFAULT
+#endif
+#define ISER_DEF_CMD_PER_LUN           ISER_DEF_XMIT_CMDS_MAX
 
 /* QP settings */
 /* Maximal bounds on received asynchronous PDUs */
 #define ISER_MAX_TX_MISC_PDUS          6 /* NOOP_OUT(2), TEXT(1),         *
                                           * SCSI_TMFUNC(2), LOGOUT(1) */
 
-#define ISER_QP_MAX_RECV_DTOS          (ISCSI_DEF_XMIT_CMDS_MAX)
+#define ISER_QP_MAX_RECV_DTOS          (ISER_DEF_XMIT_CMDS_MAX)
 
-#define ISER_MIN_POSTED_RX             (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
+#define ISER_MIN_POSTED_RX             (ISER_DEF_XMIT_CMDS_MAX >> 2)
 
 /* the max TX (send) WR supported by the iSER QP is defined by                 *
  * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect   *
 
 #define ISER_INFLIGHT_DATAOUTS         8
 
-#define ISER_QP_MAX_REQ_DTOS           (ISCSI_DEF_XMIT_CMDS_MAX *    \
+#define ISER_QP_MAX_REQ_DTOS           (ISER_DEF_XMIT_CMDS_MAX *    \
                                        (1 + ISER_INFLIGHT_DATAOUTS) + \
                                        ISER_MAX_TX_MISC_PDUS        + \
                                        ISER_MAX_RX_MISC_PDUS)
@@ -205,7 +211,7 @@ struct iser_mem_reg {
        u64  va;
        u64  len;
        void *mem_h;
-       int  is_fmr;
+       int  is_mr;
 };
 
 struct iser_regd_buf {
@@ -246,6 +252,9 @@ struct iser_rx_desc {
 
 #define ISER_MAX_CQ 4
 
+struct iser_conn;
+struct iscsi_iser_task;
+
 struct iser_device {
        struct ib_device             *ib_device;
        struct ib_pd                 *pd;
@@ -259,6 +268,22 @@ struct iser_device {
        int                          cq_active_qps[ISER_MAX_CQ];
        int                          cqs_used;
        struct iser_cq_desc          *cq_desc;
+       int                          (*iser_alloc_rdma_reg_res)(struct iser_conn *ib_conn,
+                                                               unsigned cmds_max);
+       void                         (*iser_free_rdma_reg_res)(struct iser_conn *ib_conn);
+       int                          (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task,
+                                                         enum iser_data_dir cmd_dir);
+       void                         (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task,
+                                                           enum iser_data_dir cmd_dir);
+};
+
+struct fast_reg_descriptor {
+       struct list_head                  list;
+       /* For fast registration - FRWR */
+       struct ib_mr                     *data_mr;
+       struct ib_fast_reg_page_list     *data_frpl;
+       /* Valid for fast registration flag */
+       bool                              valid;
 };
 
 struct iser_conn {
@@ -270,13 +295,13 @@ struct iser_conn {
        struct iser_device           *device;       /* device context          */
        struct rdma_cm_id            *cma_id;       /* CMA ID                  */
        struct ib_qp                 *qp;           /* QP                      */
-       struct ib_fmr_pool           *fmr_pool;     /* pool of IB FMRs         */
        wait_queue_head_t            wait;          /* waitq for conn/disconn  */
+       unsigned                     qp_max_recv_dtos; /* num of rx buffers */
+       unsigned                     qp_max_recv_dtos_mask; /* above minus 1 */
+       unsigned                     min_posted_rx; /* qp_max_recv_dtos >> 2 */
        int                          post_recv_buf_count; /* posted rx count  */
        atomic_t                     post_send_buf_count; /* posted tx count   */
        char                         name[ISER_OBJECT_NAME_SIZE];
-       struct iser_page_vec         *page_vec;     /* represents SG to fmr maps*
-                                                    * maps serialized as tx is*/
        struct list_head             conn_list;       /* entry in ig conn list */
 
        char                         *login_buf;
@@ -285,6 +310,17 @@ struct iser_conn {
        unsigned int                 rx_desc_head;
        struct iser_rx_desc          *rx_descs;
        struct ib_recv_wr            rx_wr[ISER_MIN_POSTED_RX];
+       union {
+               struct {
+                       struct ib_fmr_pool      *pool;     /* pool of IB FMRs         */
+                       struct iser_page_vec    *page_vec; /* represents SG to fmr maps*
+                                                           * maps serialized as tx is*/
+               } fmr;
+               struct {
+                       struct list_head        pool;
+                       int                     pool_size;
+               } frwr;
+       } fastreg;
 };
 
 struct iscsi_iser_conn {
@@ -368,8 +404,10 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn);
 void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,
                                     enum iser_data_dir         cmd_dir);
 
-int  iser_reg_rdma_mem(struct iscsi_iser_task *task,
-                      enum   iser_data_dir        cmd_dir);
+int  iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
+                          enum iser_data_dir cmd_dir);
+int  iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task,
+                           enum iser_data_dir cmd_dir);
 
 int  iser_connect(struct iser_conn   *ib_conn,
                  struct sockaddr_in *src_addr,
@@ -380,7 +418,10 @@ int  iser_reg_page_vec(struct iser_conn     *ib_conn,
                       struct iser_page_vec *page_vec,
                       struct iser_mem_reg  *mem_reg);
 
-void iser_unreg_mem(struct iser_mem_reg *mem_reg);
+void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
+                       enum iser_data_dir cmd_dir);
+void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
+                        enum iser_data_dir cmd_dir);
 
 int  iser_post_recvl(struct iser_conn *ib_conn);
 int  iser_post_recvm(struct iser_conn *ib_conn, int count);
@@ -394,5 +435,9 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
 void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
 int  iser_initialize_task_headers(struct iscsi_task *task,
                        struct iser_tx_desc *tx_desc);
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn);
+int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);
+int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
+void iser_free_fmr_pool(struct iser_conn *ib_conn);
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
+void iser_free_frwr_pool(struct iser_conn *ib_conn);
 #endif
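
The four function pointers added to struct iser_device above form a small registration-ops table, bound once per device and called indirectly afterwards. A hedged standalone sketch of that pattern (all names here are illustrative, not the driver's):

    #include <stdio.h>

    struct conn;  /* opaque connection handle, enough for the sketch */

    /* per-device registration backend, chosen once at device init */
    struct reg_ops {
        int  (*alloc_res)(struct conn *c, unsigned cmds_max);
        void (*free_res)(struct conn *c);
    };

    static int fmr_alloc(struct conn *c, unsigned cmds_max)
    {
        printf("FMR pool sized for %u commands\n", cmds_max);
        return 0;
    }

    static int frwr_alloc(struct conn *c, unsigned cmds_max)
    {
        printf("FRWR pool sized for %u commands\n", cmds_max);
        return 0;
    }

    static void noop_free(struct conn *c) { (void)c; }

    int main(void)
    {
        int have_fmr = 0;  /* pretend the HCA lacks FMR support */
        struct reg_ops ops = {
            .alloc_res = have_fmr ? fmr_alloc : frwr_alloc,
            .free_res  = noop_free,
        };

        /* callers go through the ops table and never test the mode again */
        return ops.alloc_res(NULL, 512);
    }
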
index b6d81a86c9760eed77c3724d3bb138d0e8cf8a6e..5f01da99ad66ad55d6014068054268fdd04ce371 100644 (file)
@@ -49,6 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
 
 {
        struct iscsi_iser_task *iser_task = task->dd_data;
+       struct iser_device  *device = iser_task->iser_conn->ib_conn->device;
        struct iser_regd_buf *regd_buf;
        int err;
        struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -69,7 +70,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
                return -EINVAL;
        }
 
-       err = iser_reg_rdma_mem(iser_task,ISER_DIR_IN);
+       err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
        if (err) {
                iser_err("Failed to set up Data-IN RDMA\n");
                return err;
@@ -98,6 +99,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
                       unsigned int edtl)
 {
        struct iscsi_iser_task *iser_task = task->dd_data;
+       struct iser_device  *device = iser_task->iser_conn->ib_conn->device;
        struct iser_regd_buf *regd_buf;
        int err;
        struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -119,7 +121,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
                return -EINVAL;
        }
 
-       err = iser_reg_rdma_mem(iser_task,ISER_DIR_OUT);
+       err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
        if (err != 0) {
                iser_err("Failed to register write cmd RDMA mem\n");
                return err;
@@ -170,8 +172,78 @@ static void iser_create_send_desc(struct iser_conn *ib_conn,
        }
 }
 
+static void iser_free_login_buf(struct iser_conn *ib_conn)
+{
+       if (!ib_conn->login_buf)
+               return;
+
+       if (ib_conn->login_req_dma)
+               ib_dma_unmap_single(ib_conn->device->ib_device,
+                                   ib_conn->login_req_dma,
+                                   ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+
+       if (ib_conn->login_resp_dma)
+               ib_dma_unmap_single(ib_conn->device->ib_device,
+                                   ib_conn->login_resp_dma,
+                                   ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+
+       kfree(ib_conn->login_buf);
+
+       /* make sure we never redo any unmapping */
+       ib_conn->login_req_dma = 0;
+       ib_conn->login_resp_dma = 0;
+       ib_conn->login_buf = NULL;
+}
+
+static int iser_alloc_login_buf(struct iser_conn *ib_conn)
+{
+       struct iser_device      *device;
+       int                     req_err, resp_err;
+
+       BUG_ON(ib_conn->device == NULL);
+
+       device = ib_conn->device;
+
+       ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
+                                    ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+       if (!ib_conn->login_buf)
+               goto out_err;
+
+       ib_conn->login_req_buf  = ib_conn->login_buf;
+       ib_conn->login_resp_buf = ib_conn->login_buf +
+                                               ISCSI_DEF_MAX_RECV_SEG_LEN;
+
+       ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
+                               (void *)ib_conn->login_req_buf,
+                               ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+
+       ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
+                               (void *)ib_conn->login_resp_buf,
+                               ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+
+       req_err  = ib_dma_mapping_error(device->ib_device,
+                                       ib_conn->login_req_dma);
+       resp_err = ib_dma_mapping_error(device->ib_device,
+                                       ib_conn->login_resp_dma);
 
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
+       if (req_err || resp_err) {
+               if (req_err)
+                       ib_conn->login_req_dma = 0;
+               if (resp_err)
+                       ib_conn->login_resp_dma = 0;
+               goto free_login_buf;
+       }
+       return 0;
+
+free_login_buf:
+       iser_free_login_buf(ib_conn);
+
+out_err:
+       iser_err("unable to alloc or map login buf\n");
+       return -ENOMEM;
+}
+
+int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session)
 {
        int i, j;
        u64 dma_addr;
@@ -179,14 +251,24 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
        struct ib_sge       *rx_sg;
        struct iser_device  *device = ib_conn->device;
 
-       ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
+       ib_conn->qp_max_recv_dtos = session->cmds_max;
+       ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
+       ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2;
+
+       if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max))
+               goto create_rdma_reg_res_failed;
+
+       if (iser_alloc_login_buf(ib_conn))
+               goto alloc_login_buf_fail;
+
+       ib_conn->rx_descs = kmalloc(session->cmds_max *
                                sizeof(struct iser_rx_desc), GFP_KERNEL);
        if (!ib_conn->rx_descs)
                goto rx_desc_alloc_fail;
 
        rx_desc = ib_conn->rx_descs;
 
-       for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)  {
+       for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++)  {
                dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
                                        ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
                if (ib_dma_mapping_error(device->ib_device, dma_addr))
@@ -207,10 +289,14 @@ rx_desc_dma_map_failed:
        rx_desc = ib_conn->rx_descs;
        for (j = 0; j < i; j++, rx_desc++)
                ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
-                       ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+                                   ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
        kfree(ib_conn->rx_descs);
        ib_conn->rx_descs = NULL;
 rx_desc_alloc_fail:
+       iser_free_login_buf(ib_conn);
+alloc_login_buf_fail:
+       device->iser_free_rdma_reg_res(ib_conn);
+create_rdma_reg_res_failed:
        iser_err("failed allocating rx descriptors / data buffers\n");
        return -ENOMEM;
 }
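
The qp_max_recv_dtos_mask field set above only works because cmds_max is a power of two, as the inline comment notes; a tiny sketch of the divide-free ring wrap it enables (the same mask is used later by iser_post_recvm()):

    #include <stdio.h>

    int main(void)
    {
        unsigned cmds_max = 128;       /* must be 2^N */
        unsigned mask = cmds_max - 1;  /* qp_max_recv_dtos_mask */
        unsigned head = 126;

        for (int i = 0; i < 4; i++) {
            /* same as (head + 1) % cmds_max, without a division */
            head = (head + 1) & mask;
            printf("rx head -> %u\n", head);  /* 127, 0, 1, 2 */
        }
        return 0;
    }
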
@@ -222,13 +308,21 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn)
        struct iser_device *device = ib_conn->device;
 
        if (!ib_conn->rx_descs)
-               return;
+               goto free_login_buf;
+
+       if (device->iser_free_rdma_reg_res)
+               device->iser_free_rdma_reg_res(ib_conn);
 
        rx_desc = ib_conn->rx_descs;
-       for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
+       for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++)
                ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
-                       ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+                                   ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
        kfree(ib_conn->rx_descs);
+       /* make sure we never redo any unmapping */
+       ib_conn->rx_descs = NULL;
+
+free_login_buf:
+       iser_free_login_buf(ib_conn);
 }
 
 static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
@@ -248,9 +342,10 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
        WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1);
        WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
 
-       iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
+       iser_dbg("Initially post: %d\n", iser_conn->ib_conn->min_posted_rx);
        /* Initial post receive buffers */
-       if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
+       if (iser_post_recvm(iser_conn->ib_conn,
+                           iser_conn->ib_conn->min_posted_rx))
                return -ENOMEM;
 
        return 0;
@@ -487,9 +582,9 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
                return;
 
        outstanding = ib_conn->post_recv_buf_count;
-       if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
-               count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
-                                               ISER_MIN_POSTED_RX);
+       if (outstanding + ib_conn->min_posted_rx <= ib_conn->qp_max_recv_dtos) {
+               count = min(ib_conn->qp_max_recv_dtos - outstanding,
+                                               ib_conn->min_posted_rx);
                err = iser_post_recvm(ib_conn, count);
                if (err)
                        iser_err("posting %d rx bufs err %d\n", count, err);
@@ -538,8 +633,8 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
 
 void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
 {
+       struct iser_device *device = iser_task->iser_conn->ib_conn->device;
        int is_rdma_aligned = 1;
-       struct iser_regd_buf *regd;
 
        /* if we were reading, copy back to unaligned sglist,
         * anyway dma_unmap and free the copy
@@ -553,17 +648,11 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
                iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT);
        }
 
-       if (iser_task->dir[ISER_DIR_IN]) {
-               regd = &iser_task->rdma_regd[ISER_DIR_IN];
-               if (regd->reg.is_fmr)
-                       iser_unreg_mem(&regd->reg);
-       }
+       if (iser_task->dir[ISER_DIR_IN])
+               device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
 
-       if (iser_task->dir[ISER_DIR_OUT]) {
-               regd = &iser_task->rdma_regd[ISER_DIR_OUT];
-               if (regd->reg.is_fmr)
-                       iser_unreg_mem(&regd->reg);
-       }
+       if (iser_task->dir[ISER_DIR_OUT])
+               device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
 
        /* if the data was unaligned, it was already unmapped and then copied */
        if (is_rdma_aligned)
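
iser_free_login_buf() and iser_free_rx_descriptors() above both zero their handles after releasing them so a repeated call is harmless; a minimal userspace sketch of that idempotent-teardown idiom (plain free() stands in for the DMA unmap):

    #include <stdlib.h>

    struct login_bufs {
        void *buf;
        unsigned long req_dma, resp_dma;  /* stand-ins for the DMA handles */
    };

    static void free_bufs(struct login_bufs *b)
    {
        if (!b->buf)
            return;       /* already torn down: nothing to undo */
        free(b->buf);
        /* make sure we never redo any unmapping */
        b->buf = NULL;
        b->req_dma = b->resp_dma = 0;
    }

    int main(void)
    {
        struct login_bufs b = { .buf = malloc(64) };

        free_bufs(&b);
        free_bufs(&b);    /* safe: second call is a no-op */
        return 0;
    }
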
index 7827baf455a1f45fa4fa20495fcf01dd370914ff..1ce0c97d2ccb894d33ef9871456726aa45b51ab9 100644 (file)
@@ -170,8 +170,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
  */
 
 static int iser_sg_to_page_vec(struct iser_data_buf *data,
-                              struct iser_page_vec *page_vec,
-                              struct ib_device *ibdev)
+                              struct ib_device *ibdev, u64 *pages,
+                              int *offset, int *data_size)
 {
        struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
        u64 start_addr, end_addr, page, chunk_start = 0;
@@ -180,7 +180,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
        int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
 
        /* compute the offset of first element */
-       page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;
+       *offset = (u64) sgl[0].offset & ~MASK_4K;
 
        new_chunk = 1;
        cur_page  = 0;
@@ -204,13 +204,14 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
                   which might be unaligned */
                page = chunk_start & MASK_4K;
                do {
-                       page_vec->pages[cur_page++] = page;
+                       pages[cur_page++] = page;
                        page += SIZE_4K;
                } while (page < end_addr);
        }
 
-       page_vec->data_size = total_sz;
-       iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page);
+       *data_size = total_sz;
+       iser_dbg("page_vec->data_size:%d cur_page %d\n",
+                *data_size, cur_page);
        return cur_page;
 }
 
@@ -267,11 +268,8 @@ static void iser_data_buf_dump(struct iser_data_buf *data,
        struct scatterlist *sg;
        int i;
 
-       if (iser_debug_level == 0)
-               return;
-
        for_each_sg(sgl, sg, data->dma_nents, i)
-               iser_warn("sg[%d] dma_addr:0x%lX page:0x%p "
+               iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
                         "off:0x%x sz:0x%x dma_len:0x%x\n",
                         i, (unsigned long)ib_sg_dma_address(ibdev, sg),
                         sg_page(sg), sg->offset,
@@ -298,8 +296,10 @@ static void iser_page_vec_build(struct iser_data_buf *data,
        page_vec->offset = 0;
 
        iser_dbg("Translating sg sz: %d\n", data->dma_nents);
-       page_vec_len = iser_sg_to_page_vec(data, page_vec, ibdev);
-       iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents,page_vec_len);
+       page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages,
+                                          &page_vec->offset,
+                                          &page_vec->data_size);
+       iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);
 
        page_vec->length = page_vec_len;
 
@@ -347,16 +347,41 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task)
        }
 }
 
+static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
+                             struct ib_device *ibdev,
+                             enum iser_data_dir cmd_dir,
+                             int aligned_len)
+{
+       struct iscsi_conn    *iscsi_conn = iser_task->iser_conn->iscsi_conn;
+       struct iser_data_buf *mem = &iser_task->data[cmd_dir];
+
+       iscsi_conn->fmr_unalign_cnt++;
+       iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
+                 aligned_len, mem->size);
+
+       if (iser_debug_level > 0)
+               iser_data_buf_dump(mem, ibdev);
+
+       /* unmap the command data before accessing it */
+       iser_dma_unmap_task_data(iser_task);
+
+       /* allocate a copy buffer; if we are writing, copy the unaligned
+        * scatterlist into it, then DMA-map the copy
+        */
+       if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
+               return -ENOMEM;
+
+       return 0;
+}
+
 /**
- * iser_reg_rdma_mem - Registers memory intended for RDMA,
- * obtaining rkey and va
+ * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
+ * using FMR (if possible) obtaining rkey and va
  *
  * returns 0 on success, errno code on failure
  */
-int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
-                     enum   iser_data_dir        cmd_dir)
+int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
+                         enum iser_data_dir cmd_dir)
 {
-       struct iscsi_conn    *iscsi_conn = iser_task->iser_conn->iscsi_conn;
        struct iser_conn     *ib_conn = iser_task->iser_conn->ib_conn;
        struct iser_device   *device = ib_conn->device;
        struct ib_device     *ibdev = device->ib_device;
@@ -370,20 +395,13 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
        regd_buf = &iser_task->rdma_regd[cmd_dir];
 
        aligned_len = iser_data_buf_aligned_len(mem, ibdev);
-       if (aligned_len != mem->dma_nents ||
-           (!ib_conn->fmr_pool && mem->dma_nents > 1)) {
-               iscsi_conn->fmr_unalign_cnt++;
-               iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
-                         aligned_len, mem->size);
-               iser_data_buf_dump(mem, ibdev);
-
-               /* unmap the command data before accessing it */
-               iser_dma_unmap_task_data(iser_task);
-
-               /* allocate copy buf, if we are writing, copy the */
-               /* unaligned scatterlist, dma map the copy        */
-               if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
-                               return -ENOMEM;
+       if (aligned_len != mem->dma_nents) {
+               err = fall_to_bounce_buf(iser_task, ibdev,
+                                        cmd_dir, aligned_len);
+               if (err) {
+                       iser_err("failed to allocate bounce buffer\n");
+                       return err;
+               }
                mem = &iser_task->data_copy[cmd_dir];
        }
 
@@ -395,7 +413,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
                regd_buf->reg.rkey = device->mr->rkey;
                regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
                regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);
-               regd_buf->reg.is_fmr = 0;
+               regd_buf->reg.is_mr = 0;
 
                iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
                         "va: 0x%08lX sz: %ld]\n",
@@ -404,22 +422,159 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
                         (unsigned long)regd_buf->reg.va,
                         (unsigned long)regd_buf->reg.len);
        } else { /* use FMR for multiple dma entries */
-               iser_page_vec_build(mem, ib_conn->page_vec, ibdev);
-               err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
+               iser_page_vec_build(mem, ib_conn->fastreg.fmr.page_vec, ibdev);
+               err = iser_reg_page_vec(ib_conn, ib_conn->fastreg.fmr.page_vec,
+                                       &regd_buf->reg);
                if (err && err != -EAGAIN) {
                        iser_data_buf_dump(mem, ibdev);
                        iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
                                 mem->dma_nents,
                                 ntoh24(iser_task->desc.iscsi_header.dlength));
                        iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
-                                ib_conn->page_vec->data_size, ib_conn->page_vec->length,
-                                ib_conn->page_vec->offset);
-                       for (i=0 ; i<ib_conn->page_vec->length ; i++)
+                                ib_conn->fastreg.fmr.page_vec->data_size,
+                                ib_conn->fastreg.fmr.page_vec->length,
+                                ib_conn->fastreg.fmr.page_vec->offset);
+                       for (i = 0; i < ib_conn->fastreg.fmr.page_vec->length; i++)
                                iser_err("page_vec[%d] = 0x%llx\n", i,
-                                        (unsigned long long) ib_conn->page_vec->pages[i]);
+                                        (unsigned long long) ib_conn->fastreg.fmr.page_vec->pages[i]);
                }
                if (err)
                        return err;
        }
        return 0;
 }
+
+static int iser_fast_reg_mr(struct fast_reg_descriptor *desc,
+                           struct iser_conn *ib_conn,
+                           struct iser_regd_buf *regd_buf,
+                           u32 offset, unsigned int data_size,
+                           unsigned int page_list_len)
+{
+       struct ib_send_wr fastreg_wr, inv_wr;
+       struct ib_send_wr *bad_wr, *wr = NULL;
+       u8 key;
+       int ret;
+
+       if (!desc->valid) {
+               memset(&inv_wr, 0, sizeof(inv_wr));
+               inv_wr.opcode = IB_WR_LOCAL_INV;
+               inv_wr.send_flags = IB_SEND_SIGNALED;
+               inv_wr.ex.invalidate_rkey = desc->data_mr->rkey;
+               wr = &inv_wr;
+               /* Bump the key */
+               key = (u8)(desc->data_mr->rkey & 0x000000FF);
+               ib_update_fast_reg_key(desc->data_mr, ++key);
+       }
+
+       /* Prepare FASTREG WR */
+       memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+       fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+       fastreg_wr.send_flags = IB_SEND_SIGNALED;
+       fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset;
+       fastreg_wr.wr.fast_reg.page_list = desc->data_frpl;
+       fastreg_wr.wr.fast_reg.page_list_len = page_list_len;
+       fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
+       fastreg_wr.wr.fast_reg.length = data_size;
+       fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey;
+       fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
+                                              IB_ACCESS_REMOTE_WRITE |
+                                              IB_ACCESS_REMOTE_READ);
+
+       if (!wr) {
+               wr = &fastreg_wr;
+               atomic_inc(&ib_conn->post_send_buf_count);
+       } else {
+               wr->next = &fastreg_wr;
+               atomic_add(2, &ib_conn->post_send_buf_count);
+       }
+
+       ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
+       if (ret) {
+               if (bad_wr->next)
+                       atomic_sub(2, &ib_conn->post_send_buf_count);
+               else
+                       atomic_dec(&ib_conn->post_send_buf_count);
+               iser_err("fast registration failed, ret:%d\n", ret);
+               return ret;
+       }
+       desc->valid = false;
+
+       regd_buf->reg.mem_h = desc;
+       regd_buf->reg.lkey = desc->data_mr->lkey;
+       regd_buf->reg.rkey = desc->data_mr->rkey;
+       regd_buf->reg.va = desc->data_frpl->page_list[0] + offset;
+       regd_buf->reg.len = data_size;
+       regd_buf->reg.is_mr = 1;
+
+       return ret;
+}
+
+/**
+ * iser_reg_rdma_mem_frwr - Registers memory intended for RDMA,
+ * using Fast Registration WR (if possible) obtaining rkey and va
+ *
+ * returns 0 on success, errno code on failure
+ */
+int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task,
+                          enum iser_data_dir cmd_dir)
+{
+       struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+       struct iser_device *device = ib_conn->device;
+       struct ib_device *ibdev = device->ib_device;
+       struct iser_data_buf *mem = &iser_task->data[cmd_dir];
+       struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
+       struct fast_reg_descriptor *desc;
+       unsigned int data_size, page_list_len;
+       int err, aligned_len;
+       unsigned long flags;
+       u32 offset;
+
+       aligned_len = iser_data_buf_aligned_len(mem, ibdev);
+       if (aligned_len != mem->dma_nents) {
+               err = fall_to_bounce_buf(iser_task, ibdev,
+                                        cmd_dir, aligned_len);
+               if (err) {
+                       iser_err("failed to allocate bounce buffer\n");
+                       return err;
+               }
+               mem = &iser_task->data_copy[cmd_dir];
+       }
+
+       /* if there is a single DMA entry, the device's DMA MR suffices */
+       if (mem->dma_nents == 1) {
+               struct scatterlist *sg = (struct scatterlist *)mem->buf;
+
+               regd_buf->reg.lkey = device->mr->lkey;
+               regd_buf->reg.rkey = device->mr->rkey;
+               regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
+               regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);
+               regd_buf->reg.is_mr = 0;
+       } else {
+               spin_lock_irqsave(&ib_conn->lock, flags);
+               desc = list_first_entry(&ib_conn->fastreg.frwr.pool,
+                                       struct fast_reg_descriptor, list);
+               list_del(&desc->list);
+               spin_unlock_irqrestore(&ib_conn->lock, flags);
+               page_list_len = iser_sg_to_page_vec(mem, device->ib_device,
+                                                   desc->data_frpl->page_list,
+                                                   &offset, &data_size);
+
+               if (page_list_len * SIZE_4K < data_size) {
+                       iser_err("fast reg page_list too short to hold this SG\n");
+                       err = -EINVAL;
+                       goto err_reg;
+               }
+
+               err = iser_fast_reg_mr(desc, ib_conn, regd_buf,
+                                      offset, data_size, page_list_len);
+               if (err)
+                       goto err_reg;
+       }
+
+       return 0;
+err_reg:
+       spin_lock_irqsave(&ib_conn->lock, flags);
+       list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+       spin_unlock_irqrestore(&ib_conn->lock, flags);
+       return err;
+}
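
When a descriptor is stale, iser_fast_reg_mr() above chains a LOCAL_INV ahead of the FAST_REG_MR and bumps the low byte of the rkey so the re-registration carries a fresh key. A sketch of just that key arithmetic; the real ib_update_fast_reg_key() stores the result in the MR, and the masking below is an assumption matching the 0x000000FF extraction in the hunk:

    #include <stdio.h>
    #include <stdint.h>

    /* replace the low 8 "key" bits of an rkey, keeping the MR index bits */
    static uint32_t update_key(uint32_t rkey, uint8_t newkey)
    {
        return (rkey & 0xFFFFFF00u) | newkey;
    }

    int main(void)
    {
        uint32_t rkey = 0x12345678;
        uint8_t key = (uint8_t)(rkey & 0xFF);   /* current key byte */

        rkey = update_key(rkey, ++key);         /* bump before re-registering */
        printf("new rkey: 0x%08X\n", rkey);     /* 0x12345679 */
        return 0;
    }
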
index 2c4941d0656b2e389cb0d38f978682ce73388311..afe95674008be88104d384923129d40fda9f75e8 100644 (file)
@@ -73,6 +73,36 @@ static int iser_create_device_ib_res(struct iser_device *device)
 {
        int i, j;
        struct iser_cq_desc *cq_desc;
+       struct ib_device_attr *dev_attr;
+
+       dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL);
+       if (!dev_attr)
+               return -ENOMEM;
+
+       if (ib_query_device(device->ib_device, dev_attr)) {
+               pr_warn("Query device failed for %s\n", device->ib_device->name);
+               goto dev_attr_err;
+       }
+
+       /* Assign function handles  - based on FMR support */
+       if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
+           device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
+               iser_info("FMR supported, using FMR for registration\n");
+               device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
+               device->iser_free_rdma_reg_res = iser_free_fmr_pool;
+               device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr;
+               device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
+       } else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+               iser_info("FRWR supported, using FRWR for registration\n");
+               device->iser_alloc_rdma_reg_res = iser_create_frwr_pool;
+               device->iser_free_rdma_reg_res = iser_free_frwr_pool;
+               device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr;
+               device->iser_unreg_rdma_mem = iser_unreg_mem_frwr;
+       } else {
+               iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n");
+               goto dev_attr_err;
+       }
 
        device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
        iser_info("using %d CQs, device %s supports %d vectors\n",
@@ -128,6 +158,7 @@ static int iser_create_device_ib_res(struct iser_device *device)
        if (ib_register_event_handler(&device->event_handler))
                goto handler_err;
 
+       kfree(dev_attr);
        return 0;
 
 handler_err:
@@ -147,6 +178,8 @@ pd_err:
        kfree(device->cq_desc);
 cq_desc_err:
        iser_err("failed to allocate an IB resource\n");
+dev_attr_err:
+       kfree(dev_attr);
        return -1;
 }
 
@@ -178,56 +211,23 @@ static void iser_free_device_ib_res(struct iser_device *device)
 }
 
 /**
- * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP)
+ * iser_create_fmr_pool - Creates FMR pool and page_vector
  *
- * returns 0 on success, -1 on failure
+ * returns 0 on success, or errno code on failure
  */
-static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
+int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
 {
-       struct iser_device      *device;
-       struct ib_qp_init_attr  init_attr;
-       int                     req_err, resp_err, ret = -ENOMEM;
+       struct iser_device *device = ib_conn->device;
        struct ib_fmr_pool_param params;
-       int index, min_index = 0;
-
-       BUG_ON(ib_conn->device == NULL);
-
-       device = ib_conn->device;
-
-       ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
-                                       ISER_RX_LOGIN_SIZE, GFP_KERNEL);
-       if (!ib_conn->login_buf)
-               goto out_err;
-
-       ib_conn->login_req_buf  = ib_conn->login_buf;
-       ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;
-
-       ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
-                               (void *)ib_conn->login_req_buf,
-                               ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
-
-       ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
-                               (void *)ib_conn->login_resp_buf,
-                               ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
-
-       req_err  = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma);
-       resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma);
-
-       if (req_err || resp_err) {
-               if (req_err)
-                       ib_conn->login_req_dma = 0;
-               if (resp_err)
-                       ib_conn->login_resp_dma = 0;
-               goto out_err;
-       }
+       int ret = -ENOMEM;
 
-       ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
-                                   (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
-                                   GFP_KERNEL);
-       if (!ib_conn->page_vec)
-               goto out_err;
+       ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) +
+                                               (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)),
+                                               GFP_KERNEL);
+       if (!ib_conn->fastreg.fmr.page_vec)
+               return ret;
 
-       ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
+       ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1);
 
        params.page_shift        = SHIFT_4K;
        /* when the first/last SG element are not start/end *
@@ -235,24 +235,143 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
        params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
        /* make the pool size twice the max number of SCSI commands *
         * the ML is expected to queue, watermark for unmap at 50%  */
-       params.pool_size         = ISCSI_DEF_XMIT_CMDS_MAX * 2;
-       params.dirty_watermark   = ISCSI_DEF_XMIT_CMDS_MAX;
+       params.pool_size         = cmds_max * 2;
+       params.dirty_watermark   = cmds_max;
        params.cache             = 0;
        params.flush_function    = NULL;
        params.access            = (IB_ACCESS_LOCAL_WRITE  |
                                    IB_ACCESS_REMOTE_WRITE |
                                    IB_ACCESS_REMOTE_READ);
 
-       ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
-       ret = PTR_ERR(ib_conn->fmr_pool);
-       if (IS_ERR(ib_conn->fmr_pool) && ret != -ENOSYS) {
-               ib_conn->fmr_pool = NULL;
-               goto out_err;
-       } else if (ret == -ENOSYS) {
-               ib_conn->fmr_pool = NULL;
+       ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, &params);
+       if (!IS_ERR(ib_conn->fastreg.fmr.pool))
+               return 0;
+
+       /* no FMR => no need for page_vec */
+       kfree(ib_conn->fastreg.fmr.page_vec);
+       ib_conn->fastreg.fmr.page_vec = NULL;
+
+       ret = PTR_ERR(ib_conn->fastreg.fmr.pool);
+       ib_conn->fastreg.fmr.pool = NULL;
+       if (ret != -ENOSYS) {
+               iser_err("FMR allocation failed, err %d\n", ret);
+               return ret;
+       } else {
                iser_warn("FMRs are not supported, using unaligned mode\n");
-               ret = 0;
+               return 0;
        }
+}
+
+/**
+ * iser_free_fmr_pool - releases the FMR pool and page vec
+ */
+void iser_free_fmr_pool(struct iser_conn *ib_conn)
+{
+       iser_info("freeing conn %p fmr pool %p\n",
+                 ib_conn, ib_conn->fastreg.fmr.pool);
+
+       if (ib_conn->fastreg.fmr.pool != NULL)
+               ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool);
+
+       ib_conn->fastreg.fmr.pool = NULL;
+
+       kfree(ib_conn->fastreg.fmr.page_vec);
+       ib_conn->fastreg.fmr.page_vec = NULL;
+}
+
+/**
+ * iser_create_frwr_pool - Creates pool of fast_reg descriptors
+ * for fast registration work requests.
+ * returns 0 on success, or errno code on failure
+ */
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+{
+       struct iser_device      *device = ib_conn->device;
+       struct fast_reg_descriptor      *desc;
+       int i, ret;
+
+       INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool);
+       ib_conn->fastreg.frwr.pool_size = 0;
+       for (i = 0; i < cmds_max; i++) {
+               desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+               if (!desc) {
+                       iser_err("Failed to allocate a new fast_reg descriptor\n");
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device,
+                                                        ISCSI_ISER_SG_TABLESIZE + 1);
+               if (IS_ERR(desc->data_frpl)) {
+                       ret = PTR_ERR(desc->data_frpl);
+                       iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret);
+                       goto fast_reg_page_failure;
+               }
+
+               desc->data_mr = ib_alloc_fast_reg_mr(device->pd,
+                                                    ISCSI_ISER_SG_TABLESIZE + 1);
+               if (IS_ERR(desc->data_mr)) {
+                       ret = PTR_ERR(desc->data_mr);
+                       iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
+                       goto fast_reg_mr_failure;
+               }
+               desc->valid = true;
+               list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+               ib_conn->fastreg.frwr.pool_size++;
+       }
+
+       return 0;
+
+fast_reg_mr_failure:
+       ib_free_fast_reg_page_list(desc->data_frpl);
+fast_reg_page_failure:
+       kfree(desc);
+err:
+       iser_free_frwr_pool(ib_conn);
+       return ret;
+}
+
+/**
+ * iser_free_frwr_pool - releases the pool of fast_reg descriptors
+ */
+void iser_free_frwr_pool(struct iser_conn *ib_conn)
+{
+       struct fast_reg_descriptor *desc, *tmp;
+       int i = 0;
+
+       if (list_empty(&ib_conn->fastreg.frwr.pool))
+               return;
+
+       iser_info("freeing conn %p frwr pool\n", ib_conn);
+
+       list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) {
+               list_del(&desc->list);
+               ib_free_fast_reg_page_list(desc->data_frpl);
+               ib_dereg_mr(desc->data_mr);
+               kfree(desc);
+               ++i;
+       }
+
+       if (i < ib_conn->fastreg.frwr.pool_size)
+               iser_warn("pool still has %d regions registered\n",
+                         ib_conn->fastreg.frwr.pool_size - i);
+}
+
+/**
+ * iser_create_ib_conn_res - Creates the Queue-Pair (QP)
+ *
+ * returns 0 on success, -1 on failure
+ */
+static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
+{
+       struct iser_device      *device;
+       struct ib_qp_init_attr  init_attr;
+       int                     ret = -ENOMEM;
+       int index, min_index = 0;
+
+       BUG_ON(ib_conn->device == NULL);
+
+       device = ib_conn->device;
 
        memset(&init_attr, 0, sizeof init_attr);
 
@@ -282,9 +401,9 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
                goto out_err;
 
        ib_conn->qp = ib_conn->cma_id->qp;
-       iser_info("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
+       iser_info("setting conn %p cma_id %p qp %p\n",
                  ib_conn, ib_conn->cma_id,
-                 ib_conn->fmr_pool, ib_conn->cma_id->qp);
+                 ib_conn->cma_id->qp);
        return ret;
 
 out_err:
@@ -293,7 +412,7 @@ out_err:
 }
 
 /**
- * releases the FMR pool and QP objects, returns 0 on success,
+ * releases the QP object; returns 0 on success,
  * -1 on failure
  */
 static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
@@ -301,13 +420,11 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
        int cq_index;
        BUG_ON(ib_conn == NULL);
 
-       iser_info("freeing conn %p cma_id %p fmr pool %p qp %p\n",
+       iser_info("freeing conn %p cma_id %p qp %p\n",
                  ib_conn, ib_conn->cma_id,
-                 ib_conn->fmr_pool, ib_conn->qp);
+                 ib_conn->qp);
 
        /* qp is created only once both addr & route are resolved */
-       if (ib_conn->fmr_pool != NULL)
-               ib_destroy_fmr_pool(ib_conn->fmr_pool);
 
        if (ib_conn->qp != NULL) {
                cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
@@ -316,21 +433,7 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
                rdma_destroy_qp(ib_conn->cma_id);
        }
 
-       ib_conn->fmr_pool = NULL;
        ib_conn->qp       = NULL;
-       kfree(ib_conn->page_vec);
-
-       if (ib_conn->login_buf) {
-               if (ib_conn->login_req_dma)
-                       ib_dma_unmap_single(ib_conn->device->ib_device,
-                               ib_conn->login_req_dma,
-                               ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
-               if (ib_conn->login_resp_dma)
-                       ib_dma_unmap_single(ib_conn->device->ib_device,
-                               ib_conn->login_resp_dma,
-                               ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
-               kfree(ib_conn->login_buf);
-       }
 
        return 0;
 }
@@ -694,7 +797,7 @@ int iser_reg_page_vec(struct iser_conn     *ib_conn,
        page_list = page_vec->pages;
        io_addr   = page_list[0];
 
-       mem  = ib_fmr_pool_map_phys(ib_conn->fmr_pool,
+       mem  = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool,
                                    page_list,
                                    page_vec->length,
                                    io_addr);
@@ -709,7 +812,7 @@ int iser_reg_page_vec(struct iser_conn     *ib_conn,
        mem_reg->rkey  = mem->fmr->rkey;
        mem_reg->len   = page_vec->length * SIZE_4K;
        mem_reg->va    = io_addr;
-       mem_reg->is_fmr = 1;
+       mem_reg->is_mr = 1;
        mem_reg->mem_h = (void *)mem;
 
        mem_reg->va   += page_vec->offset;
@@ -727,12 +830,18 @@ int iser_reg_page_vec(struct iser_conn     *ib_conn,
 }
 
 /**
- * Unregister (previosuly registered) memory.
+ * Unregister (previously registered using FMR) memory.
+ * Does nothing if the memory was not FMR-registered.
  */
-void iser_unreg_mem(struct iser_mem_reg *reg)
+void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
+                       enum iser_data_dir cmd_dir)
 {
+       struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
        int ret;
 
+       if (!reg->is_mr)
+               return;
+
        iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
 
        ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
@@ -742,6 +851,23 @@ void iser_unreg_mem(struct iser_mem_reg *reg)
        reg->mem_h = NULL;
 }
 
+void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
+                        enum iser_data_dir cmd_dir)
+{
+       struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
+       struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+       struct fast_reg_descriptor *desc = reg->mem_h;
+
+       if (!reg->is_mr)
+               return;
+
+       reg->mem_h = NULL;
+       reg->is_mr = 0;
+       spin_lock_bh(&ib_conn->lock);
+       list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+       spin_unlock_bh(&ib_conn->lock);
+}
+
 int iser_post_recvl(struct iser_conn *ib_conn)
 {
        struct ib_recv_wr rx_wr, *rx_wr_failed;
@@ -779,7 +905,7 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count)
                rx_wr->sg_list  = &rx_desc->rx_sg;
                rx_wr->num_sge  = 1;
                rx_wr->next     = rx_wr + 1;
-               my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
+               my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask;
        }
 
        rx_wr--;
@@ -863,7 +989,11 @@ static int iser_drain_tx_cq(struct iser_device  *device, int cq_index)
                if (wc.status == IB_WC_SUCCESS) {
                        if (wc.opcode == IB_WC_SEND)
                                iser_snd_completion(tx_desc, ib_conn);
-                       else
+                       else if (wc.opcode == IB_WC_LOCAL_INV ||
+                                wc.opcode == IB_WC_FAST_REG_MR) {
+                               atomic_dec(&ib_conn->post_send_buf_count);
+                               continue;
+                       } else
                                iser_err("expected opcode %d got %d\n",
                                        IB_WC_SEND, wc.opcode);
                } else {
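
The FRWR descriptors allocated in iser_create_frwr_pool() above circulate through a locked free list: borrowed in iser_reg_rdma_mem_frwr(), returned in iser_unreg_mem_frwr(). A hedged userspace sketch of that pool pattern, with a pthread mutex standing in for the connection spinlock:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct desc {
        struct desc *next;
        int id;
    };

    static struct desc *pool;
    static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

    static struct desc *pool_get(void)
    {
        pthread_mutex_lock(&pool_lock);
        struct desc *d = pool;
        if (d)
            pool = d->next;
        pthread_mutex_unlock(&pool_lock);
        return d;
    }

    static void pool_put(struct desc *d)
    {
        pthread_mutex_lock(&pool_lock);
        d->next = pool;
        pool = d;
        pthread_mutex_unlock(&pool_lock);
    }

    int main(void)
    {
        for (int i = 0; i < 3; i++) {  /* cf. iser_create_frwr_pool() */
            struct desc *d = malloc(sizeof(*d));
            d->id = i;
            pool_put(d);
        }

        struct desc *d = pool_get();   /* borrow for one command */
        printf("using desc %d\n", d->id);
        pool_put(d);                   /* cf. iser_unreg_mem_frwr() */
        return 0;
    }
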
index 2aafb809e067b7a204a3fb1a92412756d9ceb78f..dfd1e36f57531f0e9e9cfeed14048460d96f6e30 100644 (file)
@@ -576,6 +576,7 @@ struct adapter {
        struct l2t_data *l2t;
        void *uld_handle[CXGB4_ULD_MAX];
        struct list_head list_node;
+       struct list_head rcu_node;
 
        struct tid_info tids;
        void **tid_release_head;
index 5a3256b083f23f7e69d8013c4ca1ac0585ec33c4..038df4b9613988d4fd030760be6a378e9e53555e 100644 (file)
@@ -60,6 +60,7 @@
 #include <linux/workqueue.h>
 #include <net/neighbour.h>
 #include <net/netevent.h>
+#include <net/addrconf.h>
 #include <asm/uaccess.h>
 
 #include "cxgb4.h"
 #include "t4fw_api.h"
 #include "l2t.h"
 
+#include <../drivers/net/bonding/bonding.h>
+
+#ifdef DRV_VERSION
+#undef DRV_VERSION
+#endif
 #define DRV_VERSION "2.0.0-ko"
 #define DRV_DESC "Chelsio T4/T5 Network Driver"
 
@@ -400,6 +406,9 @@ static struct dentry *cxgb4_debugfs_root;
 
 static LIST_HEAD(adapter_list);
 static DEFINE_MUTEX(uld_mutex);
+/* Adapter list to be accessed from atomic context */
+static LIST_HEAD(adap_rcu_list);
+static DEFINE_SPINLOCK(adap_rcu_lock);
 static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
 static const char *uld_str[] = { "RDMA", "iSCSI" };
 
@@ -3227,6 +3236,38 @@ static int tid_init(struct tid_info *t)
        return 0;
 }
 
+static int cxgb4_clip_get(const struct net_device *dev,
+                         const struct in6_addr *lip)
+{
+       struct adapter *adap;
+       struct fw_clip_cmd c;
+
+       adap = netdev2adap(dev);
+       memset(&c, 0, sizeof(c));
+       c.op_to_write = htonl(FW_CMD_OP(FW_CLIP_CMD) |
+                       FW_CMD_REQUEST | FW_CMD_WRITE);
+       c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c));
+       *(__be64 *)&c.ip_hi = *(__be64 *)(lip->s6_addr);
+       *(__be64 *)&c.ip_lo = *(__be64 *)(lip->s6_addr + 8);
+       return t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, false);
+}
+
+static int cxgb4_clip_release(const struct net_device *dev,
+                             const struct in6_addr *lip)
+{
+       struct adapter *adap;
+       struct fw_clip_cmd c;
+
+       adap = netdev2adap(dev);
+       memset(&c, 0, sizeof(c));
+       c.op_to_write = htonl(FW_CMD_OP(FW_CLIP_CMD) |
+                       FW_CMD_REQUEST | FW_CMD_READ);
+       c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c));
+       *(__be64 *)&c.ip_hi = *(__be64 *)(lip->s6_addr);
+       *(__be64 *)&c.ip_lo = *(__be64 *)(lip->s6_addr + 8);
+       return t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, false);
+}
+
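
Both CLIP helpers above copy the 128-bit address into the firmware command as two big-endian 64-bit halves; a userspace sketch of the same marshalling (memcpy replaces the kernel's direct __be64 casts, which assume suitably aligned storage):

    #include <stdio.h>
    #include <string.h>
    #include <arpa/inet.h>
    #include <endian.h>

    int main(void)
    {
        struct in6_addr a;
        unsigned long long hi, lo;  /* stand-ins for ip_hi / ip_lo */

        inet_pton(AF_INET6, "2001:db8::1", &a);

        /* first 8 bytes of s6_addr -> ip_hi, last 8 -> ip_lo,
         * already in network byte order */
        memcpy(&hi, a.s6_addr, 8);
        memcpy(&lo, a.s6_addr + 8, 8);

        printf("ip_hi=%016llx ip_lo=%016llx\n",
               (unsigned long long)be64toh(hi),
               (unsigned long long)be64toh(lo));
        return 0;
    }
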
 /**
  *     cxgb4_create_server - create an IP server
  *     @dev: the device
@@ -3246,6 +3287,7 @@ int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
        struct sk_buff *skb;
        struct adapter *adap;
        struct cpl_pass_open_req *req;
+       int ret;
 
        skb = alloc_skb(sizeof(*req), GFP_KERNEL);
        if (!skb)
@@ -3263,10 +3305,78 @@ int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
        req->opt0 = cpu_to_be64(TX_CHAN(chan));
        req->opt1 = cpu_to_be64(CONN_POLICY_ASK |
                                SYN_RSS_ENABLE | SYN_RSS_QUEUE(queue));
-       return t4_mgmt_tx(adap, skb);
+       ret = t4_mgmt_tx(adap, skb);
+       return net_xmit_eval(ret);
 }
 EXPORT_SYMBOL(cxgb4_create_server);
 
+/**
+ *     cxgb4_create_server6 - create an IPv6 server
+ *     @dev: the device
+ *     @stid: the server TID
+ *     @sip: local IPv6 address to bind server to
+ *     @sport: the server's TCP port
+ *     @queue: queue to direct messages from this server to
+ *
+ *     Create an IPv6 server for the given port and address.
+ *     Returns <0 on error and one of the %NET_XMIT_* values on success.
+ */
+int cxgb4_create_server6(const struct net_device *dev, unsigned int stid,
+                        const struct in6_addr *sip, __be16 sport,
+                        unsigned int queue)
+{
+       unsigned int chan;
+       struct sk_buff *skb;
+       struct adapter *adap;
+       struct cpl_pass_open_req6 *req;
+       int ret;
+
+       skb = alloc_skb(sizeof(*req), GFP_KERNEL);
+       if (!skb)
+               return -ENOMEM;
+
+       adap = netdev2adap(dev);
+       req = (struct cpl_pass_open_req6 *)__skb_put(skb, sizeof(*req));
+       INIT_TP_WR(req, 0);
+       OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, stid));
+       req->local_port = sport;
+       req->peer_port = htons(0);
+       req->local_ip_hi = *(__be64 *)(sip->s6_addr);
+       req->local_ip_lo = *(__be64 *)(sip->s6_addr + 8);
+       req->peer_ip_hi = cpu_to_be64(0);
+       req->peer_ip_lo = cpu_to_be64(0);
+       chan = rxq_to_chan(&adap->sge, queue);
+       req->opt0 = cpu_to_be64(TX_CHAN(chan));
+       req->opt1 = cpu_to_be64(CONN_POLICY_ASK |
+                               SYN_RSS_ENABLE | SYN_RSS_QUEUE(queue));
+       ret = t4_mgmt_tx(adap, skb);
+       return net_xmit_eval(ret);
+}
+EXPORT_SYMBOL(cxgb4_create_server6);
+
+int cxgb4_remove_server(const struct net_device *dev, unsigned int stid,
+                       unsigned int queue, bool ipv6)
+{
+       struct sk_buff *skb;
+       struct adapter *adap;
+       struct cpl_close_listsvr_req *req;
+       int ret;
+
+       adap = netdev2adap(dev);
+
+       skb = alloc_skb(sizeof(*req), GFP_KERNEL);
+       if (!skb)
+               return -ENOMEM;
+
+       req = (struct cpl_close_listsvr_req *)__skb_put(skb, sizeof(*req));
+       INIT_TP_WR(req, 0);
+       OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, stid));
+       req->reply_ctrl = htons(NO_REPLY(0) | (ipv6 ? LISTSVR_IPV6(1) :
+                               LISTSVR_IPV6(0)) | QUEUENO(queue));
+       ret = t4_mgmt_tx(adap, skb);
+       return net_xmit_eval(ret);
+}
+EXPORT_SYMBOL(cxgb4_remove_server);
+
 /**
  *     cxgb4_best_mtu - find the entry in the MTU table closest to an MTU
  *     @mtus: the HW MTU table
@@ -3721,6 +3831,10 @@ static void attach_ulds(struct adapter *adap)
 {
        unsigned int i;
 
+       spin_lock(&adap_rcu_lock);
+       list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list);
+       spin_unlock(&adap_rcu_lock);
+
        mutex_lock(&uld_mutex);
        list_add_tail(&adap->list_node, &adapter_list);
        for (i = 0; i < CXGB4_ULD_MAX; i++)
@@ -3746,6 +3860,10 @@ static void detach_ulds(struct adapter *adap)
                netevent_registered = false;
        }
        mutex_unlock(&uld_mutex);
+
+       spin_lock(&adap_rcu_lock);
+       list_del_rcu(&adap->rcu_node);
+       spin_unlock(&adap_rcu_lock);
 }
 
 static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
@@ -3809,6 +3927,169 @@ int cxgb4_unregister_uld(enum cxgb4_uld type)
 }
 EXPORT_SYMBOL(cxgb4_unregister_uld);
 
+/* Check whether the netdev on which the event occurred belongs to us.
+ * Returns success (1) if it does, failure (0) otherwise.
+ */
+static int cxgb4_netdev(struct net_device *netdev)
+{
+       struct adapter *adap;
+       int i;
+
+       spin_lock(&adap_rcu_lock);
+       list_for_each_entry_rcu(adap, &adap_rcu_list, rcu_node)
+               for (i = 0; i < MAX_NPORTS; i++)
+                       if (adap->port[i] == netdev) {
+                               spin_unlock(&adap_rcu_lock);
+                               return 1;
+                       }
+       spin_unlock(&adap_rcu_lock);
+       return 0;
+}
+
+static int clip_add(struct net_device *event_dev, struct inet6_ifaddr *ifa,
+                   unsigned long event)
+{
+       int ret = NOTIFY_DONE;
+
+       rcu_read_lock();
+       if (cxgb4_netdev(event_dev)) {
+               switch (event) {
+               case NETDEV_UP:
+                       ret = cxgb4_clip_get(event_dev,
+                               (const struct in6_addr *)ifa->addr.s6_addr);
+                       if (ret < 0) {
+                               rcu_read_unlock();
+                               return ret;
+                       }
+                       ret = NOTIFY_OK;
+                       break;
+               case NETDEV_DOWN:
+                       cxgb4_clip_release(event_dev,
+                               (const struct in6_addr *)ifa->addr.s6_addr);
+                       ret = NOTIFY_OK;
+                       break;
+               default:
+                       break;
+               }
+       }
+       rcu_read_unlock();
+       return ret;
+}
+
+static int cxgb4_inet6addr_handler(struct notifier_block *this,
+               unsigned long event, void *data)
+{
+       struct inet6_ifaddr *ifa = data;
+       struct net_device *event_dev;
+       int ret = NOTIFY_DONE;
+       int cnt;
+       struct bonding *bond = netdev_priv(ifa->idev->dev);
+       struct slave *slave;
+       struct pci_dev *first_pdev = NULL;
+
+       if (ifa->idev->dev->priv_flags & IFF_802_1Q_VLAN) {
+               event_dev = vlan_dev_real_dev(ifa->idev->dev);
+               ret = clip_add(event_dev, ifa, event);
+       } else if (ifa->idev->dev->flags & IFF_MASTER) {
+               /* Two different adapters may be enslaved to the same
+                * bond. Find each such adapter and add the CLIP entry
+                * to it only once.
+                */
+               read_lock(&bond->lock);
+               bond_for_each_slave(bond, slave, cnt) {
+                       if (!first_pdev) {
+                               ret = clip_add(slave->dev, ifa, event);
+                               /* Initialize first_pdev only if clip_add()
+                                * succeeded, i.e. the slave is our device
+                                */
+                               if (ret == NOTIFY_OK)
+                                       first_pdev = to_pci_dev(
+                                                       slave->dev->dev.parent);
+                       } else if (first_pdev !=
+                                  to_pci_dev(slave->dev->dev.parent))
+                                       ret = clip_add(slave->dev, ifa, event);
+               }
+               read_unlock(&bond->lock);
+       } else
+               ret = clip_add(ifa->idev->dev, ifa, event);
+
+       return ret;
+}
+
+static struct notifier_block cxgb4_inet6addr_notifier = {
+       .notifier_call = cxgb4_inet6addr_handler
+};
+
+/* Retrieves IPv6 addresses from a root device (bond, vlan) associated with
+ * a physical device.
+ * The physical device reference is needed to send the actual CLIP command.
+ */
+static int update_dev_clip(struct net_device *root_dev, struct net_device *dev)
+{
+       struct inet6_dev *idev = NULL;
+       struct inet6_ifaddr *ifa;
+       int ret = 0;
+
+       idev = __in6_dev_get(root_dev);
+       if (!idev)
+               return ret;
+
+       read_lock_bh(&idev->lock);
+       list_for_each_entry(ifa, &idev->addr_list, if_list) {
+               ret = cxgb4_clip_get(dev,
+                               (const struct in6_addr *)ifa->addr.s6_addr);
+               if (ret < 0)
+                       break;
+       }
+       read_unlock_bh(&idev->lock);
+
+       return ret;
+}
+
+static int update_root_dev_clip(struct net_device *dev)
+{
+       struct net_device *root_dev = NULL;
+       int i, ret = 0;
+
+       /* First populate the real net device's IPv6 addresses */
+       ret = update_dev_clip(dev, dev);
+       if (ret)
+               return ret;
+
+       /* Parse all bond and vlan devices layered on top of the physical dev */
+       for (i = 0; i < VLAN_N_VID; i++) {
+               root_dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), i);
+               if (!root_dev)
+                       continue;
+
+               ret = update_dev_clip(root_dev, dev);
+               if (ret)
+                       break;
+       }
+       return ret;
+}
+
+static void update_clip(const struct adapter *adap)
+{
+       int i;
+       struct net_device *dev;
+       int ret;
+
+       rcu_read_lock();
+
+       for (i = 0; i < MAX_NPORTS; i++) {
+               dev = adap->port[i];
+               ret = 0;
+
+               if (dev)
+                       ret = update_root_dev_clip(dev);
+
+               if (ret < 0)
+                       break;
+       }
+       rcu_read_unlock();
+}
+
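
The cxgb4_inet6addr_notifier defined above (and registered in cxgb4_init_module() further down) uses the standard inet6addr notifier hook; a minimal, hedged module sketch of that registration pattern, with the device filtering and CLIP calls omitted:

    #include <linux/module.h>
    #include <linux/notifier.h>
    #include <net/addrconf.h>
    #include <net/if_inet6.h>

    static int demo_inet6addr_event(struct notifier_block *nb,
                                    unsigned long event, void *data)
    {
        struct inet6_ifaddr *ifa = data;

        pr_info("inet6 event %lu on %s\n", event, ifa->idev->dev->name);
        return NOTIFY_DONE;
    }

    static struct notifier_block demo_notifier = {
        .notifier_call = demo_inet6addr_event,
    };

    static int __init demo_init(void)
    {
        register_inet6addr_notifier(&demo_notifier);
        return 0;
    }

    static void __exit demo_exit(void)
    {
        unregister_inet6addr_notifier(&demo_notifier);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
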
 /**
  *     cxgb_up - enable the adapter
  *     @adap: adapter being enabled
@@ -3854,6 +4135,7 @@ static int cxgb_up(struct adapter *adap)
        t4_intr_enable(adap);
        adap->flags |= FULL_INIT_DONE;
        notify_ulds(adap, CXGB4_STATE_UP);
+       update_clip(adap);
  out:
        return err;
  irq_err:
@@ -5870,11 +6152,15 @@ static int __init cxgb4_init_module(void)
        ret = pci_register_driver(&cxgb4_driver);
        if (ret < 0)
                debugfs_remove(cxgb4_debugfs_root);
+
+       register_inet6addr_notifier(&cxgb4_inet6addr_notifier);
+
        return ret;
 }
 
 static void __exit cxgb4_cleanup_module(void)
 {
+       unregister_inet6addr_notifier(&cxgb4_inet6addr_notifier);
        pci_unregister_driver(&cxgb4_driver);
        debugfs_remove(cxgb4_debugfs_root);  /* NULL ok */
        flush_workqueue(workq);
index 4faf4d067ee71947c74c0a3e6e234208b315bee2..6f21f2451c3052a24ecd2e8d8d16ee9bdf9996fe 100644 (file)
@@ -154,6 +154,11 @@ struct in6_addr;
 int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
                        __be32 sip, __be16 sport, __be16 vlan,
                        unsigned int queue);
+int cxgb4_create_server6(const struct net_device *dev, unsigned int stid,
+                        const struct in6_addr *sip, __be16 sport,
+                        unsigned int queue);
+int cxgb4_remove_server(const struct net_device *dev, unsigned int stid,
+                       unsigned int queue, bool ipv6);
 int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
                               __be32 sip, __be16 sport, __be16 vlan,
                               unsigned int queue,
index 01d484441200d4675faff505338fb7b7a0789078..cd6874b571ee2585c3ef1c9f8c418c7c643b8d46 100644 (file)
@@ -320,6 +320,21 @@ struct cpl_act_open_req6 {
        __be32 opt2;
 };
 
+struct cpl_t5_act_open_req6 {
+       WR_HDR;
+       union opcode_tid ot;
+       __be16 local_port;
+       __be16 peer_port;
+       __be64 local_ip_hi;
+       __be64 local_ip_lo;
+       __be64 peer_ip_hi;
+       __be64 peer_ip_lo;
+       __be64 opt0;
+       __be32 rsvd;
+       __be32 opt2;
+       __be64 params;
+};
+
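Relative to cpl_act_open_req6, the T5 variant adds a 64-bit params word. A hedged sketch of how a caller might fill it; la6/ra6, atid and the opt0/opt2/params values are placeholders, and INIT_TP_WR, OPCODE_TID and MK_OPCODE_TID are the usual helpers:

static void mk_t5_act_open_req6(struct sk_buff *skb, unsigned int atid,
				struct sockaddr_in6 *la6,
				struct sockaddr_in6 *ra6,
				u64 opt0, u32 opt2, u64 params)
{
	struct cpl_t5_act_open_req6 *req;

	req = (struct cpl_t5_act_open_req6 *)__skb_put(skb, sizeof(*req));
	memset(req, 0, sizeof(*req));
	INIT_TP_WR(req, 0);
	OPCODE_TID(req)  = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, atid));
	req->local_port  = la6->sin6_port;
	req->peer_port   = ra6->sin6_port;
	req->local_ip_hi = *(__be64 *)(la6->sin6_addr.s6_addr);
	req->local_ip_lo = *(__be64 *)(la6->sin6_addr.s6_addr + 8);
	req->peer_ip_hi  = *(__be64 *)(ra6->sin6_addr.s6_addr);
	req->peer_ip_lo  = *(__be64 *)(ra6->sin6_addr.s6_addr + 8);
	req->opt0        = cpu_to_be64(opt0);
	req->params      = cpu_to_be64(params);
	req->opt2        = cpu_to_be32(opt2);
}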
 struct cpl_act_open_rpl {
        union opcode_tid ot;
        __be32 atid_status;
@@ -405,7 +420,7 @@ struct cpl_close_listsvr_req {
        WR_HDR;
        union opcode_tid ot;
        __be16 reply_ctrl;
-#define LISTSVR_IPV6 (1 << 14)
+#define LISTSVR_IPV6(x) ((x) << 14)
        __be16 rsvd;
 };
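Turning LISTSVR_IPV6 from a bare flag into a value macro lets a single call site encode either address family. A hedged sketch of building a close-listen-server request; QUEUENO is assumed to be the usual reply_ctrl queue-field helper:

static void mk_close_listsvr_req(struct sk_buff *skb, unsigned int stid,
				 unsigned int queue, bool ipv6)
{
	struct cpl_close_listsvr_req *req;

	req = (struct cpl_close_listsvr_req *)__skb_put(skb, sizeof(*req));
	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
						    stid));
	req->reply_ctrl = htons(LISTSVR_IPV6(ipv6 ? 1 : 0) | QUEUENO(queue));
}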
 
index d1c755f78aaf63fb02f7a3a6451a08de16ce17bc..6f77ac487743edfbe899f470805f58cc38d9c61b 100644 (file)
@@ -616,6 +616,7 @@ enum fw_cmd_opcodes {
        FW_RSS_IND_TBL_CMD             = 0x20,
        FW_RSS_GLB_CONFIG_CMD          = 0x22,
        FW_RSS_VI_CONFIG_CMD           = 0x23,
+       FW_CLIP_CMD                    = 0x28,
        FW_LASTC2E_CMD                 = 0x40,
        FW_ERROR_CMD                   = 0x80,
        FW_DEBUG_CMD                   = 0x81,
@@ -2062,6 +2063,28 @@ struct fw_rss_vi_config_cmd {
        } u;
 };
 
+struct fw_clip_cmd {
+       __be32 op_to_write;
+       __be32 alloc_to_len16;
+       __be64 ip_hi;
+       __be64 ip_lo;
+       __be32 r4[2];
+};
+
+#define S_FW_CLIP_CMD_ALLOC     31
+#define M_FW_CLIP_CMD_ALLOC     0x1
+#define V_FW_CLIP_CMD_ALLOC(x)  ((x) << S_FW_CLIP_CMD_ALLOC)
+#define G_FW_CLIP_CMD_ALLOC(x)  \
+       (((x) >> S_FW_CLIP_CMD_ALLOC) & M_FW_CLIP_CMD_ALLOC)
+#define F_FW_CLIP_CMD_ALLOC     V_FW_CLIP_CMD_ALLOC(1U)
+
+#define S_FW_CLIP_CMD_FREE      30
+#define M_FW_CLIP_CMD_FREE      0x1
+#define V_FW_CLIP_CMD_FREE(x)   ((x) << S_FW_CLIP_CMD_FREE)
+#define G_FW_CLIP_CMD_FREE(x)   \
+       (((x) >> S_FW_CLIP_CMD_FREE) & M_FW_CLIP_CMD_FREE)
+#define F_FW_CLIP_CMD_FREE      V_FW_CLIP_CMD_FREE(1U)
+
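A hedged sketch of wrapping the new command to allocate a CLIP entry for one IPv6 address, mirroring what cxgb4_clip_get() (used earlier) presumably does; FW_CMD_OP/FW_CMD_REQUEST/FW_CMD_WRITE, FW_LEN16 and t4_wr_mbox() are the existing cxgb4 helpers:

static int clip_alloc(struct adapter *adap, const struct in6_addr *lip)
{
	struct fw_clip_cmd c;

	memset(&c, 0, sizeof(c));
	c.op_to_write = htonl(FW_CMD_OP(FW_CLIP_CMD) |
			      FW_CMD_REQUEST | FW_CMD_WRITE);
	c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c));
	c.ip_hi = *(__be64 *)(lip->s6_addr);
	c.ip_lo = *(__be64 *)(lip->s6_addr + 8);
	return t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
}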
 enum fw_error_type {
        FW_ERROR_TYPE_EXCEPTION         = 0x0,
        FW_ERROR_TYPE_HWMODULE          = 0x1,
index f984a89c27df1afa64a4cfbc10075479ccd76cf9..dd6876321116a0bbad26a5fbea55aca3731e024f 100644 (file)
@@ -1909,7 +1909,8 @@ static int qp_get_mtt_size(struct mlx4_qp_context *qpc)
        int log_rq_stride = qpc->rq_size_stride & 7;
        int srq = (be32_to_cpu(qpc->srqn) >> 24) & 1;
        int rss = (be32_to_cpu(qpc->flags) >> 13) & 1;
-       int xrc = (be32_to_cpu(qpc->local_qpn) >> 23) & 1;
+       u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff;
+       int xrc = (ts == MLX4_QP_ST_XRC) ? 1 : 0;
        int sq_size;
        int rq_size;
        int total_pages;
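Worked example of the new check: bits 23:16 of qpc->flags carry the QP transport/service type, so for flags = 0x00060000, ts = (0x00060000 >> 16) & 0xff = 0x06, which equals MLX4_QP_ST_XRC (0x6 in mlx4/qp.h), giving xrc = 1. The old test trusted bit 23 of local_qpn, which does not actually encode the transport type.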
index 52c23a892bab3cec77af9e8f662fe2ea3d47adfb..d73423c37c255c6174310060bdc8831ccb88b618 100644 (file)
@@ -1052,11 +1052,6 @@ struct _rule_hw {
        };
 };
 
-/* translating DMFS verbs sniffer rule to the FW API would need two reg IDs */
-struct mlx4_flow_handle {
-       u64 reg_id[2];
-};
-
 int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn,
                                enum mlx4_net_trans_promisc_mode mode);
 int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port,
index 645c3cedce9ca7abb69c13769a05aff02d4b5dd4..e393171e2facd2ad6317783560ceae6f89f80763 100644 (file)
@@ -116,7 +116,8 @@ enum ib_device_cap_flags {
        IB_DEVICE_MEM_MGT_EXTENSIONS    = (1<<21),
        IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
        IB_DEVICE_MEM_WINDOW_TYPE_2A    = (1<<23),
-       IB_DEVICE_MEM_WINDOW_TYPE_2B    = (1<<24)
+       IB_DEVICE_MEM_WINDOW_TYPE_2B    = (1<<24),
+       IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29)
 };
 
 enum ib_atomic_cap {
@@ -635,6 +636,12 @@ enum ib_qp_create_flags {
        IB_QP_CREATE_RESERVED_END               = 1 << 31,
 };
 
+
+/*
+ * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler
+ * callback to destroy the passed-in QP.
+ */
+
 struct ib_qp_init_attr {
        void                  (*event_handler)(struct ib_event *, void *);
        void                   *qp_context;
@@ -953,6 +960,7 @@ struct ib_ucontext {
        struct list_head        srq_list;
        struct list_head        ah_list;
        struct list_head        xrcd_list;
+       struct list_head        rule_list;
        int                     closing;
 };
 
@@ -1033,7 +1041,8 @@ struct ib_qp {
        struct ib_srq          *srq;
        struct ib_xrcd         *xrcd; /* XRC TGT QPs only */
        struct list_head        xrcd_list;
-       atomic_t                usecnt; /* count times opened, mcast attaches */
+       /* count times opened, mcast attaches, flow attaches */
+       atomic_t                usecnt;
        struct list_head        open_list;
        struct ib_qp           *real_qp;
        struct ib_uobject      *uobject;
@@ -1068,6 +1077,112 @@ struct ib_fmr {
        u32                     rkey;
 };
 
+/* Supported steering options */
+enum ib_flow_attr_type {
+       /* steering according to rule specifications */
+       IB_FLOW_ATTR_NORMAL             = 0x0,
+       /* default unicast and multicast rule -
+        * receive all Eth traffic which isn't steered to any QP
+        */
+       IB_FLOW_ATTR_ALL_DEFAULT        = 0x1,
+       /* default multicast rule -
+        * receive all Eth multicast traffic which isn't steered to any QP
+        */
+       IB_FLOW_ATTR_MC_DEFAULT         = 0x2,
+       /* sniffer rule - receive all port traffic */
+       IB_FLOW_ATTR_SNIFFER            = 0x3
+};
+
+/* Supported steering header types */
+enum ib_flow_spec_type {
+       /* L2 headers */
+       IB_FLOW_SPEC_ETH        = 0x20,
+       /* L3 header */
+       IB_FLOW_SPEC_IPV4       = 0x30,
+       /* L4 headers */
+       IB_FLOW_SPEC_TCP        = 0x40,
+       IB_FLOW_SPEC_UDP        = 0x41
+};
+
+#define IB_FLOW_SPEC_SUPPORT_LAYERS 4
+
+/* A flow steering rule's priority is set according to its domain.
+ * A lower domain value means a higher priority.
+ */
+enum ib_flow_domain {
+       IB_FLOW_DOMAIN_USER,
+       IB_FLOW_DOMAIN_ETHTOOL,
+       IB_FLOW_DOMAIN_RFS,
+       IB_FLOW_DOMAIN_NIC,
+       IB_FLOW_DOMAIN_NUM /* Must be last */
+};
+
+struct ib_flow_eth_filter {
+       u8      dst_mac[6];
+       u8      src_mac[6];
+       __be16  ether_type;
+       __be16  vlan_tag;
+};
+
+struct ib_flow_spec_eth {
+       enum ib_flow_spec_type    type;
+       u16                       size;
+       struct ib_flow_eth_filter val;
+       struct ib_flow_eth_filter mask;
+};
+
+struct ib_flow_ipv4_filter {
+       __be32  src_ip;
+       __be32  dst_ip;
+};
+
+struct ib_flow_spec_ipv4 {
+       enum ib_flow_spec_type     type;
+       u16                        size;
+       struct ib_flow_ipv4_filter val;
+       struct ib_flow_ipv4_filter mask;
+};
+
+struct ib_flow_tcp_udp_filter {
+       __be16  dst_port;
+       __be16  src_port;
+};
+
+struct ib_flow_spec_tcp_udp {
+       enum ib_flow_spec_type        type;
+       u16                           size;
+       struct ib_flow_tcp_udp_filter val;
+       struct ib_flow_tcp_udp_filter mask;
+};
+
+union ib_flow_spec {
+       struct {
+               enum ib_flow_spec_type  type;
+               u16                     size;
+       };
+       struct ib_flow_spec_eth         eth;
+       struct ib_flow_spec_ipv4        ipv4;
+       struct ib_flow_spec_tcp_udp     tcp_udp;
+};
+
+struct ib_flow_attr {
+       enum ib_flow_attr_type type;
+       u16          size;
+       u16          priority;
+       u32          flags;
+       u8           num_of_specs;
+       u8           port;
+       /* The optional spec layers follow, according to the user's request:
+        * struct ib_flow_spec_xxx
+        * struct ib_flow_spec_yyy
+        */
+};
+
+struct ib_flow {
+       struct ib_qp            *qp;
+       struct ib_uobject       *uobject;
+};
+
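The num_of_specs comment above describes a variable-length layout: the attr is followed contiguously in memory by its spec structs. A hedged sketch of a one-spec rule that steers a single destination MAC to a QP (all values illustrative):

struct mac_rule {
	struct ib_flow_attr	attr;
	struct ib_flow_spec_eth	eth;
};

static struct mac_rule rule = {
	.attr = {
		.type		= IB_FLOW_ATTR_NORMAL,
		.size		= sizeof(struct mac_rule),
		.num_of_specs	= 1,
		.port		= 1,
	},
	.eth = {
		.type = IB_FLOW_SPEC_ETH,
		.size = sizeof(struct ib_flow_spec_eth),
		.val  = { .dst_mac = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 } },
		.mask = { .dst_mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } },
	},
};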
 struct ib_mad;
 struct ib_grh;
 
@@ -1300,6 +1415,11 @@ struct ib_device {
                                                 struct ib_ucontext *ucontext,
                                                 struct ib_udata *udata);
        int                        (*dealloc_xrcd)(struct ib_xrcd *xrcd);
+       struct ib_flow *           (*create_flow)(struct ib_qp *qp,
+                                                 struct ib_flow_attr
+                                                 *flow_attr,
+                                                 int domain);
+       int                        (*destroy_flow)(struct ib_flow *flow_id);
 
        struct ib_dma_mapping_ops   *dma_ops;
 
@@ -2260,4 +2380,8 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
  */
 int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
 
+struct ib_flow *ib_create_flow(struct ib_qp *qp,
+                              struct ib_flow_attr *flow_attr, int domain);
+int ib_destroy_flow(struct ib_flow *flow_id);
+
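Hedged usage of the two new verbs with the rule sketched above; qp is a placeholder, and the ERR_PTR-style error handling is an assumption consistent with the other verbs here:

struct ib_flow *flow;

flow = ib_create_flow(qp, &rule.attr, IB_FLOW_DOMAIN_USER);
if (IS_ERR(flow))
	return PTR_ERR(flow);

/* ... traffic matching the rule now arrives on qp ... */

ib_destroy_flow(flow);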
 #endif /* IB_VERBS_H */
index 1a046b1595ccd2ad4e24b78f8bb5647aadbee29b..1017e0bdf8baa75beb5ce0a13f852ddd7c683c4d 100644 (file)
@@ -49,8 +49,8 @@ enum iw_cm_event_type {
 struct iw_cm_event {
        enum iw_cm_event_type event;
        int                      status;
-       struct sockaddr_in local_addr;
-       struct sockaddr_in remote_addr;
+       struct sockaddr_storage local_addr;
+       struct sockaddr_storage remote_addr;
        void *private_data;
        void *provider_data;
        u8 private_data_len;
@@ -83,8 +83,8 @@ struct iw_cm_id {
        iw_cm_handler           cm_handler;      /* client callback function */
        void                    *context;        /* client cb context */
        struct ib_device        *device;
-       struct sockaddr_in      local_addr;
-       struct sockaddr_in      remote_addr;
+       struct sockaddr_storage local_addr;
+       struct sockaddr_storage remote_addr;
        void                    *provider_data;  /* provider private data */
        iw_event_handler        event_handler;   /* cb for provider
                                                    events */
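With both address fields widened to sockaddr_storage, consumers now switch on ss_family before casting; a minimal sketch (cm_id is a placeholder):

struct sockaddr_storage *ss = &cm_id->local_addr;

if (ss->ss_family == AF_INET) {
	struct sockaddr_in *sin = (struct sockaddr_in *)ss;

	pr_debug("bound to %pI4:%u\n", &sin->sin_addr, ntohs(sin->sin_port));
} else {
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

	pr_debug("bound to %pI6:%u\n", &sin6->sin6_addr,
		 ntohs(sin6->sin6_port));
}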
index 805711ea200596d3bba291f7a06902963316510b..0b233c56b0e402ef75f691b5dcf6c6f1cc87a01b 100644 (file)
@@ -43,6 +43,7 @@
  * compatibility are made.
  */
 #define IB_USER_VERBS_ABI_VERSION      6
+#define IB_USER_VERBS_CMD_THRESHOLD    50
 
 enum {
        IB_USER_VERBS_CMD_GET_CONTEXT,
@@ -85,7 +86,9 @@ enum {
        IB_USER_VERBS_CMD_OPEN_XRCD,
        IB_USER_VERBS_CMD_CLOSE_XRCD,
        IB_USER_VERBS_CMD_CREATE_XSRQ,
-       IB_USER_VERBS_CMD_OPEN_QP
+       IB_USER_VERBS_CMD_OPEN_QP,
+       IB_USER_VERBS_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
+       IB_USER_VERBS_CMD_DESTROY_FLOW
 };
 
 /*
@@ -123,6 +126,15 @@ struct ib_uverbs_cmd_hdr {
        __u16 out_words;
 };
 
+struct ib_uverbs_cmd_hdr_ex {
+       __u32 command;
+       __u16 in_words;
+       __u16 out_words;
+       __u16 provider_in_words;
+       __u16 provider_out_words;
+       __u32 cmd_hdr_reserved;
+};
+
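A hedged sketch of the dispatch this header enables in uverbs_main.c's write handler: commands at or above IB_USER_VERBS_CMD_THRESHOLD are assumed to carry the extended header:

struct ib_uverbs_cmd_hdr hdr;

if (copy_from_user(&hdr, buf, sizeof(hdr)))
	return -EFAULT;

if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) {
	struct ib_uverbs_cmd_hdr_ex hdr_ex;

	if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex)))
		return -EFAULT;
	/* dispatch with hdr_ex.provider_{in,out}_words accounted
	 * for separately from the core in/out word counts */
}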
 struct ib_uverbs_get_context {
        __u64 response;
        __u64 driver_data[0];
@@ -684,6 +696,91 @@ struct ib_uverbs_detach_mcast {
        __u64 driver_data[0];
 };
 
+struct ib_kern_eth_filter {
+       __u8  dst_mac[6];
+       __u8  src_mac[6];
+       __be16 ether_type;
+       __be16 vlan_tag;
+};
+
+struct ib_kern_spec_eth {
+       __u32  type;
+       __u16  size;
+       __u16  reserved;
+       struct ib_kern_eth_filter val;
+       struct ib_kern_eth_filter mask;
+};
+
+struct ib_kern_ipv4_filter {
+       __be32 src_ip;
+       __be32 dst_ip;
+};
+
+struct ib_kern_spec_ipv4 {
+       __u32  type;
+       __u16  size;
+       __u16  reserved;
+       struct ib_kern_ipv4_filter val;
+       struct ib_kern_ipv4_filter mask;
+};
+
+struct ib_kern_tcp_udp_filter {
+       __be16 dst_port;
+       __be16 src_port;
+};
+
+struct ib_kern_spec_tcp_udp {
+       __u32  type;
+       __u16  size;
+       __u16  reserved;
+       struct ib_kern_tcp_udp_filter val;
+       struct ib_kern_tcp_udp_filter mask;
+};
+
+struct ib_kern_spec {
+       union {
+               struct {
+                       __u32 type;
+                       __u16 size;
+                       __u16 reserved;
+               };
+               struct ib_kern_spec_eth     eth;
+               struct ib_kern_spec_ipv4    ipv4;
+               struct ib_kern_spec_tcp_udp tcp_udp;
+       };
+};
+
+struct ib_kern_flow_attr {
+       __u32 type;
+       __u16 size;
+       __u16 priority;
+       __u8  num_of_specs;
+       __u8  reserved[2];
+       __u8  port;
+       __u32 flags;
+       /* The optional spec layers follow, according to the user's request:
+        * struct ib_flow_spec_xxx
+        * struct ib_flow_spec_yyy
+        */
+};
+
+struct ib_uverbs_create_flow {
+       __u32 comp_mask;
+       __u64 response;
+       __u32 qp_handle;
+       struct ib_kern_flow_attr flow_attr;
+};
+
+struct ib_uverbs_create_flow_resp {
+       __u32 comp_mask;
+       __u32 flow_handle;
+};
+
+struct ib_uverbs_destroy_flow {
+       __u32 comp_mask;
+       __u32 flow_handle;
+};
+
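Putting the pieces together, a hedged userspace-side sketch of a CREATE_FLOW request: the extended header, the command, and num_of_specs ib_kern_spec entries laid out back to back. qp_handle and uverbs_fd are placeholders, and the word-count and size conventions shown are assumptions:

struct create_flow_msg {
	struct ib_uverbs_cmd_hdr_ex	hdr;
	struct ib_uverbs_create_flow	cmd;
	struct ib_kern_spec_eth		spec;
} msg;
struct ib_uverbs_create_flow_resp resp;

memset(&msg, 0, sizeof(msg));
msg.hdr.command   = IB_USER_VERBS_CMD_CREATE_FLOW;
msg.hdr.in_words  = sizeof(msg) / 4;	/* word-count convention assumed */
msg.hdr.out_words = sizeof(resp) / 4;
msg.cmd.response  = (__u64)(unsigned long)&resp;
msg.cmd.qp_handle = qp_handle;
msg.cmd.flow_attr.type = 0;		/* IB_FLOW_ATTR_NORMAL */
msg.cmd.flow_attr.size = sizeof(msg.cmd.flow_attr) + sizeof(msg.spec);
msg.cmd.flow_attr.num_of_specs = 1;
msg.cmd.flow_attr.port = 1;
msg.spec.type = 0x20;			/* IB_FLOW_SPEC_ETH */
msg.spec.size = sizeof(msg.spec);
/* fill msg.spec.val and msg.spec.mask, then: */
if (write(uverbs_fd, &msg, sizeof(msg)) != sizeof(msg))
	/* handle error */;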
 struct ib_uverbs_create_srq {
        __u64 response;
        __u64 user_handle;