IB/mlx4: Add mechanism to support flow steering over IB links
author: Matan Barak <matanb@mellanox.com>
Thu, 7 Nov 2013 13:25:16 +0000 (15:25 +0200)
committer: Roland Dreier <roland@purestorage.com>
Tue, 14 Jan 2014 22:06:50 +0000 (14:06 -0800)
The mlx4 device requires adding IB flow spec to rules that apply over
infiniband link layer.  This patch adds a mechanism to add such a rule.

If higher-level flow specs, e.g. IP/UDP/TCP, are provided, the device
requires us to add an empty wild-carded IB rule. Furthermore, the device
requires the QPN to be put in the rule.

Add specific parsing support for empty IB rules, and the ability
to self-generate missing specs based on existing ones.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
drivers/infiniband/hw/mlx4/main.c

index e8d0c5592fd3ec17838147a533153a2cf534b7f1..6b7f227ca9e46f9ec5494709272dfeffc3137e03 100644 (file)
@@ -55,6 +55,7 @@
 #define DRV_RELDATE    "April 4, 2008"
 
 #define MLX4_IB_FLOW_MAX_PRIO 0xFFF
+#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
@@ -825,6 +826,7 @@ struct mlx4_ib_steering {
 };
 
 static int parse_flow_attr(struct mlx4_dev *dev,
+                          u32 qp_num,
                           union ib_flow_spec *ib_spec,
                           struct _rule_hw *mlx4_spec)
 {
@@ -840,6 +842,14 @@ static int parse_flow_attr(struct mlx4_dev *dev,
                mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
                mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
                break;
+       case IB_FLOW_SPEC_IB:
+               type = MLX4_NET_TRANS_RULE_ID_IB;
+               mlx4_spec->ib.l3_qpn =
+                       cpu_to_be32(qp_num);
+               mlx4_spec->ib.qpn_mask =
+                       cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
+               break;
+
 
        case IB_FLOW_SPEC_IPV4:
                type = MLX4_NET_TRANS_RULE_ID_IPV4;
@@ -871,6 +881,115 @@ static int parse_flow_attr(struct mlx4_dev *dev,
        return mlx4_hw_rule_sz(dev, type);
 }
 
+struct default_rules {
+       __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+       __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+       __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
+       __u8  link_layer;
+};
+static const struct default_rules default_table[] = {
+       {
+               .mandatory_fields = {IB_FLOW_SPEC_IPV4},
+               .mandatory_not_fields = {IB_FLOW_SPEC_ETH},
+               .rules_create_list = {IB_FLOW_SPEC_IB},
+               .link_layer = IB_LINK_LAYER_INFINIBAND
+       }
+};
+
+static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
+                                        struct ib_flow_attr *flow_attr)
+{
+       int i, j, k;
+       void *ib_flow;
+       const struct default_rules *pdefault_rules = default_table;
+       u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
+
+       for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++,
+            pdefault_rules++) {
+               __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
+               memset(&field_types, 0, sizeof(field_types));
+
+               if (link_layer != pdefault_rules->link_layer)
+                       continue;
+
+               ib_flow = flow_attr + 1;
+               /* we assume the specs are sorted */
+               for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
+                    j < flow_attr->num_of_specs; k++) {
+                       union ib_flow_spec *current_flow =
+                               (union ib_flow_spec *)ib_flow;
+
+                       /* same layer but different type */
+                       if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
+                            (pdefault_rules->mandatory_fields[k] &
+                             IB_FLOW_SPEC_LAYER_MASK)) &&
+                           (current_flow->type !=
+                            pdefault_rules->mandatory_fields[k]))
+                               goto out;
+
+                       /* same layer, try match next one */
+                       if (current_flow->type ==
+                           pdefault_rules->mandatory_fields[k]) {
+                               j++;
+                               ib_flow +=
+                                       ((union ib_flow_spec *)ib_flow)->size;
+                       }
+               }
+
+               ib_flow = flow_attr + 1;
+               for (j = 0; j < flow_attr->num_of_specs;
+                    j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
+                       for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
+                               /* same layer and same type */
+                               if (((union ib_flow_spec *)ib_flow)->type ==
+                                   pdefault_rules->mandatory_not_fields[k])
+                                       goto out;
+
+               return i;
+       }
+out:
+       return -1;
+}
+
+static int __mlx4_ib_create_default_rules(
+               struct mlx4_ib_dev *mdev,
+               struct ib_qp *qp,
+               const struct default_rules *pdefault_rules,
+               struct _rule_hw *mlx4_spec) {
+       int size = 0;
+       int i;
+
+       for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/
+                       sizeof(pdefault_rules->rules_create_list[0]); i++) {
+               int ret;
+               union ib_flow_spec ib_spec;
+               switch (pdefault_rules->rules_create_list[i]) {
+               case 0:
+                       /* no rule */
+                       continue;
+               case IB_FLOW_SPEC_IB:
+                       ib_spec.type = IB_FLOW_SPEC_IB;
+                       ib_spec.size = sizeof(struct ib_flow_spec_ib);
+
+                       break;
+               default:
+                       /* invalid rule */
+                       return -EINVAL;
+               }
+               /* We must put empty rule, qpn is being ignored */
+               ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
+                                     mlx4_spec);
+               if (ret < 0) {
+                       pr_info("invalid parsing\n");
+                       return -EINVAL;
+               }
+
+               mlx4_spec = (void *)mlx4_spec + ret;
+               size += ret;
+       }
+       return size;
+}
+
 static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
                          int domain,
                          enum mlx4_net_trans_promisc_mode flow_type,
@@ -882,6 +1001,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
        struct mlx4_ib_dev *mdev = to_mdev(qp->device);
        struct mlx4_cmd_mailbox *mailbox;
        struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+       int default_flow;
 
        static const u16 __mlx4_domain[] = {
                [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
@@ -916,8 +1036,21 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
 
        ib_flow = flow_attr + 1;
        size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+       /* Add default flows */
+       default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
+       if (default_flow >= 0) {
+               ret = __mlx4_ib_create_default_rules(
+                               mdev, qp, default_table + default_flow,
+                               mailbox->buf + size);
+               if (ret < 0) {
+                       mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+                       return -EINVAL;
+               }
+               size += ret;
+       }
        for (i = 0; i < flow_attr->num_of_specs; i++) {
-               ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size);
+               ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
+                                     mailbox->buf + size);
                if (ret < 0) {
                        mlx4_free_cmd_mailbox(mdev->dev, mailbox);
                        return -EINVAL;