IB/srp: Create an insecure all physical rkey only if needed
[firefly-linux-kernel-4.4.55.git] / drivers / infiniband / ulp / srp / ib_srp.c
index 6ed976d8aa30d72280e4aa8a148891410abc7eda..ca98d3b4728150ed712c84f0b0831f5403882225 100644 (file)
@@ -68,8 +68,8 @@ static unsigned int srp_sg_tablesize;
 static unsigned int cmd_sg_entries;
 static unsigned int indirect_sg_entries;
 static bool allow_ext_sg;
-static bool prefer_fr;
-static bool register_always;
+static bool prefer_fr = true;
+static bool register_always = true;
 static int topspin_workarounds = 1;
 
 module_param(srp_sg_tablesize, uint, 0444);
@@ -546,7 +546,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        if (ret)
                goto err_qp;
 
-       if (dev->use_fast_reg && dev->has_fr) {
+       if (dev->use_fast_reg) {
                fr_pool = srp_alloc_fr_pool(target);
                if (IS_ERR(fr_pool)) {
                        ret = PTR_ERR(fr_pool);
@@ -557,7 +557,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
                if (ch->fr_pool)
                        srp_destroy_fr_pool(ch->fr_pool);
                ch->fr_pool = fr_pool;
-       } else if (!dev->use_fast_reg && dev->has_fmr) {
+       } else if (dev->use_fmr) {
                fmr_pool = srp_alloc_fmr_pool(target);
                if (IS_ERR(fmr_pool)) {
                        ret = PTR_ERR(fmr_pool);
@@ -623,7 +623,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
        if (dev->use_fast_reg) {
                if (ch->fr_pool)
                        srp_destroy_fr_pool(ch->fr_pool);
-       } else {
+       } else if (dev->use_fmr) {
                if (ch->fmr_pool)
                        ib_destroy_fmr_pool(ch->fmr_pool);
        }
@@ -1085,7 +1085,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
                if (req->nmdesc)
                        srp_fr_pool_put(ch->fr_pool, req->fr_list,
                                        req->nmdesc);
-       } else {
+       } else if (dev->use_fmr) {
                struct ib_pool_fmr **pfmr;
 
                for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
@@ -1260,6 +1260,8 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
 {
        struct srp_direct_buf *desc = state->desc;
 
+       WARN_ON_ONCE(!dma_len);
+
        desc->va = cpu_to_be64(dma_addr);
        desc->key = cpu_to_be32(rkey);
        desc->len = cpu_to_be32(dma_len);
@@ -1277,12 +1279,15 @@ static int srp_map_finish_fmr(struct srp_map_state *state,
        struct ib_pool_fmr *fmr;
        u64 io_addr = 0;
 
+       if (state->fmr.next >= state->fmr.end)
+               return -ENOMEM;
+
        fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
                                   state->npages, io_addr);
        if (IS_ERR(fmr))
                return PTR_ERR(fmr);
 
-       *state->next_fmr++ = fmr;
+       *state->fmr.next++ = fmr;
        state->nmdesc++;
 
        srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
@@ -1301,6 +1306,9 @@ static int srp_map_finish_fr(struct srp_map_state *state,
        struct srp_fr_desc *desc;
        u32 rkey;
 
+       if (state->fr.next >= state->fr.end)
+               return -ENOMEM;
+
        desc = srp_fr_pool_get(ch->fr_pool);
        if (!desc)
                return -ENOMEM;
@@ -1324,7 +1332,7 @@ static int srp_map_finish_fr(struct srp_map_state *state,
                                       IB_ACCESS_REMOTE_WRITE);
        wr.wr.fast_reg.rkey = desc->mr->lkey;
 
-       *state->next_fr++ = desc;
+       *state->fr.next++ = desc;
        state->nmdesc++;
 
        srp_map_desc(state, state->base_dma_addr, state->dma_len,
@@ -1337,17 +1345,19 @@ static int srp_finish_mapping(struct srp_map_state *state,
                              struct srp_rdma_ch *ch)
 {
        struct srp_target_port *target = ch->target;
+       struct srp_device *dev = target->srp_host->srp_dev;
        int ret = 0;
 
+       WARN_ON_ONCE(!dev->use_fast_reg && !dev->use_fmr);
+
        if (state->npages == 0)
                return 0;
 
-       if (state->npages == 1 && !register_always)
+       if (state->npages == 1 && target->global_mr)
                srp_map_desc(state, state->base_dma_addr, state->dma_len,
-                            target->rkey);
+                            target->global_mr->rkey);
        else
-               ret = target->srp_host->srp_dev->use_fast_reg ?
-                       srp_map_finish_fr(state, ch) :
+               ret = dev->use_fast_reg ? srp_map_finish_fr(state, ch) :
                        srp_map_finish_fmr(state, ch);
 
        if (ret == 0) {
@@ -1358,59 +1368,19 @@ static int srp_finish_mapping(struct srp_map_state *state,
        return ret;
 }
 
-static void srp_map_update_start(struct srp_map_state *state,
-                                struct scatterlist *sg, int sg_index,
-                                dma_addr_t dma_addr)
-{
-       state->unmapped_sg = sg;
-       state->unmapped_index = sg_index;
-       state->unmapped_addr = dma_addr;
-}
-
 static int srp_map_sg_entry(struct srp_map_state *state,
                            struct srp_rdma_ch *ch,
-                           struct scatterlist *sg, int sg_index,
-                           bool use_mr)
+                           struct scatterlist *sg, int sg_index)
 {
        struct srp_target_port *target = ch->target;
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_device *ibdev = dev->dev;
        dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
        unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
-       unsigned int len;
+       unsigned int len = 0;
        int ret;
 
-       if (!dma_len)
-               return 0;
-
-       if (!use_mr) {
-               /*
-                * Once we're in direct map mode for a request, we don't
-                * go back to FMR or FR mode, so no need to update anything
-                * other than the descriptor.
-                */
-               srp_map_desc(state, dma_addr, dma_len, target->rkey);
-               return 0;
-       }
-
-       if (dma_len > dev->mr_max_size) {
-               ret = srp_finish_mapping(state, ch);
-               if (ret)
-                       return ret;
-
-               srp_map_desc(state, dma_addr, dma_len, target->rkey);
-               srp_map_update_start(state, NULL, 0, 0);
-               return 0;
-       }
-
-       /*
-        * If this is the first sg that will be mapped via FMR or via FR, save
-        * our position. We need to know the first unmapped entry, its index,
-        * and the first unmapped address within that entry to be able to
-        * restart mapping after an error.
-        */
-       if (!state->unmapped_sg)
-               srp_map_update_start(state, sg, sg_index, dma_addr);
+       WARN_ON_ONCE(!dma_len);
 
        while (dma_len) {
                unsigned offset = dma_addr & ~dev->mr_page_mask;
@@ -1418,8 +1388,6 @@ static int srp_map_sg_entry(struct srp_map_state *state,
                        ret = srp_finish_mapping(state, ch);
                        if (ret)
                                return ret;
-
-                       srp_map_update_start(state, sg, sg_index, dma_addr);
                }
 
                len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
@@ -1438,11 +1406,8 @@ static int srp_map_sg_entry(struct srp_map_state *state,
         * boundries.
         */
        ret = 0;
-       if (len != dev->mr_page_size) {
+       if (len != dev->mr_page_size)
                ret = srp_finish_mapping(state, ch);
-               if (!ret)
-                       srp_map_update_start(state, NULL, 0, 0);
-       }
        return ret;
 }
 
@@ -1452,50 +1417,80 @@ static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch,
 {
        struct srp_target_port *target = ch->target;
        struct srp_device *dev = target->srp_host->srp_dev;
-       struct ib_device *ibdev = dev->dev;
        struct scatterlist *sg;
-       int i;
-       bool use_mr;
+       int i, ret;
 
        state->desc     = req->indirect_desc;
        state->pages    = req->map_page;
        if (dev->use_fast_reg) {
-               state->next_fr = req->fr_list;
-               use_mr = !!ch->fr_pool;
-       } else {
-               state->next_fmr = req->fmr_list;
-               use_mr = !!ch->fmr_pool;
+               state->fr.next = req->fr_list;
+               state->fr.end = req->fr_list + target->cmd_sg_cnt;
+       } else if (dev->use_fmr) {
+               state->fmr.next = req->fmr_list;
+               state->fmr.end = req->fmr_list + target->cmd_sg_cnt;
        }
 
-       for_each_sg(scat, sg, count, i) {
-               if (srp_map_sg_entry(state, ch, sg, i, use_mr)) {
-                       /*
-                        * Memory registration failed, so backtrack to the
-                        * first unmapped entry and continue on without using
-                        * memory registration.
-                        */
-                       dma_addr_t dma_addr;
-                       unsigned int dma_len;
-
-backtrack:
-                       sg = state->unmapped_sg;
-                       i = state->unmapped_index;
-
-                       dma_addr = ib_sg_dma_address(ibdev, sg);
-                       dma_len = ib_sg_dma_len(ibdev, sg);
-                       dma_len -= (state->unmapped_addr - dma_addr);
-                       dma_addr = state->unmapped_addr;
-                       use_mr = false;
-                       srp_map_desc(state, dma_addr, dma_len, target->rkey);
+       if (dev->use_fast_reg || dev->use_fmr) {
+               for_each_sg(scat, sg, count, i) {
+                       ret = srp_map_sg_entry(state, ch, sg, i);
+                       if (ret)
+                               goto out;
+               }
+               ret = srp_finish_mapping(state, ch);
+               if (ret)
+                       goto out;
+       } else {
+               for_each_sg(scat, sg, count, i) {
+                       srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
+                                    ib_sg_dma_len(dev->dev, sg),
+                                    target->global_mr->rkey);
                }
        }
 
-       if (use_mr && srp_finish_mapping(state, ch))
-               goto backtrack;
-
        req->nmdesc = state->nmdesc;
+       ret = 0;
 
-       return 0;
+out:
+       return ret;
+}
+
+/*
+ * Register the indirect data buffer descriptor with the HCA.
+ *
+ * Note: since the indirect data buffer descriptor has been allocated with
+ * kmalloc() it is guaranteed that this buffer is a physically contiguous
+ * memory buffer.
+ */
+static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
+                      void **next_mr, void **end_mr, u32 idb_len,
+                      __be32 *idb_rkey)
+{
+       struct srp_target_port *target = ch->target;
+       struct srp_device *dev = target->srp_host->srp_dev;
+       struct srp_map_state state;
+       struct srp_direct_buf idb_desc;
+       u64 idb_pages[1];
+       int ret;
+
+       memset(&state, 0, sizeof(state));
+       memset(&idb_desc, 0, sizeof(idb_desc));
+       state.gen.next = next_mr;
+       state.gen.end = end_mr;
+       state.desc = &idb_desc;
+       state.pages = idb_pages;
+       state.pages[0] = (req->indirect_dma_addr &
+                         dev->mr_page_mask);
+       state.npages = 1;
+       state.base_dma_addr = req->indirect_dma_addr;
+       state.dma_len = idb_len;
+       ret = srp_finish_mapping(&state, ch);
+       if (ret < 0)
+               goto out;
+
+       *idb_rkey = idb_desc.key;
+
+out:
+       return ret;
 }
 
 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
@@ -1504,12 +1499,13 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
        struct srp_target_port *target = ch->target;
        struct scatterlist *scat;
        struct srp_cmd *cmd = req->cmd->buf;
-       int len, nents, count;
+       int len, nents, count, ret;
        struct srp_device *dev;
        struct ib_device *ibdev;
        struct srp_map_state state;
        struct srp_indirect_buf *indirect_hdr;
-       u32 table_len;
+       u32 idb_len, table_len;
+       __be32 idb_rkey;
        u8 fmt;
 
        if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
@@ -1536,7 +1532,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
        fmt = SRP_DATA_DESC_DIRECT;
        len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
 
-       if (count == 1 && !register_always) {
+       if (count == 1 && target->global_mr) {
                /*
                 * The midlayer only generated a single gather/scatter
                 * entry, or DMA mapping coalesced everything to a
@@ -1546,7 +1542,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
                struct srp_direct_buf *buf = (void *) cmd->add_data;
 
                buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
-               buf->key = cpu_to_be32(target->rkey);
+               buf->key = cpu_to_be32(target->global_mr->rkey);
                buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
 
                req->nmdesc = 0;
@@ -1591,6 +1587,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
 
        count = min(state.ndesc, target->cmd_sg_cnt);
        table_len = state.ndesc * sizeof (struct srp_direct_buf);
+       idb_len = sizeof(struct srp_indirect_buf) + table_len;
 
        fmt = SRP_DATA_DESC_INDIRECT;
        len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
@@ -1599,8 +1596,18 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
        memcpy(indirect_hdr->desc_list, req->indirect_desc,
               count * sizeof (struct srp_direct_buf));
 
+       if (!target->global_mr) {
+               ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
+                                 idb_len, &idb_rkey);
+               if (ret < 0)
+                       return ret;
+               req->nmdesc++;
+       } else {
+               idb_rkey = target->global_mr->rkey;
+       }
+
        indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
-       indirect_hdr->table_desc.key = cpu_to_be32(target->rkey);
+       indirect_hdr->table_desc.key = idb_rkey;
        indirect_hdr->table_desc.len = cpu_to_be32(table_len);
        indirect_hdr->len = cpu_to_be32(state.total_len);
 
@@ -3151,7 +3158,7 @@ static ssize_t srp_create_target(struct device *dev,
        target->scsi_host       = target_host;
        target->srp_host        = host;
        target->lkey            = host->srp_dev->pd->local_dma_lkey;
-       target->rkey            = host->srp_dev->mr->rkey;
+       target->global_mr       = host->srp_dev->global_mr;
        target->cmd_sg_cnt      = cmd_sg_entries;
        target->sg_tablesize    = indirect_sg_entries ? : cmd_sg_entries;
        target->allow_ext_sg    = allow_ext_sg;
@@ -3408,6 +3415,7 @@ static void srp_add_one(struct ib_device *device)
 
        srp_dev->use_fast_reg = (srp_dev->has_fr &&
                                 (!srp_dev->has_fmr || prefer_fr));
+       srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
 
        /*
         * Use the smallest page size supported by the HCA, down to a
@@ -3440,12 +3448,16 @@ static void srp_add_one(struct ib_device *device)
        if (IS_ERR(srp_dev->pd))
                goto free_dev;
 
-       srp_dev->mr = ib_get_dma_mr(srp_dev->pd,
-                                   IB_ACCESS_LOCAL_WRITE |
-                                   IB_ACCESS_REMOTE_READ |
-                                   IB_ACCESS_REMOTE_WRITE);
-       if (IS_ERR(srp_dev->mr))
-               goto err_pd;
+       if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
+               srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
+                                                  IB_ACCESS_LOCAL_WRITE |
+                                                  IB_ACCESS_REMOTE_READ |
+                                                  IB_ACCESS_REMOTE_WRITE);
+               if (IS_ERR(srp_dev->global_mr))
+                       goto err_pd;
+       } else {
+               srp_dev->global_mr = NULL;
+       }
 
        for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
                host = srp_add_port(srp_dev, p);
@@ -3502,7 +3514,8 @@ static void srp_remove_one(struct ib_device *device, void *client_data)
                kfree(host);
        }
 
-       ib_dereg_mr(srp_dev->mr);
+       if (srp_dev->global_mr)
+               ib_dereg_mr(srp_dev->global_mr);
        ib_dealloc_pd(srp_dev->pd);
 
        kfree(srp_dev);