2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
45 #include <linux/atomic.h>
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
52 #include <scsi/scsi_transport_srp.h>
56 #define DRV_NAME "ib_srp"
57 #define PFX DRV_NAME ": "
58 #define DRV_VERSION "2.0"
59 #define DRV_RELDATE "July 26, 2015"
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
67 static unsigned int srp_sg_tablesize;
68 static unsigned int cmd_sg_entries;
69 static unsigned int indirect_sg_entries;
70 static bool allow_ext_sg;
71 static bool prefer_fr = true;
72 static bool register_always = true;
73 static int topspin_workarounds = 1;
75 module_param(srp_sg_tablesize, uint, 0444);
76 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
78 module_param(cmd_sg_entries, uint, 0444);
79 MODULE_PARM_DESC(cmd_sg_entries,
80 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
82 module_param(indirect_sg_entries, uint, 0444);
83 MODULE_PARM_DESC(indirect_sg_entries,
84 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");
86 module_param(allow_ext_sg, bool, 0444);
87 MODULE_PARM_DESC(allow_ext_sg,
88 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
90 module_param(topspin_workarounds, int, 0444);
91 MODULE_PARM_DESC(topspin_workarounds,
92 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
94 module_param(prefer_fr, bool, 0444);
95 MODULE_PARM_DESC(prefer_fr,
96 "Whether to use fast registration if both FMR and fast registration are supported");
98 module_param(register_always, bool, 0444);
99 MODULE_PARM_DESC(register_always,
100 "Use memory registration even for contiguous memory regions");
102 static const struct kernel_param_ops srp_tmo_ops;
104 static int srp_reconnect_delay = 10;
105 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
107 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
109 static int srp_fast_io_fail_tmo = 15;
110 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
112 MODULE_PARM_DESC(fast_io_fail_tmo,
113 "Number of seconds between the observation of a transport"
114 " layer error and failing all I/O. \"off\" means that this"
115 " functionality is disabled.");
117 static int srp_dev_loss_tmo = 600;
118 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
120 MODULE_PARM_DESC(dev_loss_tmo,
121 "Maximum number of seconds that the SRP transport should"
122 " insulate transport layer errors. After this time has been"
123 " exceeded the SCSI host is removed. Should be"
124 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
125 " if fast_io_fail_tmo has not been set. \"off\" means that"
126 " this functionality is disabled.");
128 static unsigned ch_count;
129 module_param(ch_count, uint, 0444);
130 MODULE_PARM_DESC(ch_count,
131 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
133 static void srp_add_one(struct ib_device *device);
134 static void srp_remove_one(struct ib_device *device, void *client_data);
135 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
136 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
137 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
139 static struct scsi_transport_template *ib_srp_transport_template;
140 static struct workqueue_struct *srp_remove_wq;
142 static struct ib_client srp_client = {
145 .remove = srp_remove_one
148 static struct ib_sa_client srp_sa_client;
150 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
152 int tmo = *(int *)kp->arg;
155 return sprintf(buffer, "%d", tmo);
157 return sprintf(buffer, "off");
160 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
164 res = srp_parse_tmo(&tmo, val);
168 if (kp->arg == &srp_reconnect_delay)
169 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
171 else if (kp->arg == &srp_fast_io_fail_tmo)
172 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
174 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
178 *(int *)kp->arg = tmo;
184 static const struct kernel_param_ops srp_tmo_ops = {
189 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
191 return (struct srp_target_port *) host->hostdata;
194 static const char *srp_target_info(struct Scsi_Host *host)
196 return host_to_target(host)->target_name;
199 static int srp_target_is_topspin(struct srp_target_port *target)
201 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
202 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
204 return topspin_workarounds &&
205 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
206 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
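/*
 * The check above relies on the first three bytes of the I/O controller GUID
 * carrying the manufacturer OUI. For example, a (hypothetical) ioc_guid of
 * 00:05:ad:00:00:12:34:56 starts with the Topspin OUI 00:05:ad and therefore
 * enables the workarounds whenever topspin_workarounds != 0.
 */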
209 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
211 enum dma_data_direction direction)
215 iu = kmalloc(sizeof *iu, gfp_mask);
219 iu->buf = kzalloc(size, gfp_mask);
223 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
225 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
229 iu->direction = direction;
241 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
246 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
252 static void srp_qp_event(struct ib_event *event, void *context)
254 pr_debug("QP event %s (%d)\n",
255 ib_event_msg(event->event), event->event);
258 static int srp_init_qp(struct srp_target_port *target,
261 struct ib_qp_attr *attr;
264 attr = kmalloc(sizeof *attr, GFP_KERNEL);
268 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
269 target->srp_host->port,
270 be16_to_cpu(target->pkey),
275 attr->qp_state = IB_QPS_INIT;
276 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
277 IB_ACCESS_REMOTE_WRITE);
278 attr->port_num = target->srp_host->port;
280 ret = ib_modify_qp(qp, attr,
291 static int srp_new_cm_id(struct srp_rdma_ch *ch)
293 struct srp_target_port *target = ch->target;
294 struct ib_cm_id *new_cm_id;
296 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
298 if (IS_ERR(new_cm_id))
299 return PTR_ERR(new_cm_id);
302 ib_destroy_cm_id(ch->cm_id);
303 ch->cm_id = new_cm_id;
304 ch->path.sgid = target->sgid;
305 ch->path.dgid = target->orig_dgid;
306 ch->path.pkey = target->pkey;
307 ch->path.service_id = target->service_id;
312 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
314 struct srp_device *dev = target->srp_host->srp_dev;
315 struct ib_fmr_pool_param fmr_param;
317 memset(&fmr_param, 0, sizeof(fmr_param));
318 fmr_param.pool_size = target->scsi_host->can_queue;
319 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
321 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
322 fmr_param.page_shift = ilog2(dev->mr_page_size);
323 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
324 IB_ACCESS_REMOTE_WRITE |
325 IB_ACCESS_REMOTE_READ);
327 return ib_create_fmr_pool(dev->pd, &fmr_param);
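/*
 * Background on the pool created above: ib_create_fmr_pool() hands out fast
 * memory regions that can be remapped cheaply per I/O. The pool holds
 * can_queue mappings, each covering at most max_pages_per_mr pages of
 * mr_page_size bytes, and unmapped ("dirty") entries are flushed in batches
 * once roughly a quarter of the pool (dirty_watermark) has become dirty.
 */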
331 * srp_destroy_fr_pool() - free the resources owned by a pool
332 * @pool: Fast registration pool to be destroyed.
334 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
337 struct srp_fr_desc *d;
342 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
350 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
351 * @device: IB device to allocate fast registration descriptors for.
352 * @pd: Protection domain associated with the FR descriptors.
353 * @pool_size: Number of descriptors to allocate.
354 * @max_page_list_len: Maximum fast registration work request page list length.
356 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
357 struct ib_pd *pd, int pool_size,
358 int max_page_list_len)
360 struct srp_fr_pool *pool;
361 struct srp_fr_desc *d;
363 int i, ret = -EINVAL;
368 pool = kzalloc(sizeof(struct srp_fr_pool) +
369 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
372 pool->size = pool_size;
373 pool->max_page_list_len = max_page_list_len;
374 spin_lock_init(&pool->lock);
375 INIT_LIST_HEAD(&pool->free_list);
377 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
378 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
385 list_add_tail(&d->entry, &pool->free_list);
392 srp_destroy_fr_pool(pool);
400 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
401 * @pool: Pool to obtain descriptor from.
403 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
405 struct srp_fr_desc *d = NULL;
408 spin_lock_irqsave(&pool->lock, flags);
409 if (!list_empty(&pool->free_list)) {
410 d = list_first_entry(&pool->free_list, typeof(*d), entry);
413 spin_unlock_irqrestore(&pool->lock, flags);
419 * srp_fr_pool_put() - put an FR descriptor back in the free list
420 * @pool: Pool the descriptor was allocated from.
421 * @desc: Pointer to an array of fast registration descriptor pointers.
422 * @n: Number of descriptors to put back.
424 * Note: The caller must already have queued an invalidation request for
425 * desc->mr->rkey before calling this function.
427 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
433 spin_lock_irqsave(&pool->lock, flags);
434 for (i = 0; i < n; i++)
435 list_add(&desc[i]->entry, &pool->free_list);
436 spin_unlock_irqrestore(&pool->lock, flags);
439 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
441 struct srp_device *dev = target->srp_host->srp_dev;
443 return srp_create_fr_pool(dev->dev, dev->pd,
444 target->scsi_host->can_queue,
445 dev->max_pages_per_mr);
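/*
 * Sizing example (hypothetical numbers): with mr_page_size = 4 KiB and
 * max_pages_per_mr = 256, a single FMR or FR descriptor can map up to 1 MiB
 * of data. S/G lists that need more than that are split over multiple
 * descriptors, which is why req->fr_list / req->fmr_list are sized to
 * cmd_sg_cnt entries in srp_alloc_req_data().
 */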
449 * srp_destroy_qp() - destroy an RDMA queue pair
450 * @ch: SRP RDMA channel.
452 * Change a queue pair into the error state and wait until all receive
453 * completions have been processed before destroying it. This prevents the
454 * receive completion handler from accessing the queue pair while it is being destroyed.
457 static void srp_destroy_qp(struct srp_rdma_ch *ch)
459 static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
460 static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
461 struct ib_recv_wr *bad_wr;
464 /* Destroying a QP and reusing ch->done is only safe if not connected */
465 WARN_ON_ONCE(ch->connected);
467 ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
468 WARN_ONCE(ret, "ib_modify_qp() returned %d\n", ret);
472 init_completion(&ch->done);
473 ret = ib_post_recv(ch->qp, &wr, &bad_wr);
474 WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
476 wait_for_completion(&ch->done);
479 ib_destroy_qp(ch->qp);
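/*
 * Why the marker receive above works: once the QP has been moved to the error
 * state, every posted receive completes "flushed" in order. The SRP_LAST_WR_ID
 * work request is therefore guaranteed to complete after all receives posted
 * before it, and srp_handle_qp_err() completes ch->done when it sees that
 * wr_id, so the wait_for_completion() above only returns after the receive
 * completion handler has drained the queue.
 */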
482 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
484 struct srp_target_port *target = ch->target;
485 struct srp_device *dev = target->srp_host->srp_dev;
486 struct ib_qp_init_attr *init_attr;
487 struct ib_cq *recv_cq, *send_cq;
489 struct ib_fmr_pool *fmr_pool = NULL;
490 struct srp_fr_pool *fr_pool = NULL;
491 const int m = dev->use_fast_reg ? 3 : 1;
492 struct ib_cq_init_attr cq_attr = {};
495 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
499 /* + 1 for SRP_LAST_WR_ID */
500 cq_attr.cqe = target->queue_size + 1;
501 cq_attr.comp_vector = ch->comp_vector;
502 recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
504 if (IS_ERR(recv_cq)) {
505 ret = PTR_ERR(recv_cq);
509 cq_attr.cqe = m * target->queue_size;
510 cq_attr.comp_vector = ch->comp_vector;
511 send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
513 if (IS_ERR(send_cq)) {
514 ret = PTR_ERR(send_cq);
518 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
520 init_attr->event_handler = srp_qp_event;
521 init_attr->cap.max_send_wr = m * target->queue_size;
522 init_attr->cap.max_recv_wr = target->queue_size + 1;
523 init_attr->cap.max_recv_sge = 1;
524 init_attr->cap.max_send_sge = 1;
525 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
526 init_attr->qp_type = IB_QPT_RC;
527 init_attr->send_cq = send_cq;
528 init_attr->recv_cq = recv_cq;
530 qp = ib_create_qp(dev->pd, init_attr);
536 ret = srp_init_qp(target, qp);
540 if (dev->use_fast_reg) {
541 fr_pool = srp_alloc_fr_pool(target);
542 if (IS_ERR(fr_pool)) {
543 ret = PTR_ERR(fr_pool);
544 shost_printk(KERN_WARNING, target->scsi_host, PFX
545 "FR pool allocation failed (%d)\n", ret);
548 } else if (dev->use_fmr) {
549 fmr_pool = srp_alloc_fmr_pool(target);
550 if (IS_ERR(fmr_pool)) {
551 ret = PTR_ERR(fmr_pool);
552 shost_printk(KERN_WARNING, target->scsi_host, PFX
553 "FMR pool allocation failed (%d)\n", ret);
561 ib_destroy_cq(ch->recv_cq);
563 ib_destroy_cq(ch->send_cq);
566 ch->recv_cq = recv_cq;
567 ch->send_cq = send_cq;
569 if (dev->use_fast_reg) {
571 srp_destroy_fr_pool(ch->fr_pool);
572 ch->fr_pool = fr_pool;
573 } else if (dev->use_fmr) {
575 ib_destroy_fmr_pool(ch->fmr_pool);
576 ch->fmr_pool = fmr_pool;
586 ib_destroy_cq(send_cq);
589 ib_destroy_cq(recv_cq);
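/*
 * CQ sizing used above (roughly): the receive CQ needs queue_size + 1 entries,
 * the extra one being for the SRP_LAST_WR_ID drain request posted by
 * srp_destroy_qp(). The send CQ needs m * queue_size entries because, with
 * fast registration (m == 3), a single command may post memory registration
 * and invalidation work requests in addition to its SEND.
 */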
597 * Note: this function may be called without srp_alloc_iu_bufs() having been
598 * invoked. Hence the ch->[rt]x_ring checks.
600 static void srp_free_ch_ib(struct srp_target_port *target,
601 struct srp_rdma_ch *ch)
603 struct srp_device *dev = target->srp_host->srp_dev;
610 ib_destroy_cm_id(ch->cm_id);
614 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
618 if (dev->use_fast_reg) {
620 srp_destroy_fr_pool(ch->fr_pool);
621 } else if (dev->use_fmr) {
623 ib_destroy_fmr_pool(ch->fmr_pool);
626 ib_destroy_cq(ch->send_cq);
627 ib_destroy_cq(ch->recv_cq);
630 * Prevent the SCSI error handler from using this channel after it has
631 * been freed: the error handler may keep trying to perform recovery
632 * actions after scsi_remove_host() has returned.
638 ch->send_cq = ch->recv_cq = NULL;
641 for (i = 0; i < target->queue_size; ++i)
642 srp_free_iu(target->srp_host, ch->rx_ring[i]);
647 for (i = 0; i < target->queue_size; ++i)
648 srp_free_iu(target->srp_host, ch->tx_ring[i]);
654 static void srp_path_rec_completion(int status,
655 struct ib_sa_path_rec *pathrec,
658 struct srp_rdma_ch *ch = ch_ptr;
659 struct srp_target_port *target = ch->target;
663 shost_printk(KERN_ERR, target->scsi_host,
664 PFX "Got failed path rec status %d\n", status);
670 static int srp_lookup_path(struct srp_rdma_ch *ch)
672 struct srp_target_port *target = ch->target;
675 ch->path.numb_path = 1;
677 init_completion(&ch->done);
679 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
680 target->srp_host->srp_dev->dev,
681 target->srp_host->port,
683 IB_SA_PATH_REC_SERVICE_ID |
684 IB_SA_PATH_REC_DGID |
685 IB_SA_PATH_REC_SGID |
686 IB_SA_PATH_REC_NUMB_PATH |
688 SRP_PATH_REC_TIMEOUT_MS,
690 srp_path_rec_completion,
691 ch, &ch->path_query);
692 if (ch->path_query_id < 0)
693 return ch->path_query_id;
695 ret = wait_for_completion_interruptible(&ch->done);
700 shost_printk(KERN_WARNING, target->scsi_host,
701 PFX "Path record query failed\n");
706 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
708 struct srp_target_port *target = ch->target;
710 struct ib_cm_req_param param;
711 struct srp_login_req priv;
715 req = kzalloc(sizeof *req, GFP_KERNEL);
719 req->param.primary_path = &ch->path;
720 req->param.alternate_path = NULL;
721 req->param.service_id = target->service_id;
722 req->param.qp_num = ch->qp->qp_num;
723 req->param.qp_type = ch->qp->qp_type;
724 req->param.private_data = &req->priv;
725 req->param.private_data_len = sizeof req->priv;
726 req->param.flow_control = 1;
728 get_random_bytes(&req->param.starting_psn, 4);
729 req->param.starting_psn &= 0xffffff;
732 * Pick some arbitrary defaults here; we could make these
733 * module parameters if anyone cared about setting them.
735 req->param.responder_resources = 4;
736 req->param.remote_cm_response_timeout = 20;
737 req->param.local_cm_response_timeout = 20;
738 req->param.retry_count = target->tl_retry_count;
739 req->param.rnr_retry_count = 7;
740 req->param.max_cm_retries = 15;
742 req->priv.opcode = SRP_LOGIN_REQ;
744 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
745 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
746 SRP_BUF_FORMAT_INDIRECT);
747 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
748 SRP_MULTICHAN_SINGLE);
750 * In the published SRP specification (draft rev. 16a), the
751 * port identifier format is 8 bytes of ID extension followed
752 * by 8 bytes of GUID. Older drafts put the two halves in the
753 * opposite order, so that the GUID comes first.
755 * Targets conforming to these obsolete drafts can be
756 * recognized by the I/O Class they report.
758 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
759 memcpy(req->priv.initiator_port_id,
760 &target->sgid.global.interface_id, 8);
761 memcpy(req->priv.initiator_port_id + 8,
762 &target->initiator_ext, 8);
763 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
764 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
766 memcpy(req->priv.initiator_port_id,
767 &target->initiator_ext, 8);
768 memcpy(req->priv.initiator_port_id + 8,
769 &target->sgid.global.interface_id, 8);
770 memcpy(req->priv.target_port_id, &target->id_ext, 8);
771 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
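/*
 * Port identifier layout (illustration of the comment above):
 *
 *   SRP rev. 16a (default):  [ ID extension (8 bytes) ][ GUID (8 bytes) ]
 *   SRP rev. 10 I/O class:   [ GUID (8 bytes) ][ ID extension (8 bytes) ]
 *
 * The initiator side uses initiator_ext and the port GID's interface ID, the
 * target side uses id_ext and ioc_guid, as copied above.
 */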
775 * Topspin/Cisco SRP targets will reject our login unless we
776 * zero out the first 8 bytes of our initiator port ID and set
777 * the second 8 bytes to the local node GUID.
779 if (srp_target_is_topspin(target)) {
780 shost_printk(KERN_DEBUG, target->scsi_host,
781 PFX "Topspin/Cisco initiator port ID workaround "
782 "activated for target GUID %016llx\n",
783 be64_to_cpu(target->ioc_guid));
784 memset(req->priv.initiator_port_id, 0, 8);
785 memcpy(req->priv.initiator_port_id + 8,
786 &target->srp_host->srp_dev->dev->node_guid, 8);
789 status = ib_send_cm_req(ch->cm_id, &req->param);
796 static bool srp_queue_remove_work(struct srp_target_port *target)
798 bool changed = false;
800 spin_lock_irq(&target->lock);
801 if (target->state != SRP_TARGET_REMOVED) {
802 target->state = SRP_TARGET_REMOVED;
805 spin_unlock_irq(&target->lock);
808 queue_work(srp_remove_wq, &target->remove_work);
813 static void srp_disconnect_target(struct srp_target_port *target)
815 struct srp_rdma_ch *ch;
818 /* XXX should send SRP_I_LOGOUT request */
820 for (i = 0; i < target->ch_count; i++) {
822 ch->connected = false;
823 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
824 shost_printk(KERN_DEBUG, target->scsi_host,
825 PFX "Sending CM DREQ failed\n");
830 static void srp_free_req_data(struct srp_target_port *target,
831 struct srp_rdma_ch *ch)
833 struct srp_device *dev = target->srp_host->srp_dev;
834 struct ib_device *ibdev = dev->dev;
835 struct srp_request *req;
841 for (i = 0; i < target->req_ring_size; ++i) {
842 req = &ch->req_ring[i];
843 if (dev->use_fast_reg) {
846 kfree(req->fmr_list);
847 kfree(req->map_page);
849 if (req->indirect_dma_addr) {
850 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
851 target->indirect_size,
854 kfree(req->indirect_desc);
861 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
863 struct srp_target_port *target = ch->target;
864 struct srp_device *srp_dev = target->srp_host->srp_dev;
865 struct ib_device *ibdev = srp_dev->dev;
866 struct srp_request *req;
869 int i, ret = -ENOMEM;
871 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
876 for (i = 0; i < target->req_ring_size; ++i) {
877 req = &ch->req_ring[i];
878 mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
882 if (srp_dev->use_fast_reg) {
883 req->fr_list = mr_list;
885 req->fmr_list = mr_list;
886 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
887 sizeof(void *), GFP_KERNEL);
891 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
892 if (!req->indirect_desc)
895 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
896 target->indirect_size,
898 if (ib_dma_mapping_error(ibdev, dma_addr))
901 req->indirect_dma_addr = dma_addr;
910 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
911 * @shost: SCSI host whose attributes to remove from sysfs.
913 * Note: Any attributes defined in the host template that did not exist
914 * before this function was invoked are ignored.
916 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
918 struct device_attribute **attr;
920 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
921 device_remove_file(&shost->shost_dev, *attr);
924 static void srp_remove_target(struct srp_target_port *target)
926 struct srp_rdma_ch *ch;
929 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
931 srp_del_scsi_host_attr(target->scsi_host);
932 srp_rport_get(target->rport);
933 srp_remove_host(target->scsi_host);
934 scsi_remove_host(target->scsi_host);
935 srp_stop_rport_timers(target->rport);
936 srp_disconnect_target(target);
937 for (i = 0; i < target->ch_count; i++) {
939 srp_free_ch_ib(target, ch);
941 cancel_work_sync(&target->tl_err_work);
942 srp_rport_put(target->rport);
943 for (i = 0; i < target->ch_count; i++) {
945 srp_free_req_data(target, ch);
950 spin_lock(&target->srp_host->target_lock);
951 list_del(&target->list);
952 spin_unlock(&target->srp_host->target_lock);
954 scsi_host_put(target->scsi_host);
957 static void srp_remove_work(struct work_struct *work)
959 struct srp_target_port *target =
960 container_of(work, struct srp_target_port, remove_work);
962 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
964 srp_remove_target(target);
967 static void srp_rport_delete(struct srp_rport *rport)
969 struct srp_target_port *target = rport->lld_data;
971 srp_queue_remove_work(target);
975 * srp_connected_ch() - number of connected channels
976 * @target: SRP target port.
978 static int srp_connected_ch(struct srp_target_port *target)
982 for (i = 0; i < target->ch_count; i++)
983 c += target->ch[i].connected;
988 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
990 struct srp_target_port *target = ch->target;
993 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
995 ret = srp_lookup_path(ch);
1000 init_completion(&ch->done);
1001 ret = srp_send_req(ch, multich);
1004 ret = wait_for_completion_interruptible(&ch->done);
1009 * The CM event handling code will set status to
1010 * SRP_PORT_REDIRECT if we get a port redirect REJ
1011 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1012 * redirect REJ back.
1017 ch->connected = true;
1020 case SRP_PORT_REDIRECT:
1021 ret = srp_lookup_path(ch);
1026 case SRP_DLID_REDIRECT:
1029 case SRP_STALE_CONN:
1030 shost_printk(KERN_ERR, target->scsi_host, PFX
1031 "giving up on stale connection\n");
1041 return ret <= 0 ? ret : -ENODEV;
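/*
 * Summary of the login loop above: srp_connect_ch() keeps retrying as long as
 * the CM callback reports a redirect. SRP_PORT_REDIRECT triggers a new path
 * record lookup, SRP_DLID_REDIRECT resends the REQ over the path updated from
 * the REJ, and SRP_STALE_CONN gives up. A zero status means the channel is
 * connected, while a leftover positive status is turned into -ENODEV.
 */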
1044 static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
1046 struct ib_send_wr *bad_wr;
1047 struct ib_send_wr wr = {
1048 .opcode = IB_WR_LOCAL_INV,
1049 .wr_id = LOCAL_INV_WR_ID_MASK,
1053 .ex.invalidate_rkey = rkey,
1056 return ib_post_send(ch->qp, &wr, &bad_wr);
1059 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1060 struct srp_rdma_ch *ch,
1061 struct srp_request *req)
1063 struct srp_target_port *target = ch->target;
1064 struct srp_device *dev = target->srp_host->srp_dev;
1065 struct ib_device *ibdev = dev->dev;
1068 if (!scsi_sglist(scmnd) ||
1069 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1070 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1073 if (dev->use_fast_reg) {
1074 struct srp_fr_desc **pfr;
1076 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1077 res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
1079 shost_printk(KERN_ERR, target->scsi_host, PFX
1080 "Queueing INV WR for rkey %#x failed (%d)\n",
1081 (*pfr)->mr->rkey, res);
1082 queue_work(system_long_wq,
1083 &target->tl_err_work);
1087 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1089 } else if (dev->use_fmr) {
1090 struct ib_pool_fmr **pfmr;
1092 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1093 ib_fmr_pool_unmap(*pfmr);
1096 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1097 scmnd->sc_data_direction);
1101 * srp_claim_req - Take ownership of the scmnd associated with a request.
1102 * @ch: SRP RDMA channel.
1103 * @req: SRP request.
1104 * @sdev: If not NULL, only take ownership for this SCSI device.
1105 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1106 * ownership of @req->scmnd if it equals @scmnd.
1109 * Either NULL or a pointer to the SCSI command the caller became owner of.
1111 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1112 struct srp_request *req,
1113 struct scsi_device *sdev,
1114 struct scsi_cmnd *scmnd)
1116 unsigned long flags;
1118 spin_lock_irqsave(&ch->lock, flags);
1120 (!sdev || req->scmnd->device == sdev) &&
1121 (!scmnd || req->scmnd == scmnd)) {
1127 spin_unlock_irqrestore(&ch->lock, flags);
1133 * srp_free_req() - Unmap data and add request to the free request list.
1134 * @ch: SRP RDMA channel.
1135 * @req: Request to be freed.
1136 * @scmnd: SCSI command associated with @req.
1137 * @req_lim_delta: Amount to be added to @target->req_lim.
1139 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1140 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1142 unsigned long flags;
1144 srp_unmap_data(scmnd, ch, req);
1146 spin_lock_irqsave(&ch->lock, flags);
1147 ch->req_lim += req_lim_delta;
1148 spin_unlock_irqrestore(&ch->lock, flags);
1151 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1152 struct scsi_device *sdev, int result)
1154 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1157 srp_free_req(ch, req, scmnd, 0);
1158 scmnd->result = result;
1159 scmnd->scsi_done(scmnd);
1163 static void srp_terminate_io(struct srp_rport *rport)
1165 struct srp_target_port *target = rport->lld_data;
1166 struct srp_rdma_ch *ch;
1167 struct Scsi_Host *shost = target->scsi_host;
1168 struct scsi_device *sdev;
1172 * Invoking srp_terminate_io() while srp_queuecommand() is running
1173 * is not safe. Hence the warning statement below.
1175 shost_for_each_device(sdev, shost)
1176 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1178 for (i = 0; i < target->ch_count; i++) {
1179 ch = &target->ch[i];
1181 for (j = 0; j < target->req_ring_size; ++j) {
1182 struct srp_request *req = &ch->req_ring[j];
1184 srp_finish_req(ch, req, NULL,
1185 DID_TRANSPORT_FAILFAST << 16);
1191 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1192 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1193 * srp_reset_device() or srp_reset_host() calls will occur while this function
1194 * is in progress. One way to achieve this is not to call this function
1195 * directly but to call srp_reconnect_rport() instead, since that function
1196 * serializes calls of this function via rport->mutex and also blocks
1197 * srp_queuecommand() calls before invoking this function.
1199 static int srp_rport_reconnect(struct srp_rport *rport)
1201 struct srp_target_port *target = rport->lld_data;
1202 struct srp_rdma_ch *ch;
1204 bool multich = false;
1206 srp_disconnect_target(target);
1208 if (target->state == SRP_TARGET_SCANNING)
1212 * Now get a new local CM ID so that we avoid confusing the target in
1213 * case things are really fouled up. Doing so also ensures that all CM
1214 * callbacks will have finished before a new QP is allocated.
1216 for (i = 0; i < target->ch_count; i++) {
1217 ch = &target->ch[i];
1218 ret += srp_new_cm_id(ch);
1220 for (i = 0; i < target->ch_count; i++) {
1221 ch = &target->ch[i];
1222 for (j = 0; j < target->req_ring_size; ++j) {
1223 struct srp_request *req = &ch->req_ring[j];
1225 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1228 for (i = 0; i < target->ch_count; i++) {
1229 ch = &target->ch[i];
1231 * Whether or not creating a new CM ID succeeded, create a new
1232 * QP. This guarantees that all completion callback function
1233 * invocations have finished before request resetting starts.
1235 ret += srp_create_ch_ib(ch);
1237 INIT_LIST_HEAD(&ch->free_tx);
1238 for (j = 0; j < target->queue_size; ++j)
1239 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1242 target->qp_in_error = false;
1244 for (i = 0; i < target->ch_count; i++) {
1245 ch = &target->ch[i];
1248 ret = srp_connect_ch(ch, multich);
1253 shost_printk(KERN_INFO, target->scsi_host,
1254 PFX "reconnect succeeded\n");
1259 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1260 unsigned int dma_len, u32 rkey)
1262 struct srp_direct_buf *desc = state->desc;
1264 WARN_ON_ONCE(!dma_len);
1266 desc->va = cpu_to_be64(dma_addr);
1267 desc->key = cpu_to_be32(rkey);
1268 desc->len = cpu_to_be32(dma_len);
1270 state->total_len += dma_len;
1275 static int srp_map_finish_fmr(struct srp_map_state *state,
1276 struct srp_rdma_ch *ch)
1278 struct srp_target_port *target = ch->target;
1279 struct srp_device *dev = target->srp_host->srp_dev;
1280 struct ib_pool_fmr *fmr;
1283 if (state->fmr.next >= state->fmr.end)
1286 WARN_ON_ONCE(!dev->use_fmr);
1288 if (state->npages == 0)
1291 if (state->npages == 1 && target->global_mr) {
1292 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1293 target->global_mr->rkey);
1297 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1298 state->npages, io_addr);
1300 return PTR_ERR(fmr);
1302 *state->fmr.next++ = fmr;
1305 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1306 state->dma_len, fmr->fmr->rkey);
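/*
 * What the FMR path above does: state->pages holds mr_page_size-aligned DMA
 * addresses collected by srp_map_sg_entry(). ib_fmr_pool_map_phys() maps all
 * of them under a single rkey, and the resulting descriptor re-adds the byte
 * offset of the first page (base_dma_addr & ~dev->mr_page_mask) so that the
 * target sees the correct starting address.
 */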
1315 static int srp_map_finish_fr(struct srp_map_state *state,
1316 struct srp_rdma_ch *ch, int sg_nents)
1318 struct srp_target_port *target = ch->target;
1319 struct srp_device *dev = target->srp_host->srp_dev;
1320 struct ib_send_wr *bad_wr;
1321 struct ib_reg_wr wr;
1322 struct srp_fr_desc *desc;
1326 if (state->fr.next >= state->fr.end)
1329 WARN_ON_ONCE(!dev->use_fast_reg);
1334 if (sg_nents == 1 && target->global_mr) {
1335 srp_map_desc(state, sg_dma_address(state->sg),
1336 sg_dma_len(state->sg),
1337 target->global_mr->rkey);
1341 desc = srp_fr_pool_get(ch->fr_pool);
1345 rkey = ib_inc_rkey(desc->mr->rkey);
1346 ib_update_fast_reg_key(desc->mr, rkey);
1348 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size);
1349 if (unlikely(n < 0))
1353 wr.wr.opcode = IB_WR_REG_MR;
1354 wr.wr.wr_id = FAST_REG_WR_ID_MASK;
1356 wr.wr.send_flags = 0;
1358 wr.key = desc->mr->rkey;
1359 wr.access = (IB_ACCESS_LOCAL_WRITE |
1360 IB_ACCESS_REMOTE_READ |
1361 IB_ACCESS_REMOTE_WRITE);
1363 *state->fr.next++ = desc;
1366 srp_map_desc(state, desc->mr->iova,
1367 desc->mr->length, desc->mr->rkey);
1369 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
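/*
 * Fast registration sequence used above: bump the rkey with ib_inc_rkey() so
 * that late accesses using the previous key fail, let ib_map_mr_sg() load the
 * MR's page list from the scatterlist, and post an IB_WR_REG_MR work request.
 * The registered (iova, length, rkey) triple is then published as one SRP
 * direct descriptor via srp_map_desc().
 */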
1376 static int srp_map_sg_entry(struct srp_map_state *state,
1377 struct srp_rdma_ch *ch,
1378 struct scatterlist *sg, int sg_index)
1380 struct srp_target_port *target = ch->target;
1381 struct srp_device *dev = target->srp_host->srp_dev;
1382 struct ib_device *ibdev = dev->dev;
1383 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1384 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1385 unsigned int len = 0;
1388 WARN_ON_ONCE(!dma_len);
1391 unsigned offset = dma_addr & ~dev->mr_page_mask;
1392 if (state->npages == dev->max_pages_per_mr || offset != 0) {
1393 ret = srp_map_finish_fmr(state, ch);
1398 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1401 state->base_dma_addr = dma_addr;
1402 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1403 state->dma_len += len;
1409 * If the last entry of the MR wasn't a full page, then we need to
1410 * close it out and start a new one -- we can only merge at page
1414 if (len != dev->mr_page_size)
1415 ret = srp_map_finish_fmr(state, ch);
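/*
 * Merge rule illustrated (hypothetical addresses, 4 KiB MR pages): an S/G
 * entry that starts exactly on an MR page boundary (offset == 0) can simply
 * extend the mapping being built, whereas an entry starting at, say, offset
 * 0x800 forces srp_map_finish_fmr() to close the current mapping first.
 * Likewise, a mapping whose last entry did not end on a page boundary cannot
 * be extended any further.
 */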
1419 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1420 struct srp_request *req, struct scatterlist *scat,
1423 struct scatterlist *sg;
1426 state->desc = req->indirect_desc;
1427 state->pages = req->map_page;
1428 state->fmr.next = req->fmr_list;
1429 state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
1431 for_each_sg(scat, sg, count, i) {
1432 ret = srp_map_sg_entry(state, ch, sg, i);
1437 ret = srp_map_finish_fmr(state, ch);
1441 req->nmdesc = state->nmdesc;
1446 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1447 struct srp_request *req, struct scatterlist *scat,
1450 state->desc = req->indirect_desc;
1451 state->fr.next = req->fr_list;
1452 state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
1458 n = srp_map_finish_fr(state, ch, count);
1459 if (unlikely(n < 0))
1463 for (i = 0; i < n; i++)
1464 state->sg = sg_next(state->sg);
1467 req->nmdesc = state->nmdesc;
1472 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1473 struct srp_request *req, struct scatterlist *scat,
1476 struct srp_target_port *target = ch->target;
1477 struct srp_device *dev = target->srp_host->srp_dev;
1478 struct scatterlist *sg;
1481 state->desc = req->indirect_desc;
1482 for_each_sg(scat, sg, count, i) {
1483 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1484 ib_sg_dma_len(dev->dev, sg),
1485 target->global_mr->rkey);
1488 req->nmdesc = state->nmdesc;
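/*
 * Three mapping strategies are dispatched from srp_map_data() below:
 * srp_map_sg_fr() (fast registration), srp_map_sg_fmr() (FMR pool) and
 * srp_map_sg_dma() above, which performs no registration at all and simply
 * emits one direct descriptor per S/G entry using the global MR's rkey.
 */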
1494 * Register the indirect data buffer descriptor with the HCA.
1496 * Note: since the indirect data buffer descriptor has been allocated with
1497 * kmalloc(), it is guaranteed to be a physically contiguous memory buffer.
1500 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1501 void **next_mr, void **end_mr, u32 idb_len,
1504 struct srp_target_port *target = ch->target;
1505 struct srp_device *dev = target->srp_host->srp_dev;
1506 struct srp_map_state state;
1507 struct srp_direct_buf idb_desc;
1509 struct scatterlist idb_sg[1];
1512 memset(&state, 0, sizeof(state));
1513 memset(&idb_desc, 0, sizeof(idb_desc));
1514 state.gen.next = next_mr;
1515 state.gen.end = end_mr;
1516 state.desc = &idb_desc;
1517 state.base_dma_addr = req->indirect_dma_addr;
1518 state.dma_len = idb_len;
1520 if (dev->use_fast_reg) {
1522 sg_init_one(idb_sg, req->indirect_desc, idb_len);
1523 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1524 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1525 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1527 ret = srp_map_finish_fr(&state, ch, 1);
1530 } else if (dev->use_fmr) {
1531 state.pages = idb_pages;
1532 state.pages[0] = (req->indirect_dma_addr &
1535 ret = srp_map_finish_fmr(&state, ch);
1542 *idb_rkey = idb_desc.key;
1547 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1548 struct srp_request *req)
1550 struct srp_target_port *target = ch->target;
1551 struct scatterlist *scat;
1552 struct srp_cmd *cmd = req->cmd->buf;
1553 int len, nents, count, ret;
1554 struct srp_device *dev;
1555 struct ib_device *ibdev;
1556 struct srp_map_state state;
1557 struct srp_indirect_buf *indirect_hdr;
1558 u32 idb_len, table_len;
1562 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1563 return sizeof (struct srp_cmd);
1565 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1566 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1567 shost_printk(KERN_WARNING, target->scsi_host,
1568 PFX "Unhandled data direction %d\n",
1569 scmnd->sc_data_direction);
1573 nents = scsi_sg_count(scmnd);
1574 scat = scsi_sglist(scmnd);
1576 dev = target->srp_host->srp_dev;
1579 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1580 if (unlikely(count == 0))
1583 fmt = SRP_DATA_DESC_DIRECT;
1584 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1586 if (count == 1 && target->global_mr) {
1588 * The midlayer only generated a single gather/scatter
1589 * entry, or DMA mapping coalesced everything to a
1590 * single entry. So a direct descriptor along with
1591 * the DMA MR suffices.
1593 struct srp_direct_buf *buf = (void *) cmd->add_data;
1595 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1596 buf->key = cpu_to_be32(target->global_mr->rkey);
1597 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1604 * We have more than one scatter/gather entry, so build our indirect
1605 * descriptor table, trying to merge as many entries as we can.
1607 indirect_hdr = (void *) cmd->add_data;
1609 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1610 target->indirect_size, DMA_TO_DEVICE);
1612 memset(&state, 0, sizeof(state));
1613 if (dev->use_fast_reg)
1614 srp_map_sg_fr(&state, ch, req, scat, count);
1615 else if (dev->use_fmr)
1616 srp_map_sg_fmr(&state, ch, req, scat, count);
1618 srp_map_sg_dma(&state, ch, req, scat, count);
1620 /* We've mapped the request, now pull as much of the indirect
1621 * descriptor table as we can into the command buffer. If this
1622 * target is not using an external indirect table, we are
1623 * guaranteed to fit into the command, as the SCSI layer won't
1624 * give us more S/G entries than we allow.
1626 if (state.ndesc == 1) {
1628 * Memory registration collapsed the sg-list into one entry,
1629 * so use a direct descriptor.
1631 struct srp_direct_buf *buf = (void *) cmd->add_data;
1633 *buf = req->indirect_desc[0];
1637 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1638 !target->allow_ext_sg)) {
1639 shost_printk(KERN_ERR, target->scsi_host,
1640 "Could not fit S/G list into SRP_CMD\n");
1644 count = min(state.ndesc, target->cmd_sg_cnt);
1645 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1646 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1648 fmt = SRP_DATA_DESC_INDIRECT;
1649 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1650 len += count * sizeof (struct srp_direct_buf);
1652 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1653 count * sizeof (struct srp_direct_buf));
1655 if (!target->global_mr) {
1656 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1657 idb_len, &idb_rkey);
1662 idb_rkey = cpu_to_be32(target->global_mr->rkey);
1665 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1666 indirect_hdr->table_desc.key = idb_rkey;
1667 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1668 indirect_hdr->len = cpu_to_be32(state.total_len);
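/*
 * Descriptor format decision made above: if memory registration collapsed the
 * whole S/G list into a single entry, a direct descriptor inside the SRP_CMD
 * suffices. Otherwise up to cmd_sg_cnt descriptors are copied inline and
 * table_desc points at the full table kept in req->indirect_desc, which is
 * registered separately via srp_map_idb() when no global MR is available.
 */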
1670 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1671 cmd->data_out_desc_cnt = count;
1673 cmd->data_in_desc_cnt = count;
1675 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1679 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1680 cmd->buf_fmt = fmt << 4;
1688 * Return an IU and possible credit to the free pool
1690 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1691 enum srp_iu_type iu_type)
1693 unsigned long flags;
1695 spin_lock_irqsave(&ch->lock, flags);
1696 list_add(&iu->list, &ch->free_tx);
1697 if (iu_type != SRP_IU_RSP)
1699 spin_unlock_irqrestore(&ch->lock, flags);
1703 * Must be called with ch->lock held to protect req_lim and free_tx.
1704 * If IU is not sent, it must be returned using srp_put_tx_iu().
1707 * An upper limit for the number of allocated information units for each request type is:
1709 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1710 * more than Scsi_Host.can_queue requests.
1711 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1712 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1713 * one unanswered SRP request to an initiator.
1715 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1716 enum srp_iu_type iu_type)
1718 struct srp_target_port *target = ch->target;
1719 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1722 srp_send_completion(ch->send_cq, ch);
1724 if (list_empty(&ch->free_tx))
1727 /* Initiator responses to target requests do not consume credits */
1728 if (iu_type != SRP_IU_RSP) {
1729 if (ch->req_lim <= rsv) {
1730 ++target->zero_req_lim;
1737 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1738 list_del(&iu->list);
1742 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1744 struct srp_target_port *target = ch->target;
1746 struct ib_send_wr wr, *bad_wr;
1748 list.addr = iu->dma;
1750 list.lkey = target->lkey;
1753 wr.wr_id = (uintptr_t) iu;
1756 wr.opcode = IB_WR_SEND;
1757 wr.send_flags = IB_SEND_SIGNALED;
1759 return ib_post_send(ch->qp, &wr, &bad_wr);
1762 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1764 struct srp_target_port *target = ch->target;
1765 struct ib_recv_wr wr, *bad_wr;
1768 list.addr = iu->dma;
1769 list.length = iu->size;
1770 list.lkey = target->lkey;
1773 wr.wr_id = (uintptr_t) iu;
1777 return ib_post_recv(ch->qp, &wr, &bad_wr);
1780 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1782 struct srp_target_port *target = ch->target;
1783 struct srp_request *req;
1784 struct scsi_cmnd *scmnd;
1785 unsigned long flags;
1787 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1788 spin_lock_irqsave(&ch->lock, flags);
1789 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1790 spin_unlock_irqrestore(&ch->lock, flags);
1792 ch->tsk_mgmt_status = -1;
1793 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1794 ch->tsk_mgmt_status = rsp->data[3];
1795 complete(&ch->tsk_mgmt_done);
1797 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1798 if (scmnd && scmnd->host_scribble) {
1799 req = (void *)scmnd->host_scribble;
1800 scmnd = srp_claim_req(ch, req, NULL, scmnd);
1805 shost_printk(KERN_ERR, target->scsi_host,
1806 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1807 rsp->tag, ch - target->ch, ch->qp->qp_num);
1809 spin_lock_irqsave(&ch->lock, flags);
1810 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1811 spin_unlock_irqrestore(&ch->lock, flags);
1815 scmnd->result = rsp->status;
1817 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1818 memcpy(scmnd->sense_buffer, rsp->data +
1819 be32_to_cpu(rsp->resp_data_len),
1820 min_t(int, be32_to_cpu(rsp->sense_data_len),
1821 SCSI_SENSE_BUFFERSIZE));
1824 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1825 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1826 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1827 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1828 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1829 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1830 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1831 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1833 srp_free_req(ch, req, scmnd,
1834 be32_to_cpu(rsp->req_lim_delta));
1836 scmnd->host_scribble = NULL;
1837 scmnd->scsi_done(scmnd);
1841 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1844 struct srp_target_port *target = ch->target;
1845 struct ib_device *dev = target->srp_host->srp_dev->dev;
1846 unsigned long flags;
1850 spin_lock_irqsave(&ch->lock, flags);
1851 ch->req_lim += req_delta;
1852 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1853 spin_unlock_irqrestore(&ch->lock, flags);
1856 shost_printk(KERN_ERR, target->scsi_host, PFX
1857 "no IU available to send response\n");
1861 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1862 memcpy(iu->buf, rsp, len);
1863 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1865 err = srp_post_send(ch, iu, len);
1867 shost_printk(KERN_ERR, target->scsi_host, PFX
1868 "unable to post response: %d\n", err);
1869 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1875 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1876 struct srp_cred_req *req)
1878 struct srp_cred_rsp rsp = {
1879 .opcode = SRP_CRED_RSP,
1882 s32 delta = be32_to_cpu(req->req_lim_delta);
1884 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1885 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1886 "problems processing SRP_CRED_REQ\n");
1889 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1890 struct srp_aer_req *req)
1892 struct srp_target_port *target = ch->target;
1893 struct srp_aer_rsp rsp = {
1894 .opcode = SRP_AER_RSP,
1897 s32 delta = be32_to_cpu(req->req_lim_delta);
1899 shost_printk(KERN_ERR, target->scsi_host, PFX
1900 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1902 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1903 shost_printk(KERN_ERR, target->scsi_host, PFX
1904 "problems processing SRP_AER_REQ\n");
1907 static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
1909 struct srp_target_port *target = ch->target;
1910 struct ib_device *dev = target->srp_host->srp_dev->dev;
1911 struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1915 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1918 opcode = *(u8 *) iu->buf;
1921 shost_printk(KERN_ERR, target->scsi_host,
1922 PFX "recv completion, opcode 0x%02x\n", opcode);
1923 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
1924 iu->buf, wc->byte_len, true);
1929 srp_process_rsp(ch, iu->buf);
1933 srp_process_cred_req(ch, iu->buf);
1937 srp_process_aer_req(ch, iu->buf);
1941 /* XXX Handle target logout */
1942 shost_printk(KERN_WARNING, target->scsi_host,
1943 PFX "Got target logout request\n");
1947 shost_printk(KERN_WARNING, target->scsi_host,
1948 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
1952 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
1955 res = srp_post_recv(ch, iu);
1957 shost_printk(KERN_ERR, target->scsi_host,
1958 PFX "Recv failed with error code %d\n", res);
1962 * srp_tl_err_work() - handle a transport layer error
1963 * @work: Work structure embedded in an SRP target port.
1965 * Note: This function may get invoked before the rport has been created,
1966 * hence the target->rport test.
1968 static void srp_tl_err_work(struct work_struct *work)
1970 struct srp_target_port *target;
1972 target = container_of(work, struct srp_target_port, tl_err_work);
1974 srp_start_tl_fail_timers(target->rport);
1977 static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
1978 bool send_err, struct srp_rdma_ch *ch)
1980 struct srp_target_port *target = ch->target;
1982 if (wr_id == SRP_LAST_WR_ID) {
1983 complete(&ch->done);
1987 if (ch->connected && !target->qp_in_error) {
1988 if (wr_id & LOCAL_INV_WR_ID_MASK) {
1989 shost_printk(KERN_ERR, target->scsi_host, PFX
1990 "LOCAL_INV failed with status %s (%d)\n",
1991 ib_wc_status_msg(wc_status), wc_status);
1992 } else if (wr_id & FAST_REG_WR_ID_MASK) {
1993 shost_printk(KERN_ERR, target->scsi_host, PFX
1994 "FAST_REG_MR failed status %s (%d)\n",
1995 ib_wc_status_msg(wc_status), wc_status);
1997 shost_printk(KERN_ERR, target->scsi_host,
1998 PFX "failed %s status %s (%d) for iu %p\n",
1999 send_err ? "send" : "receive",
2000 ib_wc_status_msg(wc_status), wc_status,
2001 (void *)(uintptr_t)wr_id);
2003 queue_work(system_long_wq, &target->tl_err_work);
2005 target->qp_in_error = true;
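/*
 * wr_id encoding relied upon above: SRP_LAST_WR_ID marks the drain request
 * posted by srp_destroy_qp() (only ch->done is completed), the
 * LOCAL_INV_WR_ID_MASK and FAST_REG_WR_ID_MASK bits mark memory registration
 * work requests, and any other value is the address of the srp_iu that was
 * posted.
 */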
2008 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
2010 struct srp_rdma_ch *ch = ch_ptr;
2013 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
2014 while (ib_poll_cq(cq, 1, &wc) > 0) {
2015 if (likely(wc.status == IB_WC_SUCCESS)) {
2016 srp_handle_recv(ch, &wc);
2018 srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
2023 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
2025 struct srp_rdma_ch *ch = ch_ptr;
2029 while (ib_poll_cq(cq, 1, &wc) > 0) {
2030 if (likely(wc.status == IB_WC_SUCCESS)) {
2031 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
2032 list_add(&iu->list, &ch->free_tx);
2034 srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
2039 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2041 struct srp_target_port *target = host_to_target(shost);
2042 struct srp_rport *rport = target->rport;
2043 struct srp_rdma_ch *ch;
2044 struct srp_request *req;
2046 struct srp_cmd *cmd;
2047 struct ib_device *dev;
2048 unsigned long flags;
2052 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2055 * The SCSI EH thread is the only context from which srp_queuecommand()
2056 * can get invoked for blocked devices (SDEV_BLOCK /
2057 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2058 * locking the rport mutex if invoked from inside the SCSI EH.
2061 mutex_lock(&rport->mutex);
2063 scmnd->result = srp_chkready(target->rport);
2064 if (unlikely(scmnd->result))
2067 WARN_ON_ONCE(scmnd->request->tag < 0);
2068 tag = blk_mq_unique_tag(scmnd->request);
2069 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2070 idx = blk_mq_unique_tag_to_tag(tag);
2071 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2072 dev_name(&shost->shost_gendev), tag, idx,
2073 target->req_ring_size);
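/*
 * Tag decoding above (per blk-mq convention): the value returned by
 * blk_mq_unique_tag() carries the hardware queue index in its upper 16 bits
 * and the per-queue tag in its lower 16 bits, so the hardware queue selects
 * the RDMA channel and the tag indexes ch->req_ring directly.
 */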
2075 spin_lock_irqsave(&ch->lock, flags);
2076 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2077 spin_unlock_irqrestore(&ch->lock, flags);
2082 req = &ch->req_ring[idx];
2083 dev = target->srp_host->srp_dev->dev;
2084 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2087 scmnd->host_scribble = (void *) req;
2090 memset(cmd, 0, sizeof *cmd);
2092 cmd->opcode = SRP_CMD;
2093 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2095 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2100 len = srp_map_data(scmnd, ch, req);
2102 shost_printk(KERN_ERR, target->scsi_host,
2103 PFX "Failed to map data (%d)\n", len);
2105 * If we ran out of memory descriptors (-ENOMEM) because an
2106 * application is queuing many requests with more than
2107 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2108 * to reduce queue depth temporarily.
2110 scmnd->result = len == -ENOMEM ?
2111 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2115 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2118 if (srp_post_send(ch, iu, len)) {
2119 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2127 mutex_unlock(&rport->mutex);
2132 srp_unmap_data(scmnd, ch, req);
2135 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2138 * Make sure that the loops that iterate over the request ring cannot
2139 * encounter a dangling SCSI command pointer.
2144 if (scmnd->result) {
2145 scmnd->scsi_done(scmnd);
2148 ret = SCSI_MLQUEUE_HOST_BUSY;
2155 * Note: the resources allocated in this function are freed in srp_free_ch_ib().
2158 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2160 struct srp_target_port *target = ch->target;
2163 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2167 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2172 for (i = 0; i < target->queue_size; ++i) {
2173 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2175 GFP_KERNEL, DMA_FROM_DEVICE);
2176 if (!ch->rx_ring[i])
2180 for (i = 0; i < target->queue_size; ++i) {
2181 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2183 GFP_KERNEL, DMA_TO_DEVICE);
2184 if (!ch->tx_ring[i])
2187 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2193 for (i = 0; i < target->queue_size; ++i) {
2194 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2195 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2208 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2210 uint64_t T_tr_ns, max_compl_time_ms;
2211 uint32_t rq_tmo_jiffies;
2214 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2215 * table 91), both the QP timeout and the retry count have to be set
2216 * for RC QPs during the RTR to RTS transition.
2218 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2219 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2222 * Set target->rq_tmo_jiffies to one second more than the largest time
2223 * it can take before an error completion is generated. See also
2224 * C9-140..142 in the IBTA spec for more information about how to
2225 * convert the QP Local ACK Timeout value to nanoseconds.
2227 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2228 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2229 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2230 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2232 return rq_tmo_jiffies;
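/*
 * Worked example (hypothetical values): with qp_attr->timeout == 14 and
 * retry_cnt == 7, T_tr_ns = 4096 * 2^14 ns ~= 67 ms, so max_compl_time_ms ~=
 * 7 * 4 * 67 ms ~= 1.9 s, and the resulting rq_tmo is roughly 2.9 seconds
 * expressed in jiffies.
 */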
2235 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2236 const struct srp_login_rsp *lrsp,
2237 struct srp_rdma_ch *ch)
2239 struct srp_target_port *target = ch->target;
2240 struct ib_qp_attr *qp_attr = NULL;
2245 if (lrsp->opcode == SRP_LOGIN_RSP) {
2246 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2247 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2250 * Reserve credits for task management so we don't
2251 * bounce requests back to the SCSI mid-layer.
2253 target->scsi_host->can_queue
2254 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2255 target->scsi_host->can_queue);
2256 target->scsi_host->cmd_per_lun
2257 = min_t(int, target->scsi_host->can_queue,
2258 target->scsi_host->cmd_per_lun);
2260 shost_printk(KERN_WARNING, target->scsi_host,
2261 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2267 ret = srp_alloc_iu_bufs(ch);
2273 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2277 qp_attr->qp_state = IB_QPS_RTR;
2278 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2282 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2286 for (i = 0; i < target->queue_size; i++) {
2287 struct srp_iu *iu = ch->rx_ring[i];
2289 ret = srp_post_recv(ch, iu);
2294 qp_attr->qp_state = IB_QPS_RTS;
2295 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2299 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2301 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2305 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2314 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2315 struct ib_cm_event *event,
2316 struct srp_rdma_ch *ch)
2318 struct srp_target_port *target = ch->target;
2319 struct Scsi_Host *shost = target->scsi_host;
2320 struct ib_class_port_info *cpi;
2323 switch (event->param.rej_rcvd.reason) {
2324 case IB_CM_REJ_PORT_CM_REDIRECT:
2325 cpi = event->param.rej_rcvd.ari;
2326 ch->path.dlid = cpi->redirect_lid;
2327 ch->path.pkey = cpi->redirect_pkey;
2328 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2329 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2331 ch->status = ch->path.dlid ?
2332 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2335 case IB_CM_REJ_PORT_REDIRECT:
2336 if (srp_target_is_topspin(target)) {
2338 * Topspin/Cisco SRP gateways incorrectly send
2339 * reject reason code 25 when they mean 24 (port redirect).
2342 memcpy(ch->path.dgid.raw,
2343 event->param.rej_rcvd.ari, 16);
2345 shost_printk(KERN_DEBUG, shost,
2346 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2347 be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2348 be64_to_cpu(ch->path.dgid.global.interface_id));
2350 ch->status = SRP_PORT_REDIRECT;
2352 shost_printk(KERN_WARNING, shost,
2353 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2354 ch->status = -ECONNRESET;
2358 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2359 shost_printk(KERN_WARNING, shost,
2360 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2361 ch->status = -ECONNRESET;
2364 case IB_CM_REJ_CONSUMER_DEFINED:
2365 opcode = *(u8 *) event->private_data;
2366 if (opcode == SRP_LOGIN_REJ) {
2367 struct srp_login_rej *rej = event->private_data;
2368 u32 reason = be32_to_cpu(rej->reason);
2370 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2371 shost_printk(KERN_WARNING, shost,
2372 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2374 shost_printk(KERN_WARNING, shost, PFX
2375 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2377 target->orig_dgid.raw, reason);
2379 shost_printk(KERN_WARNING, shost,
2380 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2381 " opcode 0x%02x\n", opcode);
2382 ch->status = -ECONNRESET;
2385 case IB_CM_REJ_STALE_CONN:
2386 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2387 ch->status = SRP_STALE_CONN;
2391 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2392 event->param.rej_rcvd.reason);
2393 ch->status = -ECONNRESET;
2397 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2399 struct srp_rdma_ch *ch = cm_id->context;
2400 struct srp_target_port *target = ch->target;
2403 switch (event->event) {
2404 case IB_CM_REQ_ERROR:
2405 shost_printk(KERN_DEBUG, target->scsi_host,
2406 PFX "Sending CM REQ failed\n");
2408 ch->status = -ECONNRESET;
2411 case IB_CM_REP_RECEIVED:
2413 srp_cm_rep_handler(cm_id, event->private_data, ch);
2416 case IB_CM_REJ_RECEIVED:
2417 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2420 srp_cm_rej_handler(cm_id, event, ch);
2423 case IB_CM_DREQ_RECEIVED:
2424 shost_printk(KERN_WARNING, target->scsi_host,
2425 PFX "DREQ received - connection closed\n");
2426 ch->connected = false;
2427 if (ib_send_cm_drep(cm_id, NULL, 0))
2428 shost_printk(KERN_ERR, target->scsi_host,
2429 PFX "Sending CM DREP failed\n");
2430 queue_work(system_long_wq, &target->tl_err_work);
2433 case IB_CM_TIMEWAIT_EXIT:
2434 shost_printk(KERN_ERR, target->scsi_host,
2435 PFX "connection closed\n");
2441 case IB_CM_MRA_RECEIVED:
2442 case IB_CM_DREQ_ERROR:
2443 case IB_CM_DREP_RECEIVED:
2447 shost_printk(KERN_WARNING, target->scsi_host,
2448 PFX "Unhandled CM event %d\n", event->event);
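/*
 * Completing ch->done wakes up the connect path waiting in
 * srp_connect_ch(), which inspects ch->status to decide whether to
 * retry, follow a redirect (SRP_PORT_REDIRECT/SRP_DLID_REDIRECT) or
 * fail the connection attempt.
 */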
2453 complete(&ch->done);
2459 * srp_change_queue_depth - set the queue depth of a SCSI device
2460 * @sdev: SCSI device struct
2461 * @qdepth: requested queue depth
2463 * Returns the new queue depth.
2466 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2468 if (!sdev->tagged_supported)
2469 qdepth = 1;
2470 return scsi_change_queue_depth(sdev, qdepth);
2473 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2476 struct srp_target_port *target = ch->target;
2477 struct srp_rport *rport = target->rport;
2478 struct ib_device *dev = target->srp_host->srp_dev->dev;
2480 struct srp_tsk_mgmt *tsk_mgmt;
2482 if (!ch->connected || target->qp_in_error)
2485 init_completion(&ch->tsk_mgmt_done);
2488 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2489 * invoked while a task management function is being sent.
2491 mutex_lock(&rport->mutex);
2492 spin_lock_irq(&ch->lock);
2493 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2494 spin_unlock_irq(&ch->lock);
2497 mutex_unlock(&rport->mutex);
2502 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2503 DMA_TO_DEVICE);
2504 tsk_mgmt = iu->buf;
2505 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2507 tsk_mgmt->opcode = SRP_TSK_MGMT;
2508 int_to_scsilun(lun, &tsk_mgmt->lun);
2509 tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
2510 tsk_mgmt->tsk_mgmt_func = func;
2511 tsk_mgmt->task_tag = req_tag;
2513 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2514 DMA_TO_DEVICE);
2515 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2516 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2517 mutex_unlock(&rport->mutex);
2521 mutex_unlock(&rport->mutex);
2523 if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2524 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
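/*
 * No response from the target within SRP_ABORT_TIMEOUT_MS is treated as
 * a failed task management request.
 */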
2530 static int srp_abort(struct scsi_cmnd *scmnd)
2532 struct srp_target_port *target = host_to_target(scmnd->device->host);
2533 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2536 struct srp_rdma_ch *ch;
2539 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2543 tag = blk_mq_unique_tag(scmnd->request);
2544 ch_idx = blk_mq_unique_tag_to_hwq(tag);
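/*
 * The blk-mq unique tag encodes the hardware queue index; hardware
 * queues map one-to-one onto RDMA channels (nr_hw_queues is set to
 * ch_count in srp_create_target()), so the abort is issued on the
 * channel that carried the original command.
 */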
2545 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2547 ch = &target->ch[ch_idx];
2548 if (!srp_claim_req(ch, req, NULL, scmnd))
2550 shost_printk(KERN_ERR, target->scsi_host,
2551 "Sending SRP abort for tag %#x\n", tag);
2552 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2553 SRP_TSK_ABORT_TASK) == 0)
2555 else if (target->rport->state == SRP_RPORT_LOST)
2559 srp_free_req(ch, req, scmnd, 0);
2560 scmnd->result = DID_ABORT << 16;
2561 scmnd->scsi_done(scmnd);
2566 static int srp_reset_device(struct scsi_cmnd *scmnd)
2568 struct srp_target_port *target = host_to_target(scmnd->device->host);
2569 struct srp_rdma_ch *ch;
2570 int i, j;
2572 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2574 ch = &target->ch[0];
2575 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2578 if (ch->tsk_mgmt_status)
2581 for (i = 0; i < target->ch_count; i++) {
2582 ch = &target->ch[i];
2583 for (j = 0; j < target->req_ring_size; ++j) {
2584 struct srp_request *req = &ch->req_ring[j];
2586 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2593 static int srp_reset_host(struct scsi_cmnd *scmnd)
2595 struct srp_target_port *target = host_to_target(scmnd->device->host);
2597 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2599 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2602 static int srp_slave_configure(struct scsi_device *sdev)
2604 struct Scsi_Host *shost = sdev->host;
2605 struct srp_target_port *target = host_to_target(shost);
2606 struct request_queue *q = sdev->request_queue;
2607 unsigned long timeout;
2609 if (sdev->type == TYPE_DISK) {
2610 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2611 blk_queue_rq_timeout(q, timeout);
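/*
 * The request timeout chosen above is at least 30 seconds and at least
 * rq_tmo_jiffies, the value derived from the QP retry parameters in
 * srp_compute_rq_tmo(), so the block layer does not time out commands
 * before the HCA has given up retrying them.
 */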
2617 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2620 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2622 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2625 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2628 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2630 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2633 static ssize_t show_service_id(struct device *dev,
2634 struct device_attribute *attr, char *buf)
2636 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2638 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2641 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2644 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2646 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2649 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2652 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2654 return sprintf(buf, "%pI6\n", target->sgid.raw);
2657 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2660 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2661 struct srp_rdma_ch *ch = &target->ch[0];
2663 return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2666 static ssize_t show_orig_dgid(struct device *dev,
2667 struct device_attribute *attr, char *buf)
2669 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2671 return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2674 static ssize_t show_req_lim(struct device *dev,
2675 struct device_attribute *attr, char *buf)
2677 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2678 struct srp_rdma_ch *ch;
2679 int i, req_lim = INT_MAX;
2681 for (i = 0; i < target->ch_count; i++) {
2682 ch = &target->ch[i];
2683 req_lim = min(req_lim, ch->req_lim);
2685 return sprintf(buf, "%d\n", req_lim);
2688 static ssize_t show_zero_req_lim(struct device *dev,
2689 struct device_attribute *attr, char *buf)
2691 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2693 return sprintf(buf, "%d\n", target->zero_req_lim);
2696 static ssize_t show_local_ib_port(struct device *dev,
2697 struct device_attribute *attr, char *buf)
2699 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2701 return sprintf(buf, "%d\n", target->srp_host->port);
2704 static ssize_t show_local_ib_device(struct device *dev,
2705 struct device_attribute *attr, char *buf)
2707 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2709 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2712 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2715 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2717 return sprintf(buf, "%d\n", target->ch_count);
2720 static ssize_t show_comp_vector(struct device *dev,
2721 struct device_attribute *attr, char *buf)
2723 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2725 return sprintf(buf, "%d\n", target->comp_vector);
2728 static ssize_t show_tl_retry_count(struct device *dev,
2729 struct device_attribute *attr, char *buf)
2731 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2733 return sprintf(buf, "%d\n", target->tl_retry_count);
2736 static ssize_t show_cmd_sg_entries(struct device *dev,
2737 struct device_attribute *attr, char *buf)
2739 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2741 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2744 static ssize_t show_allow_ext_sg(struct device *dev,
2745 struct device_attribute *attr, char *buf)
2747 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2749 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2752 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
2753 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
2754 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
2755 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2756 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
2757 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
2758 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
2759 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
2760 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
2761 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
2762 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2763 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
2764 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2765 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
2766 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
2767 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
2769 static struct device_attribute *srp_host_attrs[] = {
2770 &dev_attr_id_ext,
2771 &dev_attr_ioc_guid,
2772 &dev_attr_service_id,
2773 &dev_attr_pkey,
2774 &dev_attr_sgid,
2775 &dev_attr_dgid,
2776 &dev_attr_orig_dgid,
2777 &dev_attr_req_lim,
2778 &dev_attr_zero_req_lim,
2779 &dev_attr_local_ib_port,
2780 &dev_attr_local_ib_device,
2781 &dev_attr_ch_count,
2782 &dev_attr_comp_vector,
2783 &dev_attr_tl_retry_count,
2784 &dev_attr_cmd_sg_entries,
2785 &dev_attr_allow_ext_sg,
2786 NULL
2787 };
2789 static struct scsi_host_template srp_template = {
2790 .module = THIS_MODULE,
2791 .name = "InfiniBand SRP initiator",
2792 .proc_name = DRV_NAME,
2793 .slave_configure = srp_slave_configure,
2794 .info = srp_target_info,
2795 .queuecommand = srp_queuecommand,
2796 .change_queue_depth = srp_change_queue_depth,
2797 .eh_abort_handler = srp_abort,
2798 .eh_device_reset_handler = srp_reset_device,
2799 .eh_host_reset_handler = srp_reset_host,
2800 .skip_settle_delay = true,
2801 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2802 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2804 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2805 .use_clustering = ENABLE_CLUSTERING,
2806 .shost_attrs = srp_host_attrs,
2807 .track_queue_depth = 1,
2810 static int srp_sdev_count(struct Scsi_Host *host)
2812 struct scsi_device *sdev;
2813 int c = 0;
2815 shost_for_each_device(sdev, host)
2816 c++;
2818 return c;
2822 * Return values:
2823 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2824 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2825 *    removal has been scheduled.
2826 * 0 and target->state != SRP_TARGET_REMOVED upon success.
2828 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2830 struct srp_rport_identifiers ids;
2831 struct srp_rport *rport;
2833 target->state = SRP_TARGET_SCANNING;
2834 sprintf(target->target_name, "SRP.T10:%016llX",
2835 be64_to_cpu(target->id_ext));
2837 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2840 memcpy(ids.port_id, &target->id_ext, 8);
2841 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2842 ids.roles = SRP_RPORT_ROLE_TARGET;
2843 rport = srp_rport_add(target->scsi_host, &ids);
2844 if (IS_ERR(rport)) {
2845 scsi_remove_host(target->scsi_host);
2846 return PTR_ERR(rport);
2849 rport->lld_data = target;
2850 target->rport = rport;
2852 spin_lock(&host->target_lock);
2853 list_add_tail(&target->list, &host->target_list);
2854 spin_unlock(&host->target_lock);
2856 scsi_scan_target(&target->scsi_host->shost_gendev,
2857 0, target->scsi_id, SCAN_WILD_CARD, 0);
2859 if (srp_connected_ch(target) < target->ch_count ||
2860 target->qp_in_error) {
2861 shost_printk(KERN_INFO, target->scsi_host,
2862 PFX "SCSI scan failed - removing SCSI host\n");
2863 srp_queue_remove_work(target);
2867 pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
2868 dev_name(&target->scsi_host->shost_gendev),
2869 srp_sdev_count(target->scsi_host));
2871 spin_lock_irq(&target->lock);
2872 if (target->state == SRP_TARGET_SCANNING)
2873 target->state = SRP_TARGET_LIVE;
2874 spin_unlock_irq(&target->lock);
2880 static void srp_release_dev(struct device *dev)
2882 struct srp_host *host =
2883 container_of(dev, struct srp_host, dev);
2885 complete(&host->released);
2888 static struct class srp_class = {
2889 .name = "infiniband_srp",
2890 .dev_release = srp_release_dev
2894 * srp_conn_unique() - check whether the connection to a target is unique
2895 * @host:   SRP host.
2896 * @target: SRP target port.
2898 static bool srp_conn_unique(struct srp_host *host,
2899 struct srp_target_port *target)
2901 struct srp_target_port *t;
2904 if (target->state == SRP_TARGET_REMOVED)
2909 spin_lock(&host->target_lock);
2910 list_for_each_entry(t, &host->target_list, list) {
2911 if (t != target &&
2912 target->id_ext == t->id_ext &&
2913 target->ioc_guid == t->ioc_guid &&
2914 target->initiator_ext == t->initiator_ext) {
2919 spin_unlock(&host->target_lock);
2926 * Target ports are added by writing
2928 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
2929 * pkey=<P_Key>,service_id=<service ID>
2931 * to the add_target sysfs attribute.
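 *
 * For example (illustrative identifiers and device name only):
 *
 *   echo id_ext=200100e08b000000,ioc_guid=0002c90200402bd4,dgid=fe800000000000000002c90200402bd5,pkey=ffff,service_id=0002c90200402bd4 > /sys/class/infiniband_srp/srp-mlx4_0-1/add_target
 */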
2935 SRP_OPT_ID_EXT = 1 << 0,
2936 SRP_OPT_IOC_GUID = 1 << 1,
2937 SRP_OPT_DGID = 1 << 2,
2938 SRP_OPT_PKEY = 1 << 3,
2939 SRP_OPT_SERVICE_ID = 1 << 4,
2940 SRP_OPT_MAX_SECT = 1 << 5,
2941 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
2942 SRP_OPT_IO_CLASS = 1 << 7,
2943 SRP_OPT_INITIATOR_EXT = 1 << 8,
2944 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
2945 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
2946 SRP_OPT_SG_TABLESIZE = 1 << 11,
2947 SRP_OPT_COMP_VECTOR = 1 << 12,
2948 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
2949 SRP_OPT_QUEUE_SIZE = 1 << 14,
2950 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
2951 SRP_OPT_IOC_GUID |
2952 SRP_OPT_DGID |
2953 SRP_OPT_PKEY |
2954 SRP_OPT_SERVICE_ID),
2957 static const match_table_t srp_opt_tokens = {
2958 { SRP_OPT_ID_EXT, "id_ext=%s" },
2959 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
2960 { SRP_OPT_DGID, "dgid=%s" },
2961 { SRP_OPT_PKEY, "pkey=%x" },
2962 { SRP_OPT_SERVICE_ID, "service_id=%s" },
2963 { SRP_OPT_MAX_SECT, "max_sect=%d" },
2964 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
2965 { SRP_OPT_IO_CLASS, "io_class=%x" },
2966 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
2967 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
2968 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
2969 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
2970 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
2971 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
2972 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
2973 { SRP_OPT_ERR, NULL }
2976 static int srp_parse_options(const char *buf, struct srp_target_port *target)
2978 char *options, *sep_opt;
2981 substring_t args[MAX_OPT_ARGS];
2987 options = kstrdup(buf, GFP_KERNEL);
2992 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
2996 token = match_token(p, srp_opt_tokens, args);
3000 case SRP_OPT_ID_EXT:
3001 p = match_strdup(args);
3006 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3010 case SRP_OPT_IOC_GUID:
3011 p = match_strdup(args);
3016 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3020 case SRP_OPT_DGID:
3021 p = match_strdup(args);
3026 if (strlen(p) != 32) {
3027 pr_warn("bad dest GID parameter '%s'\n", p);
3032 for (i = 0; i < 16; ++i) {
3033 strlcpy(dgid, p + i * 2, sizeof(dgid));
3034 if (sscanf(dgid, "%hhx",
3035 &target->orig_dgid.raw[i]) < 1) {
3044 case SRP_OPT_PKEY:
3045 if (match_hex(args, &token)) {
3046 pr_warn("bad P_Key parameter '%s'\n", p);
3049 target->pkey = cpu_to_be16(token);
3052 case SRP_OPT_SERVICE_ID:
3053 p = match_strdup(args);
3058 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3062 case SRP_OPT_MAX_SECT:
3063 if (match_int(args, &token)) {
3064 pr_warn("bad max sect parameter '%s'\n", p);
3067 target->scsi_host->max_sectors = token;
3070 case SRP_OPT_QUEUE_SIZE:
3071 if (match_int(args, &token) || token < 1) {
3072 pr_warn("bad queue_size parameter '%s'\n", p);
3075 target->scsi_host->can_queue = token;
3076 target->queue_size = token + SRP_RSP_SQ_SIZE +
3077 SRP_TSK_MGMT_SQ_SIZE;
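/*
 * Extra ring slots are reserved on top of the requested queue depth so
 * that response and task-management IUs never compete with SCSI
 * commands for send-queue space.
 */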
3078 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3079 target->scsi_host->cmd_per_lun = token;
3082 case SRP_OPT_MAX_CMD_PER_LUN:
3083 if (match_int(args, &token) || token < 1) {
3084 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3088 target->scsi_host->cmd_per_lun = token;
3091 case SRP_OPT_IO_CLASS:
3092 if (match_hex(args, &token)) {
3093 pr_warn("bad IO class parameter '%s'\n", p);
3096 if (token != SRP_REV10_IB_IO_CLASS &&
3097 token != SRP_REV16A_IB_IO_CLASS) {
3098 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3099 token, SRP_REV10_IB_IO_CLASS,
3100 SRP_REV16A_IB_IO_CLASS);
3103 target->io_class = token;
3106 case SRP_OPT_INITIATOR_EXT:
3107 p = match_strdup(args);
3112 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3116 case SRP_OPT_CMD_SG_ENTRIES:
3117 if (match_int(args, &token) || token < 1 || token > 255) {
3118 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3122 target->cmd_sg_cnt = token;
3125 case SRP_OPT_ALLOW_EXT_SG:
3126 if (match_int(args, &token)) {
3127 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3130 target->allow_ext_sg = !!token;
3133 case SRP_OPT_SG_TABLESIZE:
3134 if (match_int(args, &token) || token < 1 ||
3135 token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
3136 pr_warn("bad max sg_tablesize parameter '%s'\n",
3140 target->sg_tablesize = token;
3143 case SRP_OPT_COMP_VECTOR:
3144 if (match_int(args, &token) || token < 0) {
3145 pr_warn("bad comp_vector parameter '%s'\n", p);
3148 target->comp_vector = token;
3151 case SRP_OPT_TL_RETRY_COUNT:
3152 if (match_int(args, &token) || token < 2 || token > 7) {
3153 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3157 target->tl_retry_count = token;
3161 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3167 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3170 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3171 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3172 !(srp_opt_tokens[i].token & opt_mask))
3173 pr_warn("target creation request is missing parameter '%s'\n",
3174 srp_opt_tokens[i].pattern);
3176 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3177 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3178 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3179 target->scsi_host->cmd_per_lun,
3180 target->scsi_host->can_queue);
3187 static ssize_t srp_create_target(struct device *dev,
3188 struct device_attribute *attr,
3189 const char *buf, size_t count)
3191 struct srp_host *host =
3192 container_of(dev, struct srp_host, dev);
3193 struct Scsi_Host *target_host;
3194 struct srp_target_port *target;
3195 struct srp_rdma_ch *ch;
3196 struct srp_device *srp_dev = host->srp_dev;
3197 struct ib_device *ibdev = srp_dev->dev;
3198 int ret, node_idx, node, cpu, i;
3199 bool multich = false;
3201 target_host = scsi_host_alloc(&srp_template,
3202 sizeof (struct srp_target_port));
3206 target_host->transportt = ib_srp_transport_template;
3207 target_host->max_channel = 0;
3208 target_host->max_id = 1;
3209 target_host->max_lun = -1LL;
3210 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3212 target = host_to_target(target_host);
3214 target->io_class = SRP_REV16A_IB_IO_CLASS;
3215 target->scsi_host = target_host;
3216 target->srp_host = host;
3217 target->lkey = host->srp_dev->pd->local_dma_lkey;
3218 target->global_mr = host->srp_dev->global_mr;
3219 target->cmd_sg_cnt = cmd_sg_entries;
3220 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3221 target->allow_ext_sg = allow_ext_sg;
3222 target->tl_retry_count = 7;
3223 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3226 * Prevent the SCSI host from being removed by srp_remove_target()
3227 * before this function returns.
3229 scsi_host_get(target->scsi_host);
3231 mutex_lock(&host->add_target_mutex);
3233 ret = srp_parse_options(buf, target);
3237 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3239 if (!srp_conn_unique(target->srp_host, target)) {
3240 shost_printk(KERN_INFO, target->scsi_host,
3241 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3242 be64_to_cpu(target->id_ext),
3243 be64_to_cpu(target->ioc_guid),
3244 be64_to_cpu(target->initiator_ext));
3249 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3250 target->cmd_sg_cnt < target->sg_tablesize) {
3251 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3252 target->sg_tablesize = target->cmd_sg_cnt;
3255 target_host->sg_tablesize = target->sg_tablesize;
3256 target->indirect_size = target->sg_tablesize *
3257 sizeof (struct srp_direct_buf);
3258 target->max_iu_len = sizeof (struct srp_cmd) +
3259 sizeof (struct srp_indirect_buf) +
3260 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3262 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3263 INIT_WORK(&target->remove_work, srp_remove_work);
3264 spin_lock_init(&target->lock);
3265 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3270 target->ch_count = max_t(unsigned, num_online_nodes(),
3272 min(4 * num_online_nodes(),
3273 ibdev->num_comp_vectors),
3274 num_online_cpus()));
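/*
 * Illustrative sizing: with 2 online NUMA nodes, 16 online CPUs and an
 * HCA exposing 8 completion vectors, the expression above evaluates to
 * max(2, min(min(4 * 2, 8), 16)) = 8 RDMA channels.
 */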
3275 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3281 for_each_online_node(node) {
3282 const int ch_start = (node_idx * target->ch_count /
3283 num_online_nodes());
3284 const int ch_end = ((node_idx + 1) * target->ch_count /
3285 num_online_nodes());
3286 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3287 num_online_nodes() + target->comp_vector)
3288 % ibdev->num_comp_vectors;
3289 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3290 num_online_nodes() + target->comp_vector)
3291 % ibdev->num_comp_vectors;
3294 for_each_online_cpu(cpu) {
3295 if (cpu_to_node(cpu) != node)
3297 if (ch_start + cpu_idx >= ch_end)
3299 ch = &target->ch[ch_start + cpu_idx];
3300 ch->target = target;
3301 ch->comp_vector = cv_start == cv_end ? cv_start :
3302 cv_start + cpu_idx % (cv_end - cv_start);
3303 spin_lock_init(&ch->lock);
3304 INIT_LIST_HEAD(&ch->free_tx);
3305 ret = srp_new_cm_id(ch);
3307 goto err_disconnect;
3309 ret = srp_create_ch_ib(ch);
3311 goto err_disconnect;
3313 ret = srp_alloc_req_data(ch);
3315 goto err_disconnect;
3317 ret = srp_connect_ch(ch, multich);
3319 shost_printk(KERN_ERR, target->scsi_host,
3320 PFX "Connection %d/%d failed\n",
3323 if (node_idx == 0 && cpu_idx == 0) {
3324 goto err_disconnect;
3326 srp_free_ch_ib(target, ch);
3327 srp_free_req_data(target, ch);
3328 target->ch_count = ch - target->ch;
3340 target->scsi_host->nr_hw_queues = target->ch_count;
3342 ret = srp_add_target(host, target);
3344 goto err_disconnect;
3346 if (target->state != SRP_TARGET_REMOVED) {
3347 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3348 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3349 be64_to_cpu(target->id_ext),
3350 be64_to_cpu(target->ioc_guid),
3351 be16_to_cpu(target->pkey),
3352 be64_to_cpu(target->service_id),
3353 target->sgid.raw, target->orig_dgid.raw);
3359 mutex_unlock(&host->add_target_mutex);
3361 scsi_host_put(target->scsi_host);
3362 if (ret < 0)
3363 scsi_host_put(target->scsi_host);
3368 srp_disconnect_target(target);
3370 for (i = 0; i < target->ch_count; i++) {
3371 ch = &target->ch[i];
3372 srp_free_ch_ib(target, ch);
3373 srp_free_req_data(target, ch);
3380 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3382 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3385 struct srp_host *host = container_of(dev, struct srp_host, dev);
3387 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3390 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3392 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3395 struct srp_host *host = container_of(dev, struct srp_host, dev);
3397 return sprintf(buf, "%d\n", host->port);
3400 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3402 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3404 struct srp_host *host;
3406 host = kzalloc(sizeof *host, GFP_KERNEL);
3410 INIT_LIST_HEAD(&host->target_list);
3411 spin_lock_init(&host->target_lock);
3412 init_completion(&host->released);
3413 mutex_init(&host->add_target_mutex);
3414 host->srp_dev = device;
3417 host->dev.class = &srp_class;
3418 host->dev.parent = device->dev->dma_device;
3419 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3421 if (device_register(&host->dev))
3423 if (device_create_file(&host->dev, &dev_attr_add_target))
3425 if (device_create_file(&host->dev, &dev_attr_ibdev))
3427 if (device_create_file(&host->dev, &dev_attr_port))
3433 device_unregister(&host->dev);
3441 static void srp_add_one(struct ib_device *device)
3443 struct srp_device *srp_dev;
3444 struct ib_device_attr *dev_attr;
3445 struct srp_host *host;
3446 int mr_page_shift, p;
3447 u64 max_pages_per_mr;
3449 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
3453 if (ib_query_device(device, dev_attr)) {
3454 pr_warn("Query device failed for %s\n", device->name);
3458 srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
3462 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3463 device->map_phys_fmr && device->unmap_fmr);
3464 srp_dev->has_fr = (dev_attr->device_cap_flags &
3465 IB_DEVICE_MEM_MGT_EXTENSIONS);
3466 if (!srp_dev->has_fmr && !srp_dev->has_fr)
3467 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3469 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3470 (!srp_dev->has_fmr || prefer_fr));
3471 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3474 * Use the smallest page size supported by the HCA, down to a
3475 * minimum of 4096 bytes. We're unlikely to build large sglists
3476 * out of smaller entries.
3478 mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
3479 srp_dev->mr_page_size = 1 << mr_page_shift;
3480 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
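/*
 * Example: an HCA reporting page_size_cap = 0xfffff000 (4 KB and up)
 * yields ffs(0xfffff000) - 1 = 12, so mr_page_size = 4096 and
 * mr_page_mask = ~0xfffULL.
 */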
3481 max_pages_per_mr = dev_attr->max_mr_size;
3482 do_div(max_pages_per_mr, srp_dev->mr_page_size);
3483 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3484 max_pages_per_mr);
3485 if (srp_dev->use_fast_reg) {
3486 srp_dev->max_pages_per_mr =
3487 min_t(u32, srp_dev->max_pages_per_mr,
3488 dev_attr->max_fast_reg_page_list_len);
3490 srp_dev->mr_max_size = srp_dev->mr_page_size *
3491 srp_dev->max_pages_per_mr;
3492 pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3493 device->name, mr_page_shift, dev_attr->max_mr_size,
3494 dev_attr->max_fast_reg_page_list_len,
3495 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3497 INIT_LIST_HEAD(&srp_dev->dev_list);
3499 srp_dev->dev = device;
3500 srp_dev->pd = ib_alloc_pd(device);
3501 if (IS_ERR(srp_dev->pd))
3504 if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
3505 srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
3506 IB_ACCESS_LOCAL_WRITE |
3507 IB_ACCESS_REMOTE_READ |
3508 IB_ACCESS_REMOTE_WRITE);
3509 if (IS_ERR(srp_dev->global_mr))
3512 srp_dev->global_mr = NULL;
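/*
 * When memory registration is mandatory (register_always) and FMR or
 * fast registration is available, no global MR with remote access
 * rights is created; data buffers are then always exposed through
 * per-request memory registrations.
 */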
3515 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3516 host = srp_add_port(srp_dev, p);
3517 if (host)
3518 list_add_tail(&host->list, &srp_dev->dev_list);
3521 ib_set_client_data(device, &srp_client, srp_dev);
3526 ib_dealloc_pd(srp_dev->pd);
3535 static void srp_remove_one(struct ib_device *device, void *client_data)
3537 struct srp_device *srp_dev;
3538 struct srp_host *host, *tmp_host;
3539 struct srp_target_port *target;
3541 srp_dev = client_data;
3545 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3546 device_unregister(&host->dev);
3548 * Wait for the sysfs entry to go away, so that no new
3549 * target ports can be created.
3551 wait_for_completion(&host->released);
3554 * Remove all target ports.
3556 spin_lock(&host->target_lock);
3557 list_for_each_entry(target, &host->target_list, list)
3558 srp_queue_remove_work(target);
3559 spin_unlock(&host->target_lock);
3562 * Wait for tl_err and target port removal tasks.
3564 flush_workqueue(system_long_wq);
3565 flush_workqueue(srp_remove_wq);
3570 if (srp_dev->global_mr)
3571 ib_dereg_mr(srp_dev->global_mr);
3572 ib_dealloc_pd(srp_dev->pd);
3577 static struct srp_function_template ib_srp_transport_functions = {
3578 .has_rport_state = true,
3579 .reset_timer_if_blocked = true,
3580 .reconnect_delay = &srp_reconnect_delay,
3581 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
3582 .dev_loss_tmo = &srp_dev_loss_tmo,
3583 .reconnect = srp_rport_reconnect,
3584 .rport_delete = srp_rport_delete,
3585 .terminate_rport_io = srp_terminate_io,
3588 static int __init srp_init_module(void)
3592 BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
3594 if (srp_sg_tablesize) {
3595 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3596 if (!cmd_sg_entries)
3597 cmd_sg_entries = srp_sg_tablesize;
3600 if (!cmd_sg_entries)
3601 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3603 if (cmd_sg_entries > 255) {
3604 pr_warn("Clamping cmd_sg_entries to 255\n");
3605 cmd_sg_entries = 255;
3608 if (!indirect_sg_entries)
3609 indirect_sg_entries = cmd_sg_entries;
3610 else if (indirect_sg_entries < cmd_sg_entries) {
3611 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3613 indirect_sg_entries = cmd_sg_entries;
3616 srp_remove_wq = create_workqueue("srp_remove");
3617 if (!srp_remove_wq) {
3623 ib_srp_transport_template =
3624 srp_attach_transport(&ib_srp_transport_functions);
3625 if (!ib_srp_transport_template)
3628 ret = class_register(&srp_class);
3630 pr_err("couldn't register class infiniband_srp\n");
3634 ib_sa_register_client(&srp_sa_client);
3636 ret = ib_register_client(&srp_client);
3638 pr_err("couldn't register IB client\n");
3646 ib_sa_unregister_client(&srp_sa_client);
3647 class_unregister(&srp_class);
3650 srp_release_transport(ib_srp_transport_template);
3653 destroy_workqueue(srp_remove_wq);
3657 static void __exit srp_cleanup_module(void)
3659 ib_unregister_client(&srp_client);
3660 ib_sa_unregister_client(&srp_sa_client);
3661 class_unregister(&srp_class);
3662 srp_release_transport(ib_srp_transport_template);
3663 destroy_workqueue(srp_remove_wq);
3666 module_init(srp_init_module);
3667 module_exit(srp_cleanup_module);