/*
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
#include <rdma/ib_cache.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>

#include <scsi/scsi_transport_srp.h>

#define DRV_NAME	"ib_srp"
#define PFX		DRV_NAME ": "
#define DRV_VERSION	"2.0"
#define DRV_RELDATE	"July 26, 2015"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_INFO(release_date, DRV_RELDATE);
static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr;
static bool register_always;
static int topspin_workarounds = 1;
module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
		 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");

module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");

module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
		 "Whether to use fast registration if both FMR and fast registration are supported");

module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
		 "Use memory registration even for contiguous memory regions");

static const struct kernel_param_ops srp_tmo_ops;
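
/*
 * reconnect_delay, fast_io_fail_tmo and dev_loss_tmo below share srp_tmo_ops:
 * updating any one of them goes through srp_tmo_set(), which validates the
 * new value against the other two via srp_tmo_valid().
 */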
static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
		 "Number of seconds between the observation of a transport"
		 " layer error and failing all I/O. \"off\" means that this"
		 " functionality is disabled.");
static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
		 "Maximum number of seconds that the SRP transport should"
		 " insulate against transport layer errors. After this time"
		 " has been exceeded the SCSI host is removed. Should be"
		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
		 " if fast_io_fail_tmo has not been set. \"off\" means that"
		 " this functionality is disabled.");
static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);

static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;

static struct ib_client srp_client = {
	.name   = "srp",
	.add    = srp_add_one,
	.remove = srp_remove_one
};

static struct ib_sa_client srp_sa_client;
static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
{
	int tmo = *(int *)kp->arg;

	if (tmo >= 0)
		return sprintf(buffer, "%d", tmo);
	else
		return sprintf(buffer, "off");
}
static int srp_tmo_set(const char *val, const struct kernel_param *kp)
{
	int tmo, res;

	res = srp_parse_tmo(&tmo, val);
	if (res)
		goto out;

	if (kp->arg == &srp_reconnect_delay)
		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
				    srp_dev_loss_tmo);
	else if (kp->arg == &srp_fast_io_fail_tmo)
		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
	else
		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
				    tmo);
	if (res)
		goto out;
	*(int *)kp->arg = tmo;

out:
	return res;
}
static const struct kernel_param_ops srp_tmo_ops = {
	.get = srp_tmo_get,
	.set = srp_tmo_set,
};
static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
	return (struct srp_target_port *) host->hostdata;
}

static const char *srp_target_info(struct Scsi_Host *host)
{
	return host_to_target(host)->target_name;
}

static int srp_target_is_topspin(struct srp_target_port *target)
{
	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };

	return topspin_workarounds &&
		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
}
209 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
211 enum dma_data_direction direction)
215 iu = kmalloc(sizeof *iu, gfp_mask);
219 iu->buf = kzalloc(size, gfp_mask);
223 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
225 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
229 iu->direction = direction;
241 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
246 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
252 static void srp_qp_event(struct ib_event *event, void *context)
254 pr_debug("QP event %s (%d)\n",
255 ib_event_msg(event->event), event->event);
258 static int srp_init_qp(struct srp_target_port *target,
261 struct ib_qp_attr *attr;
264 attr = kmalloc(sizeof *attr, GFP_KERNEL);
268 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
269 target->srp_host->port,
270 be16_to_cpu(target->pkey),
275 attr->qp_state = IB_QPS_INIT;
276 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
277 IB_ACCESS_REMOTE_WRITE);
278 attr->port_num = target->srp_host->port;
280 ret = ib_modify_qp(qp, attr,
291 static int srp_new_cm_id(struct srp_rdma_ch *ch)
293 struct srp_target_port *target = ch->target;
294 struct ib_cm_id *new_cm_id;
296 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
298 if (IS_ERR(new_cm_id))
299 return PTR_ERR(new_cm_id);
302 ib_destroy_cm_id(ch->cm_id);
303 ch->cm_id = new_cm_id;
304 ch->path.sgid = target->sgid;
305 ch->path.dgid = target->orig_dgid;
306 ch->path.pkey = target->pkey;
307 ch->path.service_id = target->service_id;
312 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
314 struct srp_device *dev = target->srp_host->srp_dev;
315 struct ib_fmr_pool_param fmr_param;
317 memset(&fmr_param, 0, sizeof(fmr_param));
318 fmr_param.pool_size = target->scsi_host->can_queue;
319 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
321 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
322 fmr_param.page_shift = ilog2(dev->mr_page_size);
323 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
324 IB_ACCESS_REMOTE_WRITE |
325 IB_ACCESS_REMOTE_READ);
327 return ib_create_fmr_pool(dev->pd, &fmr_param);
/**
 * srp_destroy_fr_pool() - free the resources owned by a pool
 * @pool: Fast registration pool to be destroyed.
 */
334 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
337 struct srp_fr_desc *d;
342 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
344 ib_free_fast_reg_page_list(d->frpl);
/**
 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
 * @device: IB device to allocate fast registration descriptors for.
 * @pd: Protection domain associated with the FR descriptors.
 * @pool_size: Number of descriptors to allocate.
 * @max_page_list_len: Maximum fast registration work request page list length.
 */
358 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
359 struct ib_pd *pd, int pool_size,
360 int max_page_list_len)
362 struct srp_fr_pool *pool;
363 struct srp_fr_desc *d;
365 struct ib_fast_reg_page_list *frpl;
366 int i, ret = -EINVAL;
371 pool = kzalloc(sizeof(struct srp_fr_pool) +
372 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
375 pool->size = pool_size;
376 pool->max_page_list_len = max_page_list_len;
377 spin_lock_init(&pool->lock);
378 INIT_LIST_HEAD(&pool->free_list);
380 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
381 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
388 frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
394 list_add_tail(&d->entry, &pool->free_list);
401 srp_destroy_fr_pool(pool);
/**
 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
 * @pool: Pool to obtain descriptor from.
 */
412 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
414 struct srp_fr_desc *d = NULL;
417 spin_lock_irqsave(&pool->lock, flags);
418 if (!list_empty(&pool->free_list)) {
419 d = list_first_entry(&pool->free_list, typeof(*d), entry);
422 spin_unlock_irqrestore(&pool->lock, flags);
/**
 * srp_fr_pool_put() - put an FR descriptor back in the free list
 * @pool: Pool the descriptor was allocated from.
 * @desc: Pointer to an array of fast registration descriptor pointers.
 * @n: Number of descriptors to put back.
 *
 * Note: The caller must already have queued an invalidation request for
 * desc->mr->rkey before calling this function.
 */
436 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
442 spin_lock_irqsave(&pool->lock, flags);
443 for (i = 0; i < n; i++)
444 list_add(&desc[i]->entry, &pool->free_list);
445 spin_unlock_irqrestore(&pool->lock, flags);
448 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
450 struct srp_device *dev = target->srp_host->srp_dev;
452 return srp_create_fr_pool(dev->dev, dev->pd,
453 target->scsi_host->can_queue,
454 dev->max_pages_per_mr);
/**
 * srp_destroy_qp() - destroy an RDMA queue pair
 * @ch: SRP RDMA channel.
 *
 * Change a queue pair into the error state and wait until all receive
 * completions have been processed before destroying it. This prevents the
 * receive completion handler from accessing the queue pair while it is being
 * destroyed.
 */
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
	static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
	static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
	struct ib_recv_wr *bad_wr;
	int ret;

	/* Destroying a QP and reusing ch->done is only safe if not connected */
	WARN_ON_ONCE(ch->connected);

	ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
	WARN_ONCE(ret, "ib_modify_qp() returned %d\n", ret);
	if (ret)
		goto out;

	init_completion(&ch->done);
	ret = ib_post_recv(ch->qp, &wr, &bad_wr);
	WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
	if (ret == 0)
		wait_for_completion(&ch->done);

out:
	ib_destroy_qp(ch->qp);
}
491 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
493 struct srp_target_port *target = ch->target;
494 struct srp_device *dev = target->srp_host->srp_dev;
495 struct ib_qp_init_attr *init_attr;
496 struct ib_cq *recv_cq, *send_cq;
498 struct ib_fmr_pool *fmr_pool = NULL;
499 struct srp_fr_pool *fr_pool = NULL;
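	/*
	 * m is 2 when fast registration is in use: registration work requests
	 * (see srp_map_finish_fr()) are posted on the same send queue as SRP
	 * commands, so the send CQ and SQ below are sized for m * queue_size.
	 */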
500 const int m = 1 + dev->use_fast_reg;
501 struct ib_cq_init_attr cq_attr = {};
504 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
508 /* + 1 for SRP_LAST_WR_ID */
509 cq_attr.cqe = target->queue_size + 1;
510 cq_attr.comp_vector = ch->comp_vector;
511 recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
513 if (IS_ERR(recv_cq)) {
514 ret = PTR_ERR(recv_cq);
518 cq_attr.cqe = m * target->queue_size;
519 cq_attr.comp_vector = ch->comp_vector;
520 send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
522 if (IS_ERR(send_cq)) {
523 ret = PTR_ERR(send_cq);
527 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
529 init_attr->event_handler = srp_qp_event;
530 init_attr->cap.max_send_wr = m * target->queue_size;
531 init_attr->cap.max_recv_wr = target->queue_size + 1;
532 init_attr->cap.max_recv_sge = 1;
533 init_attr->cap.max_send_sge = 1;
534 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
535 init_attr->qp_type = IB_QPT_RC;
536 init_attr->send_cq = send_cq;
537 init_attr->recv_cq = recv_cq;
539 qp = ib_create_qp(dev->pd, init_attr);
545 ret = srp_init_qp(target, qp);
549 if (dev->use_fast_reg && dev->has_fr) {
550 fr_pool = srp_alloc_fr_pool(target);
551 if (IS_ERR(fr_pool)) {
552 ret = PTR_ERR(fr_pool);
553 shost_printk(KERN_WARNING, target->scsi_host, PFX
554 "FR pool allocation failed (%d)\n", ret);
558 srp_destroy_fr_pool(ch->fr_pool);
559 ch->fr_pool = fr_pool;
560 } else if (!dev->use_fast_reg && dev->has_fmr) {
561 fmr_pool = srp_alloc_fmr_pool(target);
562 if (IS_ERR(fmr_pool)) {
563 ret = PTR_ERR(fmr_pool);
564 shost_printk(KERN_WARNING, target->scsi_host, PFX
565 "FMR pool allocation failed (%d)\n", ret);
569 ib_destroy_fmr_pool(ch->fmr_pool);
570 ch->fmr_pool = fmr_pool;
576 ib_destroy_cq(ch->recv_cq);
578 ib_destroy_cq(ch->send_cq);
581 ch->recv_cq = recv_cq;
582 ch->send_cq = send_cq;
591 ib_destroy_cq(send_cq);
594 ib_destroy_cq(recv_cq);
/*
 * Note: this function may be called without srp_alloc_iu_bufs() having been
 * invoked. Hence the ch->[rt]x_ring checks.
 */
605 static void srp_free_ch_ib(struct srp_target_port *target,
606 struct srp_rdma_ch *ch)
608 struct srp_device *dev = target->srp_host->srp_dev;
615 ib_destroy_cm_id(ch->cm_id);
619 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
623 if (dev->use_fast_reg) {
625 srp_destroy_fr_pool(ch->fr_pool);
628 ib_destroy_fmr_pool(ch->fmr_pool);
631 ib_destroy_cq(ch->send_cq);
632 ib_destroy_cq(ch->recv_cq);
	/*
	 * Prevent the SCSI error handler from using this channel after it has
	 * been freed: the error handler may continue trying to perform
	 * recovery actions after scsi_remove_host() has returned.
	 */
	ch->send_cq = ch->recv_cq = NULL;
646 for (i = 0; i < target->queue_size; ++i)
647 srp_free_iu(target->srp_host, ch->rx_ring[i]);
652 for (i = 0; i < target->queue_size; ++i)
653 srp_free_iu(target->srp_host, ch->tx_ring[i]);
659 static void srp_path_rec_completion(int status,
660 struct ib_sa_path_rec *pathrec,
663 struct srp_rdma_ch *ch = ch_ptr;
664 struct srp_target_port *target = ch->target;
668 shost_printk(KERN_ERR, target->scsi_host,
669 PFX "Got failed path rec status %d\n", status);
675 static int srp_lookup_path(struct srp_rdma_ch *ch)
677 struct srp_target_port *target = ch->target;
680 ch->path.numb_path = 1;
682 init_completion(&ch->done);
684 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
685 target->srp_host->srp_dev->dev,
686 target->srp_host->port,
688 IB_SA_PATH_REC_SERVICE_ID |
689 IB_SA_PATH_REC_DGID |
690 IB_SA_PATH_REC_SGID |
691 IB_SA_PATH_REC_NUMB_PATH |
693 SRP_PATH_REC_TIMEOUT_MS,
695 srp_path_rec_completion,
696 ch, &ch->path_query);
697 if (ch->path_query_id < 0)
698 return ch->path_query_id;
700 ret = wait_for_completion_interruptible(&ch->done);
705 shost_printk(KERN_WARNING, target->scsi_host,
706 PFX "Path record query failed\n");
711 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
713 struct srp_target_port *target = ch->target;
715 struct ib_cm_req_param param;
716 struct srp_login_req priv;
720 req = kzalloc(sizeof *req, GFP_KERNEL);
724 req->param.primary_path = &ch->path;
725 req->param.alternate_path = NULL;
726 req->param.service_id = target->service_id;
727 req->param.qp_num = ch->qp->qp_num;
728 req->param.qp_type = ch->qp->qp_type;
729 req->param.private_data = &req->priv;
730 req->param.private_data_len = sizeof req->priv;
731 req->param.flow_control = 1;
733 get_random_bytes(&req->param.starting_psn, 4);
734 req->param.starting_psn &= 0xffffff;
737 * Pick some arbitrary defaults here; we could make these
738 * module parameters if anyone cared about setting them.
740 req->param.responder_resources = 4;
741 req->param.remote_cm_response_timeout = 20;
742 req->param.local_cm_response_timeout = 20;
743 req->param.retry_count = target->tl_retry_count;
744 req->param.rnr_retry_count = 7;
745 req->param.max_cm_retries = 15;
747 req->priv.opcode = SRP_LOGIN_REQ;
749 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
750 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
751 SRP_BUF_FORMAT_INDIRECT);
752 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
753 SRP_MULTICHAN_SINGLE);
755 * In the published SRP specification (draft rev. 16a), the
756 * port identifier format is 8 bytes of ID extension followed
757 * by 8 bytes of GUID. Older drafts put the two halves in the
758 * opposite order, so that the GUID comes first.
760 * Targets conforming to these obsolete drafts can be
761 * recognized by the I/O Class they report.
763 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
764 memcpy(req->priv.initiator_port_id,
765 &target->sgid.global.interface_id, 8);
766 memcpy(req->priv.initiator_port_id + 8,
767 &target->initiator_ext, 8);
768 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
769 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
771 memcpy(req->priv.initiator_port_id,
772 &target->initiator_ext, 8);
773 memcpy(req->priv.initiator_port_id + 8,
774 &target->sgid.global.interface_id, 8);
775 memcpy(req->priv.target_port_id, &target->id_ext, 8);
776 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
780 * Topspin/Cisco SRP targets will reject our login unless we
781 * zero out the first 8 bytes of our initiator port ID and set
782 * the second 8 bytes to the local node GUID.
784 if (srp_target_is_topspin(target)) {
785 shost_printk(KERN_DEBUG, target->scsi_host,
786 PFX "Topspin/Cisco initiator port ID workaround "
787 "activated for target GUID %016llx\n",
788 be64_to_cpu(target->ioc_guid));
789 memset(req->priv.initiator_port_id, 0, 8);
790 memcpy(req->priv.initiator_port_id + 8,
791 &target->srp_host->srp_dev->dev->node_guid, 8);
794 status = ib_send_cm_req(ch->cm_id, &req->param);
801 static bool srp_queue_remove_work(struct srp_target_port *target)
803 bool changed = false;
805 spin_lock_irq(&target->lock);
806 if (target->state != SRP_TARGET_REMOVED) {
807 target->state = SRP_TARGET_REMOVED;
810 spin_unlock_irq(&target->lock);
813 queue_work(srp_remove_wq, &target->remove_work);
818 static void srp_disconnect_target(struct srp_target_port *target)
820 struct srp_rdma_ch *ch;
823 /* XXX should send SRP_I_LOGOUT request */
825 for (i = 0; i < target->ch_count; i++) {
827 ch->connected = false;
828 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
829 shost_printk(KERN_DEBUG, target->scsi_host,
830 PFX "Sending CM DREQ failed\n");
835 static void srp_free_req_data(struct srp_target_port *target,
836 struct srp_rdma_ch *ch)
838 struct srp_device *dev = target->srp_host->srp_dev;
839 struct ib_device *ibdev = dev->dev;
840 struct srp_request *req;
846 for (i = 0; i < target->req_ring_size; ++i) {
847 req = &ch->req_ring[i];
848 if (dev->use_fast_reg)
851 kfree(req->fmr_list);
852 kfree(req->map_page);
853 if (req->indirect_dma_addr) {
854 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
855 target->indirect_size,
858 kfree(req->indirect_desc);
865 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
867 struct srp_target_port *target = ch->target;
868 struct srp_device *srp_dev = target->srp_host->srp_dev;
869 struct ib_device *ibdev = srp_dev->dev;
870 struct srp_request *req;
873 int i, ret = -ENOMEM;
875 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
880 for (i = 0; i < target->req_ring_size; ++i) {
881 req = &ch->req_ring[i];
882 mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
886 if (srp_dev->use_fast_reg)
887 req->fr_list = mr_list;
889 req->fmr_list = mr_list;
890 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
891 sizeof(void *), GFP_KERNEL);
894 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
895 if (!req->indirect_desc)
898 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
899 target->indirect_size,
901 if (ib_dma_mapping_error(ibdev, dma_addr))
904 req->indirect_dma_addr = dma_addr;
/**
 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
 * @shost: SCSI host whose attributes to remove from sysfs.
 *
 * Note: Any attributes defined in the host template and that did not exist
 * before invocation of this function will be ignored.
 */
919 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
921 struct device_attribute **attr;
923 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
924 device_remove_file(&shost->shost_dev, *attr);
927 static void srp_remove_target(struct srp_target_port *target)
929 struct srp_rdma_ch *ch;
932 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
934 srp_del_scsi_host_attr(target->scsi_host);
935 srp_rport_get(target->rport);
936 srp_remove_host(target->scsi_host);
937 scsi_remove_host(target->scsi_host);
938 srp_stop_rport_timers(target->rport);
939 srp_disconnect_target(target);
940 for (i = 0; i < target->ch_count; i++) {
942 srp_free_ch_ib(target, ch);
944 cancel_work_sync(&target->tl_err_work);
945 srp_rport_put(target->rport);
946 for (i = 0; i < target->ch_count; i++) {
948 srp_free_req_data(target, ch);
953 spin_lock(&target->srp_host->target_lock);
954 list_del(&target->list);
955 spin_unlock(&target->srp_host->target_lock);
957 scsi_host_put(target->scsi_host);
960 static void srp_remove_work(struct work_struct *work)
962 struct srp_target_port *target =
963 container_of(work, struct srp_target_port, remove_work);
965 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
967 srp_remove_target(target);
970 static void srp_rport_delete(struct srp_rport *rport)
972 struct srp_target_port *target = rport->lld_data;
974 srp_queue_remove_work(target);
/**
 * srp_connected_ch() - number of connected channels
 * @target: SRP target port.
 */
981 static int srp_connected_ch(struct srp_target_port *target)
985 for (i = 0; i < target->ch_count; i++)
986 c += target->ch[i].connected;
991 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
993 struct srp_target_port *target = ch->target;
996 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
998 ret = srp_lookup_path(ch);
1003 init_completion(&ch->done);
1004 ret = srp_send_req(ch, multich);
1007 ret = wait_for_completion_interruptible(&ch->done);
1012 * The CM event handling code will set status to
1013 * SRP_PORT_REDIRECT if we get a port redirect REJ
1014 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1015 * redirect REJ back.
1017 switch (ch->status) {
1019 ch->connected = true;
1022 case SRP_PORT_REDIRECT:
1023 ret = srp_lookup_path(ch);
1028 case SRP_DLID_REDIRECT:
1031 case SRP_STALE_CONN:
1032 shost_printk(KERN_ERR, target->scsi_host, PFX
1033 "giving up on stale connection\n");
1034 ch->status = -ECONNRESET;
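/*
 * Post an IB_WR_LOCAL_INV work request that invalidates @rkey. Used by
 * srp_unmap_data() to invalidate fast registration mappings.
 */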
1043 static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
1045 struct ib_send_wr *bad_wr;
1046 struct ib_send_wr wr = {
1047 .opcode = IB_WR_LOCAL_INV,
1048 .wr_id = LOCAL_INV_WR_ID_MASK,
1052 .ex.invalidate_rkey = rkey,
1055 return ib_post_send(ch->qp, &wr, &bad_wr);
1058 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1059 struct srp_rdma_ch *ch,
1060 struct srp_request *req)
1062 struct srp_target_port *target = ch->target;
1063 struct srp_device *dev = target->srp_host->srp_dev;
1064 struct ib_device *ibdev = dev->dev;
1067 if (!scsi_sglist(scmnd) ||
1068 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1069 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1072 if (dev->use_fast_reg) {
1073 struct srp_fr_desc **pfr;
1075 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1076 res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
1078 shost_printk(KERN_ERR, target->scsi_host, PFX
1079 "Queueing INV WR for rkey %#x failed (%d)\n",
1080 (*pfr)->mr->rkey, res);
1081 queue_work(system_long_wq,
1082 &target->tl_err_work);
1086 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1089 struct ib_pool_fmr **pfmr;
1091 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1092 ib_fmr_pool_unmap(*pfmr);
1095 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1096 scmnd->sc_data_direction);
/**
 * srp_claim_req - Take ownership of the scmnd associated with a request.
 * @ch: SRP RDMA channel.
 * @req: SRP request.
 * @sdev: If not NULL, only take ownership for this SCSI device.
 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
 *         ownership of @req->scmnd if it equals @scmnd.
 *
 * Return:
 * Either NULL or a pointer to the SCSI command the caller became owner of.
 */
1110 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1111 struct srp_request *req,
1112 struct scsi_device *sdev,
1113 struct scsi_cmnd *scmnd)
1115 unsigned long flags;
1117 spin_lock_irqsave(&ch->lock, flags);
1119 (!sdev || req->scmnd->device == sdev) &&
1120 (!scmnd || req->scmnd == scmnd)) {
1126 spin_unlock_irqrestore(&ch->lock, flags);
/**
 * srp_free_req() - Unmap data and add request to the free request list.
 * @ch: SRP RDMA channel.
 * @req: Request to be freed.
 * @scmnd: SCSI command associated with @req.
 * @req_lim_delta: Amount to be added to @target->req_lim.
 */
1138 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1139 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1141 unsigned long flags;
1143 srp_unmap_data(scmnd, ch, req);
1145 spin_lock_irqsave(&ch->lock, flags);
1146 ch->req_lim += req_lim_delta;
1147 spin_unlock_irqrestore(&ch->lock, flags);
1150 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1151 struct scsi_device *sdev, int result)
1153 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1156 srp_free_req(ch, req, scmnd, 0);
1157 scmnd->result = result;
1158 scmnd->scsi_done(scmnd);
1162 static void srp_terminate_io(struct srp_rport *rport)
1164 struct srp_target_port *target = rport->lld_data;
1165 struct srp_rdma_ch *ch;
1166 struct Scsi_Host *shost = target->scsi_host;
1167 struct scsi_device *sdev;
1171 * Invoking srp_terminate_io() while srp_queuecommand() is running
1172 * is not safe. Hence the warning statement below.
1174 shost_for_each_device(sdev, shost)
1175 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1177 for (i = 0; i < target->ch_count; i++) {
1178 ch = &target->ch[i];
1180 for (j = 0; j < target->req_ring_size; ++j) {
1181 struct srp_request *req = &ch->req_ring[j];
1183 srp_finish_req(ch, req, NULL,
1184 DID_TRANSPORT_FAILFAST << 16);
/*
 * It is up to the caller to ensure that srp_rport_reconnect() calls are
 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
 * srp_reset_device() or srp_reset_host() calls will occur while this function
 * is in progress. One way to ensure this is not to call this function
 * directly but to call srp_reconnect_rport() instead, since that function
 * serializes calls of this function via rport->mutex and also blocks
 * srp_queuecommand() calls before invoking this function.
 */
1198 static int srp_rport_reconnect(struct srp_rport *rport)
1200 struct srp_target_port *target = rport->lld_data;
1201 struct srp_rdma_ch *ch;
1203 bool multich = false;
1205 srp_disconnect_target(target);
1207 if (target->state == SRP_TARGET_SCANNING)
1211 * Now get a new local CM ID so that we avoid confusing the target in
1212 * case things are really fouled up. Doing so also ensures that all CM
1213 * callbacks will have finished before a new QP is allocated.
1215 for (i = 0; i < target->ch_count; i++) {
1216 ch = &target->ch[i];
1217 ret += srp_new_cm_id(ch);
1219 for (i = 0; i < target->ch_count; i++) {
1220 ch = &target->ch[i];
1221 for (j = 0; j < target->req_ring_size; ++j) {
1222 struct srp_request *req = &ch->req_ring[j];
1224 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1227 for (i = 0; i < target->ch_count; i++) {
1228 ch = &target->ch[i];
1230 * Whether or not creating a new CM ID succeeded, create a new
1231 * QP. This guarantees that all completion callback function
1232 * invocations have finished before request resetting starts.
1234 ret += srp_create_ch_ib(ch);
1236 INIT_LIST_HEAD(&ch->free_tx);
1237 for (j = 0; j < target->queue_size; ++j)
1238 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1241 target->qp_in_error = false;
1243 for (i = 0; i < target->ch_count; i++) {
1244 ch = &target->ch[i];
1247 ret = srp_connect_ch(ch, multich);
1252 shost_printk(KERN_INFO, target->scsi_host,
1253 PFX "reconnect succeeded\n");
1258 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1259 unsigned int dma_len, u32 rkey)
1261 struct srp_direct_buf *desc = state->desc;
1263 desc->va = cpu_to_be64(dma_addr);
1264 desc->key = cpu_to_be32(rkey);
1265 desc->len = cpu_to_be32(dma_len);
1267 state->total_len += dma_len;
1272 static int srp_map_finish_fmr(struct srp_map_state *state,
1273 struct srp_rdma_ch *ch)
1275 struct srp_target_port *target = ch->target;
1276 struct srp_device *dev = target->srp_host->srp_dev;
1277 struct ib_pool_fmr *fmr;
1280 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1281 state->npages, io_addr);
1283 return PTR_ERR(fmr);
1285 *state->next_fmr++ = fmr;
1288 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1289 state->dma_len, fmr->fmr->rkey);
1294 static int srp_map_finish_fr(struct srp_map_state *state,
1295 struct srp_rdma_ch *ch)
1297 struct srp_target_port *target = ch->target;
1298 struct srp_device *dev = target->srp_host->srp_dev;
1299 struct ib_send_wr *bad_wr;
1300 struct ib_send_wr wr;
1301 struct srp_fr_desc *desc;
1304 desc = srp_fr_pool_get(ch->fr_pool);
1308 rkey = ib_inc_rkey(desc->mr->rkey);
1309 ib_update_fast_reg_key(desc->mr, rkey);
1311 memcpy(desc->frpl->page_list, state->pages,
1312 sizeof(state->pages[0]) * state->npages);
1314 memset(&wr, 0, sizeof(wr));
1315 wr.opcode = IB_WR_FAST_REG_MR;
1316 wr.wr_id = FAST_REG_WR_ID_MASK;
1317 wr.wr.fast_reg.iova_start = state->base_dma_addr;
1318 wr.wr.fast_reg.page_list = desc->frpl;
1319 wr.wr.fast_reg.page_list_len = state->npages;
1320 wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
1321 wr.wr.fast_reg.length = state->dma_len;
1322 wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
1323 IB_ACCESS_REMOTE_READ |
1324 IB_ACCESS_REMOTE_WRITE);
1325 wr.wr.fast_reg.rkey = desc->mr->lkey;
1327 *state->next_fr++ = desc;
1330 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1333 return ib_post_send(ch->qp, &wr, &bad_wr);
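/*
 * Register the pages accumulated in @state: a single page is described with a
 * direct descriptor unless register_always is set; otherwise the pages are
 * registered through fast registration or an FMR, depending on use_fast_reg.
 */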
1336 static int srp_finish_mapping(struct srp_map_state *state,
1337 struct srp_rdma_ch *ch)
1339 struct srp_target_port *target = ch->target;
1342 if (state->npages == 0)
1345 if (state->npages == 1 && !register_always)
1346 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1349 ret = target->srp_host->srp_dev->use_fast_reg ?
1350 srp_map_finish_fr(state, ch) :
1351 srp_map_finish_fmr(state, ch);
1361 static void srp_map_update_start(struct srp_map_state *state,
1362 struct scatterlist *sg, int sg_index,
1363 dma_addr_t dma_addr)
1365 state->unmapped_sg = sg;
1366 state->unmapped_index = sg_index;
1367 state->unmapped_addr = dma_addr;
1370 static int srp_map_sg_entry(struct srp_map_state *state,
1371 struct srp_rdma_ch *ch,
1372 struct scatterlist *sg, int sg_index,
1375 struct srp_target_port *target = ch->target;
1376 struct srp_device *dev = target->srp_host->srp_dev;
1377 struct ib_device *ibdev = dev->dev;
1378 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1379 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1388 * Once we're in direct map mode for a request, we don't
1389 * go back to FMR or FR mode, so no need to update anything
1390 * other than the descriptor.
1392 srp_map_desc(state, dma_addr, dma_len, target->rkey);
1396 if (dma_len > dev->mr_max_size) {
1397 ret = srp_finish_mapping(state, ch);
1401 srp_map_desc(state, dma_addr, dma_len, target->rkey);
1402 srp_map_update_start(state, NULL, 0, 0);
1407 * If this is the first sg that will be mapped via FMR or via FR, save
1408 * our position. We need to know the first unmapped entry, its index,
1409 * and the first unmapped address within that entry to be able to
1410 * restart mapping after an error.
1412 if (!state->unmapped_sg)
1413 srp_map_update_start(state, sg, sg_index, dma_addr);
1416 unsigned offset = dma_addr & ~dev->mr_page_mask;
1417 if (state->npages == dev->max_pages_per_mr || offset != 0) {
1418 ret = srp_finish_mapping(state, ch);
1422 srp_map_update_start(state, sg, sg_index, dma_addr);
1425 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1428 state->base_dma_addr = dma_addr;
1429 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1430 state->dma_len += len;
	/*
	 * If the last entry of the MR wasn't a full page, then we need to
	 * close it out and start a new one -- we can only merge at page
	 * boundaries.
	 */
1441 if (len != dev->mr_page_size) {
1442 ret = srp_finish_mapping(state, ch);
1444 srp_map_update_start(state, NULL, 0, 0);
1449 static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch,
1450 struct srp_request *req, struct scatterlist *scat,
1453 struct srp_target_port *target = ch->target;
1454 struct srp_device *dev = target->srp_host->srp_dev;
1455 struct ib_device *ibdev = dev->dev;
1456 struct scatterlist *sg;
1460 state->desc = req->indirect_desc;
1461 state->pages = req->map_page;
1462 if (dev->use_fast_reg) {
1463 state->next_fr = req->fr_list;
1464 use_mr = !!ch->fr_pool;
1466 state->next_fmr = req->fmr_list;
1467 use_mr = !!ch->fmr_pool;
1470 for_each_sg(scat, sg, count, i) {
1471 if (srp_map_sg_entry(state, ch, sg, i, use_mr)) {
1473 * Memory registration failed, so backtrack to the
1474 * first unmapped entry and continue on without using
1475 * memory registration.
1477 dma_addr_t dma_addr;
1478 unsigned int dma_len;
1481 sg = state->unmapped_sg;
1482 i = state->unmapped_index;
1484 dma_addr = ib_sg_dma_address(ibdev, sg);
1485 dma_len = ib_sg_dma_len(ibdev, sg);
1486 dma_len -= (state->unmapped_addr - dma_addr);
1487 dma_addr = state->unmapped_addr;
1489 srp_map_desc(state, dma_addr, dma_len, target->rkey);
1493 if (use_mr && srp_finish_mapping(state, ch))
1496 req->nmdesc = state->nmdesc;
1501 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1502 struct srp_request *req)
1504 struct srp_target_port *target = ch->target;
1505 struct scatterlist *scat;
1506 struct srp_cmd *cmd = req->cmd->buf;
1507 int len, nents, count;
1508 struct srp_device *dev;
1509 struct ib_device *ibdev;
1510 struct srp_map_state state;
1511 struct srp_indirect_buf *indirect_hdr;
1515 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1516 return sizeof (struct srp_cmd);
1518 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1519 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1520 shost_printk(KERN_WARNING, target->scsi_host,
1521 PFX "Unhandled data direction %d\n",
1522 scmnd->sc_data_direction);
1526 nents = scsi_sg_count(scmnd);
1527 scat = scsi_sglist(scmnd);
1529 dev = target->srp_host->srp_dev;
1532 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1533 if (unlikely(count == 0))
1536 fmt = SRP_DATA_DESC_DIRECT;
1537 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1539 if (count == 1 && !register_always) {
1541 * The midlayer only generated a single gather/scatter
1542 * entry, or DMA mapping coalesced everything to a
1543 * single entry. So a direct descriptor along with
1544 * the DMA MR suffices.
1546 struct srp_direct_buf *buf = (void *) cmd->add_data;
1548 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1549 buf->key = cpu_to_be32(target->rkey);
1550 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1557 * We have more than one scatter/gather entry, so build our indirect
1558 * descriptor table, trying to merge as many entries as we can.
1560 indirect_hdr = (void *) cmd->add_data;
1562 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1563 target->indirect_size, DMA_TO_DEVICE);
1565 memset(&state, 0, sizeof(state));
1566 srp_map_sg(&state, ch, req, scat, count);
1568 /* We've mapped the request, now pull as much of the indirect
1569 * descriptor table as we can into the command buffer. If this
1570 * target is not using an external indirect table, we are
1571 * guaranteed to fit into the command, as the SCSI layer won't
1572 * give us more S/G entries than we allow.
1574 if (state.ndesc == 1) {
1576 * Memory registration collapsed the sg-list into one entry,
1577 * so use a direct descriptor.
1579 struct srp_direct_buf *buf = (void *) cmd->add_data;
1581 *buf = req->indirect_desc[0];
1585 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1586 !target->allow_ext_sg)) {
1587 shost_printk(KERN_ERR, target->scsi_host,
1588 "Could not fit S/G list into SRP_CMD\n");
1592 count = min(state.ndesc, target->cmd_sg_cnt);
1593 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1595 fmt = SRP_DATA_DESC_INDIRECT;
1596 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1597 len += count * sizeof (struct srp_direct_buf);
1599 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1600 count * sizeof (struct srp_direct_buf));
1602 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1603 indirect_hdr->table_desc.key = cpu_to_be32(target->rkey);
1604 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1605 indirect_hdr->len = cpu_to_be32(state.total_len);
1607 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1608 cmd->data_out_desc_cnt = count;
1610 cmd->data_in_desc_cnt = count;
1612 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1616 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1617 cmd->buf_fmt = fmt << 4;
/*
 * Return an IU and possible credit to the free pool
 */
1627 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1628 enum srp_iu_type iu_type)
1630 unsigned long flags;
1632 spin_lock_irqsave(&ch->lock, flags);
1633 list_add(&iu->list, &ch->free_tx);
1634 if (iu_type != SRP_IU_RSP)
1636 spin_unlock_irqrestore(&ch->lock, flags);
/*
 * Must be called with ch->lock held to protect req_lim and free_tx.
 * If IU is not sent, it must be returned using srp_put_tx_iu().
 *
 * Note:
 * An upper limit for the number of allocated information units for each
 * request type is:
 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
 *   more than Scsi_Host.can_queue requests.
 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
 *   one unanswered SRP request to an initiator.
 */
1652 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1653 enum srp_iu_type iu_type)
1655 struct srp_target_port *target = ch->target;
1656 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1659 srp_send_completion(ch->send_cq, ch);
1661 if (list_empty(&ch->free_tx))
1664 /* Initiator responses to target requests do not consume credits */
1665 if (iu_type != SRP_IU_RSP) {
1666 if (ch->req_lim <= rsv) {
1667 ++target->zero_req_lim;
1674 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1675 list_del(&iu->list);
1679 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1681 struct srp_target_port *target = ch->target;
1683 struct ib_send_wr wr, *bad_wr;
1685 list.addr = iu->dma;
1687 list.lkey = target->lkey;
1690 wr.wr_id = (uintptr_t) iu;
1693 wr.opcode = IB_WR_SEND;
1694 wr.send_flags = IB_SEND_SIGNALED;
1696 return ib_post_send(ch->qp, &wr, &bad_wr);
1699 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1701 struct srp_target_port *target = ch->target;
1702 struct ib_recv_wr wr, *bad_wr;
1705 list.addr = iu->dma;
1706 list.length = iu->size;
1707 list.lkey = target->lkey;
1710 wr.wr_id = (uintptr_t) iu;
1714 return ib_post_recv(ch->qp, &wr, &bad_wr);
1717 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1719 struct srp_target_port *target = ch->target;
1720 struct srp_request *req;
1721 struct scsi_cmnd *scmnd;
1722 unsigned long flags;
1724 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1725 spin_lock_irqsave(&ch->lock, flags);
1726 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1727 spin_unlock_irqrestore(&ch->lock, flags);
1729 ch->tsk_mgmt_status = -1;
1730 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1731 ch->tsk_mgmt_status = rsp->data[3];
1732 complete(&ch->tsk_mgmt_done);
1734 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1736 req = (void *)scmnd->host_scribble;
1737 scmnd = srp_claim_req(ch, req, NULL, scmnd);
1740 shost_printk(KERN_ERR, target->scsi_host,
1741 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1742 rsp->tag, ch - target->ch, ch->qp->qp_num);
1744 spin_lock_irqsave(&ch->lock, flags);
1745 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1746 spin_unlock_irqrestore(&ch->lock, flags);
1750 scmnd->result = rsp->status;
1752 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1753 memcpy(scmnd->sense_buffer, rsp->data +
1754 be32_to_cpu(rsp->resp_data_len),
1755 min_t(int, be32_to_cpu(rsp->sense_data_len),
1756 SCSI_SENSE_BUFFERSIZE));
1759 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1760 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1761 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1762 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1763 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1764 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1765 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1766 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1768 srp_free_req(ch, req, scmnd,
1769 be32_to_cpu(rsp->req_lim_delta));
1771 scmnd->host_scribble = NULL;
1772 scmnd->scsi_done(scmnd);
1776 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1779 struct srp_target_port *target = ch->target;
1780 struct ib_device *dev = target->srp_host->srp_dev->dev;
1781 unsigned long flags;
1785 spin_lock_irqsave(&ch->lock, flags);
1786 ch->req_lim += req_delta;
1787 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1788 spin_unlock_irqrestore(&ch->lock, flags);
1791 shost_printk(KERN_ERR, target->scsi_host, PFX
1792 "no IU available to send response\n");
1796 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1797 memcpy(iu->buf, rsp, len);
1798 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1800 err = srp_post_send(ch, iu, len);
1802 shost_printk(KERN_ERR, target->scsi_host, PFX
1803 "unable to post response: %d\n", err);
1804 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1810 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1811 struct srp_cred_req *req)
1813 struct srp_cred_rsp rsp = {
1814 .opcode = SRP_CRED_RSP,
1817 s32 delta = be32_to_cpu(req->req_lim_delta);
1819 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1820 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1821 "problems processing SRP_CRED_REQ\n");
1824 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1825 struct srp_aer_req *req)
1827 struct srp_target_port *target = ch->target;
1828 struct srp_aer_rsp rsp = {
1829 .opcode = SRP_AER_RSP,
1832 s32 delta = be32_to_cpu(req->req_lim_delta);
1834 shost_printk(KERN_ERR, target->scsi_host, PFX
1835 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1837 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1838 shost_printk(KERN_ERR, target->scsi_host, PFX
1839 "problems processing SRP_AER_REQ\n");
1842 static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
1844 struct srp_target_port *target = ch->target;
1845 struct ib_device *dev = target->srp_host->srp_dev->dev;
1846 struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1850 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1853 opcode = *(u8 *) iu->buf;
1856 shost_printk(KERN_ERR, target->scsi_host,
1857 PFX "recv completion, opcode 0x%02x\n", opcode);
1858 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
1859 iu->buf, wc->byte_len, true);
1864 srp_process_rsp(ch, iu->buf);
1868 srp_process_cred_req(ch, iu->buf);
1872 srp_process_aer_req(ch, iu->buf);
1876 /* XXX Handle target logout */
1877 shost_printk(KERN_WARNING, target->scsi_host,
1878 PFX "Got target logout request\n");
1882 shost_printk(KERN_WARNING, target->scsi_host,
1883 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
1887 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
1890 res = srp_post_recv(ch, iu);
1892 shost_printk(KERN_ERR, target->scsi_host,
1893 PFX "Recv failed with error code %d\n", res);
/**
 * srp_tl_err_work() - handle a transport layer error
 * @work: Work structure embedded in an SRP target port.
 *
 * Note: This function may get invoked before the rport has been created,
 * hence the target->rport test.
 */
1903 static void srp_tl_err_work(struct work_struct *work)
1905 struct srp_target_port *target;
1907 target = container_of(work, struct srp_target_port, tl_err_work);
1909 srp_start_tl_fail_timers(target->rport);
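/*
 * srp_handle_qp_err() decodes the wr_id of a failed work request:
 * SRP_LAST_WR_ID marks the drain request posted by srp_destroy_qp(), while
 * LOCAL_INV_WR_ID_MASK and FAST_REG_WR_ID_MASK identify memory registration
 * work requests; anything else is a regular send or receive.
 */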
1912 static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
1913 bool send_err, struct srp_rdma_ch *ch)
1915 struct srp_target_port *target = ch->target;
1917 if (wr_id == SRP_LAST_WR_ID) {
1918 complete(&ch->done);
1922 if (ch->connected && !target->qp_in_error) {
1923 if (wr_id & LOCAL_INV_WR_ID_MASK) {
1924 shost_printk(KERN_ERR, target->scsi_host, PFX
1925 "LOCAL_INV failed with status %s (%d)\n",
1926 ib_wc_status_msg(wc_status), wc_status);
1927 } else if (wr_id & FAST_REG_WR_ID_MASK) {
1928 shost_printk(KERN_ERR, target->scsi_host, PFX
1929 "FAST_REG_MR failed status %s (%d)\n",
1930 ib_wc_status_msg(wc_status), wc_status);
1932 shost_printk(KERN_ERR, target->scsi_host,
1933 PFX "failed %s status %s (%d) for iu %p\n",
1934 send_err ? "send" : "receive",
1935 ib_wc_status_msg(wc_status), wc_status,
1936 (void *)(uintptr_t)wr_id);
1938 queue_work(system_long_wq, &target->tl_err_work);
1940 target->qp_in_error = true;
1943 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
1945 struct srp_rdma_ch *ch = ch_ptr;
1948 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1949 while (ib_poll_cq(cq, 1, &wc) > 0) {
1950 if (likely(wc.status == IB_WC_SUCCESS)) {
1951 srp_handle_recv(ch, &wc);
1953 srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
1958 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
1960 struct srp_rdma_ch *ch = ch_ptr;
1964 while (ib_poll_cq(cq, 1, &wc) > 0) {
1965 if (likely(wc.status == IB_WC_SUCCESS)) {
1966 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1967 list_add(&iu->list, &ch->free_tx);
1969 srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
1974 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1976 struct srp_target_port *target = host_to_target(shost);
1977 struct srp_rport *rport = target->rport;
1978 struct srp_rdma_ch *ch;
1979 struct srp_request *req;
1981 struct srp_cmd *cmd;
1982 struct ib_device *dev;
1983 unsigned long flags;
1987 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
1990 * The SCSI EH thread is the only context from which srp_queuecommand()
1991 * can get invoked for blocked devices (SDEV_BLOCK /
1992 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
1993 * locking the rport mutex if invoked from inside the SCSI EH.
1996 mutex_lock(&rport->mutex);
1998 scmnd->result = srp_chkready(target->rport);
1999 if (unlikely(scmnd->result))
2002 WARN_ON_ONCE(scmnd->request->tag < 0);
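	/*
	 * The blk-mq unique tag encodes both the hardware queue and the
	 * per-queue tag: the former selects the RDMA channel, the latter the
	 * slot in that channel's request ring.
	 */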
2003 tag = blk_mq_unique_tag(scmnd->request);
2004 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2005 idx = blk_mq_unique_tag_to_tag(tag);
2006 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2007 dev_name(&shost->shost_gendev), tag, idx,
2008 target->req_ring_size);
2010 spin_lock_irqsave(&ch->lock, flags);
2011 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2012 spin_unlock_irqrestore(&ch->lock, flags);
2017 req = &ch->req_ring[idx];
2018 dev = target->srp_host->srp_dev->dev;
2019 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2022 scmnd->host_scribble = (void *) req;
2025 memset(cmd, 0, sizeof *cmd);
2027 cmd->opcode = SRP_CMD;
2028 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2030 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2035 len = srp_map_data(scmnd, ch, req);
2037 shost_printk(KERN_ERR, target->scsi_host,
2038 PFX "Failed to map data (%d)\n", len);
2040 * If we ran out of memory descriptors (-ENOMEM) because an
2041 * application is queuing many requests with more than
2042 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2043 * to reduce queue depth temporarily.
2045 scmnd->result = len == -ENOMEM ?
2046 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2050 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2053 if (srp_post_send(ch, iu, len)) {
2054 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2062 mutex_unlock(&rport->mutex);
2067 srp_unmap_data(scmnd, ch, req);
2070 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
	/*
	 * Prevent the loops that iterate over the request ring from
	 * encountering a dangling SCSI command pointer.
	 */
2079 if (scmnd->result) {
2080 scmnd->scsi_done(scmnd);
2083 ret = SCSI_MLQUEUE_HOST_BUSY;
/*
 * Note: the resources allocated in this function are freed in
 * srp_free_ch_ib().
 */
2093 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2095 struct srp_target_port *target = ch->target;
2098 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2102 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2107 for (i = 0; i < target->queue_size; ++i) {
2108 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2110 GFP_KERNEL, DMA_FROM_DEVICE);
2111 if (!ch->rx_ring[i])
2115 for (i = 0; i < target->queue_size; ++i) {
2116 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2118 GFP_KERNEL, DMA_TO_DEVICE);
2119 if (!ch->tx_ring[i])
2122 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2128 for (i = 0; i < target->queue_size; ++i) {
2129 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2130 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2143 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2145 uint64_t T_tr_ns, max_compl_time_ms;
2146 uint32_t rq_tmo_jiffies;
2149 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2150 * table 91), both the QP timeout and the retry count have to be set
2151 * for RC QP's during the RTR to RTS transition.
2153 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2154 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2157 * Set target->rq_tmo_jiffies to one second more than the largest time
2158 * it can take before an error completion is generated. See also
2159 * C9-140..142 in the IBTA spec for more information about how to
2160 * convert the QP Local ACK Timeout value to nanoseconds.
2162 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2163 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2164 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2165 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2167 return rq_tmo_jiffies;
2170 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2171 const struct srp_login_rsp *lrsp,
2172 struct srp_rdma_ch *ch)
2174 struct srp_target_port *target = ch->target;
2175 struct ib_qp_attr *qp_attr = NULL;
2180 if (lrsp->opcode == SRP_LOGIN_RSP) {
2181 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2182 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2185 * Reserve credits for task management so we don't
2186 * bounce requests back to the SCSI mid-layer.
2188 target->scsi_host->can_queue
2189 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2190 target->scsi_host->can_queue);
2191 target->scsi_host->cmd_per_lun
2192 = min_t(int, target->scsi_host->can_queue,
2193 target->scsi_host->cmd_per_lun);
2195 shost_printk(KERN_WARNING, target->scsi_host,
2196 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2202 ret = srp_alloc_iu_bufs(ch);
2208 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2212 qp_attr->qp_state = IB_QPS_RTR;
2213 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2217 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2221 for (i = 0; i < target->queue_size; i++) {
2222 struct srp_iu *iu = ch->rx_ring[i];
2224 ret = srp_post_recv(ch, iu);
2229 qp_attr->qp_state = IB_QPS_RTS;
2230 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2234 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2236 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2240 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2249 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2250 struct ib_cm_event *event,
2251 struct srp_rdma_ch *ch)
2253 struct srp_target_port *target = ch->target;
2254 struct Scsi_Host *shost = target->scsi_host;
2255 struct ib_class_port_info *cpi;
2258 switch (event->param.rej_rcvd.reason) {
2259 case IB_CM_REJ_PORT_CM_REDIRECT:
2260 cpi = event->param.rej_rcvd.ari;
2261 ch->path.dlid = cpi->redirect_lid;
2262 ch->path.pkey = cpi->redirect_pkey;
2263 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2264 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2266 ch->status = ch->path.dlid ?
2267 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2270 case IB_CM_REJ_PORT_REDIRECT:
2271 if (srp_target_is_topspin(target)) {
2273 * Topspin/Cisco SRP gateways incorrectly send
2274 * reject reason code 25 when they mean 24
2277 memcpy(ch->path.dgid.raw,
2278 event->param.rej_rcvd.ari, 16);
2280 shost_printk(KERN_DEBUG, shost,
2281 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2282 be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2283 be64_to_cpu(ch->path.dgid.global.interface_id));
2285 ch->status = SRP_PORT_REDIRECT;
2287 shost_printk(KERN_WARNING, shost,
2288 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2289 ch->status = -ECONNRESET;
2293 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2294 shost_printk(KERN_WARNING, shost,
2295 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2296 ch->status = -ECONNRESET;
2299 case IB_CM_REJ_CONSUMER_DEFINED:
2300 opcode = *(u8 *) event->private_data;
2301 if (opcode == SRP_LOGIN_REJ) {
2302 struct srp_login_rej *rej = event->private_data;
2303 u32 reason = be32_to_cpu(rej->reason);
2305 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2306 shost_printk(KERN_WARNING, shost,
2307 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2309 shost_printk(KERN_WARNING, shost, PFX
2310 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2312 target->orig_dgid.raw, reason);
2314 shost_printk(KERN_WARNING, shost,
2315 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2316 " opcode 0x%02x\n", opcode);
2317 ch->status = -ECONNRESET;
2320 case IB_CM_REJ_STALE_CONN:
2321 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2322 ch->status = SRP_STALE_CONN;
2326 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2327 event->param.rej_rcvd.reason);
2328 ch->status = -ECONNRESET;
2332 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2334 struct srp_rdma_ch *ch = cm_id->context;
2335 struct srp_target_port *target = ch->target;
2338 switch (event->event) {
2339 case IB_CM_REQ_ERROR:
2340 shost_printk(KERN_DEBUG, target->scsi_host,
2341 PFX "Sending CM REQ failed\n");
2343 ch->status = -ECONNRESET;
2346 case IB_CM_REP_RECEIVED:
2348 srp_cm_rep_handler(cm_id, event->private_data, ch);
2351 case IB_CM_REJ_RECEIVED:
2352 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2355 srp_cm_rej_handler(cm_id, event, ch);
2358 case IB_CM_DREQ_RECEIVED:
2359 shost_printk(KERN_WARNING, target->scsi_host,
2360 PFX "DREQ received - connection closed\n");
2361 ch->connected = false;
2362 if (ib_send_cm_drep(cm_id, NULL, 0))
2363 shost_printk(KERN_ERR, target->scsi_host,
2364 PFX "Sending CM DREP failed\n");
2365 queue_work(system_long_wq, &target->tl_err_work);
2368 case IB_CM_TIMEWAIT_EXIT:
2369 shost_printk(KERN_ERR, target->scsi_host,
2370 PFX "connection closed\n");
2376 case IB_CM_MRA_RECEIVED:
2377 case IB_CM_DREQ_ERROR:
2378 case IB_CM_DREP_RECEIVED:
2382 shost_printk(KERN_WARNING, target->scsi_host,
2383 PFX "Unhandled CM event %d\n", event->event);
2388 complete(&ch->done);
2394 * srp_change_queue_depth - set the queue depth of a SCSI device
2395 * @sdev: SCSI device struct
2396 * @qdepth: requested queue depth
2398 * Returns the new queue depth.
2401 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2403 if (!sdev->tagged_supported)
2404 qdepth = 1;
2405 return scsi_change_queue_depth(sdev, qdepth);
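/*
 * Illustrative only: the SCSI midlayer invokes this callback when user space
 * updates a device's queue_depth attribute.  A minimal user-space sketch of
 * such an update follows; the "sdb" device name is an example and not
 * something defined by this driver.
 */
#if 0	/* example user-space code, not part of the driver */
#include <stdio.h>

static int set_queue_depth(const char *disk, int qdepth)
{
	char path[128];
	FILE *f;

	/* e.g. /sys/block/sdb/device/queue_depth */
	snprintf(path, sizeof(path), "/sys/block/%s/device/queue_depth", disk);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", qdepth);
	return fclose(f);
}
#endif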
2408 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2411 struct srp_target_port *target = ch->target;
2412 struct srp_rport *rport = target->rport;
2413 struct ib_device *dev = target->srp_host->srp_dev->dev;
2415 struct srp_tsk_mgmt *tsk_mgmt;
2417 if (!ch->connected || target->qp_in_error)
2420 init_completion(&ch->tsk_mgmt_done);
2423 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2424 * invoked while a task management function is being sent.
2426 mutex_lock(&rport->mutex);
2427 spin_lock_irq(&ch->lock);
2428 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2429 spin_unlock_irq(&ch->lock);
2432 mutex_unlock(&rport->mutex);
2437 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2438 DMA_TO_DEVICE);
2439 tsk_mgmt = iu->buf;
2440 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2442 tsk_mgmt->opcode = SRP_TSK_MGMT;
2443 int_to_scsilun(lun, &tsk_mgmt->lun);
2444 tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
2445 tsk_mgmt->tsk_mgmt_func = func;
2446 tsk_mgmt->task_tag = req_tag;
2448 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2449 DMA_TO_DEVICE);
2450 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2451 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2452 mutex_unlock(&rport->mutex);
2456 mutex_unlock(&rport->mutex);
2458 if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2459 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2465 static int srp_abort(struct scsi_cmnd *scmnd)
2467 struct srp_target_port *target = host_to_target(scmnd->device->host);
2468 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2471 struct srp_rdma_ch *ch;
2472 int ret;
2474 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2478 tag = blk_mq_unique_tag(scmnd->request);
2479 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2480 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2482 ch = &target->ch[ch_idx];
2483 if (!srp_claim_req(ch, req, NULL, scmnd))
2485 shost_printk(KERN_ERR, target->scsi_host,
2486 "Sending SRP abort for tag %#x\n", tag);
2487 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2488 SRP_TSK_ABORT_TASK) == 0)
2489 ret = SUCCESS;
2490 else if (target->rport->state == SRP_RPORT_LOST)
2491 ret = FAST_IO_FAIL;
2492 else
2493 ret = FAILED;
2494 srp_free_req(ch, req, scmnd, 0);
2495 scmnd->result = DID_ABORT << 16;
2496 scmnd->scsi_done(scmnd);
2498 return ret;
2501 static int srp_reset_device(struct scsi_cmnd *scmnd)
2503 struct srp_target_port *target = host_to_target(scmnd->device->host);
2504 struct srp_rdma_ch *ch;
2505 int i, j;
2507 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2509 ch = &target->ch[0];
2510 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2513 if (ch->tsk_mgmt_status)
2516 for (i = 0; i < target->ch_count; i++) {
2517 ch = &target->ch[i];
2518 for (j = 0; j < target->req_ring_size; ++j) {
2519 struct srp_request *req = &ch->req_ring[j];
2521 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2528 static int srp_reset_host(struct scsi_cmnd *scmnd)
2530 struct srp_target_port *target = host_to_target(scmnd->device->host);
2532 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2534 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2537 static int srp_slave_configure(struct scsi_device *sdev)
2539 struct Scsi_Host *shost = sdev->host;
2540 struct srp_target_port *target = host_to_target(shost);
2541 struct request_queue *q = sdev->request_queue;
2542 unsigned long timeout;
2544 if (sdev->type == TYPE_DISK) {
2545 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2546 blk_queue_rq_timeout(q, timeout);
2552 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2555 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2557 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2560 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2563 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2565 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2568 static ssize_t show_service_id(struct device *dev,
2569 struct device_attribute *attr, char *buf)
2571 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2573 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2576 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2579 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2581 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2584 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2587 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2589 return sprintf(buf, "%pI6\n", target->sgid.raw);
2592 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2595 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2596 struct srp_rdma_ch *ch = &target->ch[0];
2598 return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2601 static ssize_t show_orig_dgid(struct device *dev,
2602 struct device_attribute *attr, char *buf)
2604 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2606 return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2609 static ssize_t show_req_lim(struct device *dev,
2610 struct device_attribute *attr, char *buf)
2612 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2613 struct srp_rdma_ch *ch;
2614 int i, req_lim = INT_MAX;
2616 for (i = 0; i < target->ch_count; i++) {
2617 ch = &target->ch[i];
2618 req_lim = min(req_lim, ch->req_lim);
2620 return sprintf(buf, "%d\n", req_lim);
2623 static ssize_t show_zero_req_lim(struct device *dev,
2624 struct device_attribute *attr, char *buf)
2626 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2628 return sprintf(buf, "%d\n", target->zero_req_lim);
2631 static ssize_t show_local_ib_port(struct device *dev,
2632 struct device_attribute *attr, char *buf)
2634 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2636 return sprintf(buf, "%d\n", target->srp_host->port);
2639 static ssize_t show_local_ib_device(struct device *dev,
2640 struct device_attribute *attr, char *buf)
2642 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2644 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2647 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2650 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2652 return sprintf(buf, "%d\n", target->ch_count);
2655 static ssize_t show_comp_vector(struct device *dev,
2656 struct device_attribute *attr, char *buf)
2658 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2660 return sprintf(buf, "%d\n", target->comp_vector);
2663 static ssize_t show_tl_retry_count(struct device *dev,
2664 struct device_attribute *attr, char *buf)
2666 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2668 return sprintf(buf, "%d\n", target->tl_retry_count);
2671 static ssize_t show_cmd_sg_entries(struct device *dev,
2672 struct device_attribute *attr, char *buf)
2674 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2676 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2679 static ssize_t show_allow_ext_sg(struct device *dev,
2680 struct device_attribute *attr, char *buf)
2682 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2684 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2687 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
2688 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
2689 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
2690 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2691 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
2692 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
2693 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
2694 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
2695 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
2696 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
2697 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2698 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
2699 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2700 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
2701 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
2702 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
2704 static struct device_attribute *srp_host_attrs[] = {
2707 &dev_attr_service_id,
2711 &dev_attr_orig_dgid,
2713 &dev_attr_zero_req_lim,
2714 &dev_attr_local_ib_port,
2715 &dev_attr_local_ib_device,
2717 &dev_attr_comp_vector,
2718 &dev_attr_tl_retry_count,
2719 &dev_attr_cmd_sg_entries,
2720 &dev_attr_allow_ext_sg,
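/*
 * Illustrative only: the attributes above are exported by the SCSI midlayer
 * under /sys/class/scsi_host/host<N>/ once the target's SCSI host has been
 * added.  A minimal user-space read might look like the sketch below; the
 * "host7" and "req_lim" values are examples.
 */
#if 0	/* example user-space code, not part of the driver */
#include <stdio.h>

static int read_shost_attr(const char *shost, const char *attr,
			   char *buf, int len)
{
	char path[128];
	FILE *f;

	/* e.g. /sys/class/scsi_host/host7/req_lim */
	snprintf(path, sizeof(path), "/sys/class/scsi_host/%s/%s", shost, attr);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (!fgets(buf, len, f)) {
		fclose(f);
		return -1;
	}
	return fclose(f);
}
#endif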
2724 static struct scsi_host_template srp_template = {
2725 .module = THIS_MODULE,
2726 .name = "InfiniBand SRP initiator",
2727 .proc_name = DRV_NAME,
2728 .slave_configure = srp_slave_configure,
2729 .info = srp_target_info,
2730 .queuecommand = srp_queuecommand,
2731 .change_queue_depth = srp_change_queue_depth,
2732 .eh_abort_handler = srp_abort,
2733 .eh_device_reset_handler = srp_reset_device,
2734 .eh_host_reset_handler = srp_reset_host,
2735 .skip_settle_delay = true,
2736 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2737 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2739 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2740 .use_clustering = ENABLE_CLUSTERING,
2741 .shost_attrs = srp_host_attrs,
2743 .track_queue_depth = 1,
2746 static int srp_sdev_count(struct Scsi_Host *host)
2748 struct scsi_device *sdev;
2751 shost_for_each_device(sdev, host)
2759 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2760 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2761 * removal has been scheduled.
2762 * 0 and target->state != SRP_TARGET_REMOVED upon success.
2764 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2766 struct srp_rport_identifiers ids;
2767 struct srp_rport *rport;
2769 target->state = SRP_TARGET_SCANNING;
2770 sprintf(target->target_name, "SRP.T10:%016llX",
2771 be64_to_cpu(target->id_ext));
2773 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2776 memcpy(ids.port_id, &target->id_ext, 8);
2777 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2778 ids.roles = SRP_RPORT_ROLE_TARGET;
2779 rport = srp_rport_add(target->scsi_host, &ids);
2780 if (IS_ERR(rport)) {
2781 scsi_remove_host(target->scsi_host);
2782 return PTR_ERR(rport);
2785 rport->lld_data = target;
2786 target->rport = rport;
2788 spin_lock(&host->target_lock);
2789 list_add_tail(&target->list, &host->target_list);
2790 spin_unlock(&host->target_lock);
2792 scsi_scan_target(&target->scsi_host->shost_gendev,
2793 0, target->scsi_id, SCAN_WILD_CARD, 0);
2795 if (srp_connected_ch(target) < target->ch_count ||
2796 target->qp_in_error) {
2797 shost_printk(KERN_INFO, target->scsi_host,
2798 PFX "SCSI scan failed - removing SCSI host\n");
2799 srp_queue_remove_work(target);
2803 pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
2804 dev_name(&target->scsi_host->shost_gendev),
2805 srp_sdev_count(target->scsi_host));
2807 spin_lock_irq(&target->lock);
2808 if (target->state == SRP_TARGET_SCANNING)
2809 target->state = SRP_TARGET_LIVE;
2810 spin_unlock_irq(&target->lock);
2816 static void srp_release_dev(struct device *dev)
2818 struct srp_host *host =
2819 container_of(dev, struct srp_host, dev);
2821 complete(&host->released);
2824 static struct class srp_class = {
2825 .name = "infiniband_srp",
2826 .dev_release = srp_release_dev
2830 * srp_conn_unique() - check whether the connection to a target is unique
2832 * @target: SRP target port.
2834 static bool srp_conn_unique(struct srp_host *host,
2835 struct srp_target_port *target)
2837 struct srp_target_port *t;
2840 if (target->state == SRP_TARGET_REMOVED)
2845 spin_lock(&host->target_lock);
2846 list_for_each_entry(t, &host->target_list, list) {
2847 if (t != target &&
2848 target->id_ext == t->id_ext &&
2849 target->ioc_guid == t->ioc_guid &&
2850 target->initiator_ext == t->initiator_ext) {
2855 spin_unlock(&host->target_lock);
2862 * Target ports are added by writing
2864 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
2865 * pkey=<P_Key>,service_id=<service ID>
2867 * to the add_target sysfs attribute.
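/*
 * Illustrative only: the parameter string is written to the add_target
 * attribute of the srp host device that srp_add_port() below registers
 * under the infiniband_srp class, i.e.
 * /sys/class/infiniband_srp/srp-<hca>-<port>/add_target.  The identifiers in
 * this sketch are made up; real values come from the target's login details.
 */
#if 0	/* example user-space code, not part of the driver */
#include <stdio.h>

static int srp_add_target_example(void)
{
	FILE *f = fopen("/sys/class/infiniband_srp/srp-mlx4_0-1/add_target",
			"w");

	if (!f)
		return -1;
	fprintf(f, "id_ext=200100e08b000000,ioc_guid=0002c90300a00000,"
		"dgid=fe800000000000000002c90300a00001,pkey=ffff,"
		"service_id=0002c90300a00000\n");
	return fclose(f);
}
#endif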
2871 SRP_OPT_ID_EXT = 1 << 0,
2872 SRP_OPT_IOC_GUID = 1 << 1,
2873 SRP_OPT_DGID = 1 << 2,
2874 SRP_OPT_PKEY = 1 << 3,
2875 SRP_OPT_SERVICE_ID = 1 << 4,
2876 SRP_OPT_MAX_SECT = 1 << 5,
2877 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
2878 SRP_OPT_IO_CLASS = 1 << 7,
2879 SRP_OPT_INITIATOR_EXT = 1 << 8,
2880 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
2881 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
2882 SRP_OPT_SG_TABLESIZE = 1 << 11,
2883 SRP_OPT_COMP_VECTOR = 1 << 12,
2884 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
2885 SRP_OPT_QUEUE_SIZE = 1 << 14,
2886 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
2890 SRP_OPT_SERVICE_ID),
2893 static const match_table_t srp_opt_tokens = {
2894 { SRP_OPT_ID_EXT, "id_ext=%s" },
2895 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
2896 { SRP_OPT_DGID, "dgid=%s" },
2897 { SRP_OPT_PKEY, "pkey=%x" },
2898 { SRP_OPT_SERVICE_ID, "service_id=%s" },
2899 { SRP_OPT_MAX_SECT, "max_sect=%d" },
2900 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
2901 { SRP_OPT_IO_CLASS, "io_class=%x" },
2902 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
2903 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
2904 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
2905 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
2906 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
2907 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
2908 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
2909 { SRP_OPT_ERR, NULL }
2912 static int srp_parse_options(const char *buf, struct srp_target_port *target)
2914 char *options, *sep_opt;
2917 substring_t args[MAX_OPT_ARGS];
2923 options = kstrdup(buf, GFP_KERNEL);
2928 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
2932 token = match_token(p, srp_opt_tokens, args);
2936 case SRP_OPT_ID_EXT:
2937 p = match_strdup(args);
2942 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
2946 case SRP_OPT_IOC_GUID:
2947 p = match_strdup(args);
2952 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
2957 p = match_strdup(args);
2962 if (strlen(p) != 32) {
2963 pr_warn("bad dest GID parameter '%s'\n", p);
2968 for (i = 0; i < 16; ++i) {
2969 strlcpy(dgid, p + i * 2, sizeof(dgid));
2970 if (sscanf(dgid, "%hhx",
2971 &target->orig_dgid.raw[i]) < 1) {
2981 if (match_hex(args, &token)) {
2982 pr_warn("bad P_Key parameter '%s'\n", p);
2985 target->pkey = cpu_to_be16(token);
2988 case SRP_OPT_SERVICE_ID:
2989 p = match_strdup(args);
2994 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
2998 case SRP_OPT_MAX_SECT:
2999 if (match_int(args, &token)) {
3000 pr_warn("bad max sect parameter '%s'\n", p);
3003 target->scsi_host->max_sectors = token;
3006 case SRP_OPT_QUEUE_SIZE:
3007 if (match_int(args, &token) || token < 1) {
3008 pr_warn("bad queue_size parameter '%s'\n", p);
3011 target->scsi_host->can_queue = token;
3012 target->queue_size = token + SRP_RSP_SQ_SIZE +
3013 SRP_TSK_MGMT_SQ_SIZE;
3014 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3015 target->scsi_host->cmd_per_lun = token;
3018 case SRP_OPT_MAX_CMD_PER_LUN:
3019 if (match_int(args, &token) || token < 1) {
3020 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3024 target->scsi_host->cmd_per_lun = token;
3027 case SRP_OPT_IO_CLASS:
3028 if (match_hex(args, &token)) {
3029 pr_warn("bad IO class parameter '%s'\n", p);
3032 if (token != SRP_REV10_IB_IO_CLASS &&
3033 token != SRP_REV16A_IB_IO_CLASS) {
3034 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3035 token, SRP_REV10_IB_IO_CLASS,
3036 SRP_REV16A_IB_IO_CLASS);
3039 target->io_class = token;
3042 case SRP_OPT_INITIATOR_EXT:
3043 p = match_strdup(args);
3048 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3052 case SRP_OPT_CMD_SG_ENTRIES:
3053 if (match_int(args, &token) || token < 1 || token > 255) {
3054 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3058 target->cmd_sg_cnt = token;
3061 case SRP_OPT_ALLOW_EXT_SG:
3062 if (match_int(args, &token)) {
3063 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3066 target->allow_ext_sg = !!token;
3069 case SRP_OPT_SG_TABLESIZE:
3070 if (match_int(args, &token) || token < 1 ||
3071 token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
3072 pr_warn("bad max sg_tablesize parameter '%s'\n",
3076 target->sg_tablesize = token;
3079 case SRP_OPT_COMP_VECTOR:
3080 if (match_int(args, &token) || token < 0) {
3081 pr_warn("bad comp_vector parameter '%s'\n", p);
3084 target->comp_vector = token;
3087 case SRP_OPT_TL_RETRY_COUNT:
3088 if (match_int(args, &token) || token < 2 || token > 7) {
3089 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3093 target->tl_retry_count = token;
3097 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3103 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3106 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3107 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3108 !(srp_opt_tokens[i].token & opt_mask))
3109 pr_warn("target creation request is missing parameter '%s'\n",
3110 srp_opt_tokens[i].pattern);
3112 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3113 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3114 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3115 target->scsi_host->cmd_per_lun,
3116 target->scsi_host->can_queue);
3123 static ssize_t srp_create_target(struct device *dev,
3124 struct device_attribute *attr,
3125 const char *buf, size_t count)
3127 struct srp_host *host =
3128 container_of(dev, struct srp_host, dev);
3129 struct Scsi_Host *target_host;
3130 struct srp_target_port *target;
3131 struct srp_rdma_ch *ch;
3132 struct srp_device *srp_dev = host->srp_dev;
3133 struct ib_device *ibdev = srp_dev->dev;
3134 int ret, node_idx, node, cpu, i;
3135 bool multich = false;
3137 target_host = scsi_host_alloc(&srp_template,
3138 sizeof (struct srp_target_port));
3142 target_host->transportt = ib_srp_transport_template;
3143 target_host->max_channel = 0;
3144 target_host->max_id = 1;
3145 target_host->max_lun = -1LL;
3146 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3148 target = host_to_target(target_host);
3150 target->io_class = SRP_REV16A_IB_IO_CLASS;
3151 target->scsi_host = target_host;
3152 target->srp_host = host;
3153 target->lkey = host->srp_dev->pd->local_dma_lkey;
3154 target->rkey = host->srp_dev->mr->rkey;
3155 target->cmd_sg_cnt = cmd_sg_entries;
3156 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3157 target->allow_ext_sg = allow_ext_sg;
3158 target->tl_retry_count = 7;
3159 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3162 * Prevent the SCSI host from being removed by srp_remove_target()
3163 * before this function returns.
3165 scsi_host_get(target->scsi_host);
3167 mutex_lock(&host->add_target_mutex);
3169 ret = srp_parse_options(buf, target);
3173 ret = scsi_init_shared_tag_map(target_host, target_host->can_queue);
3177 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3179 if (!srp_conn_unique(target->srp_host, target)) {
3180 shost_printk(KERN_INFO, target->scsi_host,
3181 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3182 be64_to_cpu(target->id_ext),
3183 be64_to_cpu(target->ioc_guid),
3184 be64_to_cpu(target->initiator_ext));
3189 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3190 target->cmd_sg_cnt < target->sg_tablesize) {
3191 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3192 target->sg_tablesize = target->cmd_sg_cnt;
3195 target_host->sg_tablesize = target->sg_tablesize;
3196 target->indirect_size = target->sg_tablesize *
3197 sizeof (struct srp_direct_buf);
3198 target->max_iu_len = sizeof (struct srp_cmd) +
3199 sizeof (struct srp_indirect_buf) +
3200 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3202 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3203 INIT_WORK(&target->remove_work, srp_remove_work);
3204 spin_lock_init(&target->lock);
3205 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid);
3210 target->ch_count = max_t(unsigned, num_online_nodes(),
3212 min(min(4 * num_online_nodes(),
3213 ibdev->num_comp_vectors),
3214 num_online_cpus()));
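/*
 * Worked example with assumed values: on a system with 2 online NUMA nodes,
 * 16 online CPUs and an HCA providing 8 completion vectors, this yields
 * ch_count = max(2, min(min(4 * 2, 8), 16)) = 8 RDMA channels.
 */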
3215 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3221 for_each_online_node(node) {
3222 const int ch_start = (node_idx * target->ch_count /
3223 num_online_nodes());
3224 const int ch_end = ((node_idx + 1) * target->ch_count /
3225 num_online_nodes());
3226 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3227 num_online_nodes() + target->comp_vector)
3228 % ibdev->num_comp_vectors;
3229 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3230 num_online_nodes() + target->comp_vector)
3231 % ibdev->num_comp_vectors;
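/*
 * Worked example with assumed values: with 2 online nodes, ch_count = 8,
 * 8 completion vectors and comp_vector = 0, node 0 is assigned channels
 * [0, 4) with cv_start = 0 and cv_end = 4, so its channels are spread over
 * completion vectors 0-3.
 */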
3234 for_each_online_cpu(cpu) {
3235 if (cpu_to_node(cpu) != node)
3237 if (ch_start + cpu_idx >= ch_end)
3239 ch = &target->ch[ch_start + cpu_idx];
3240 ch->target = target;
3241 ch->comp_vector = cv_start == cv_end ? cv_start :
3242 cv_start + cpu_idx % (cv_end - cv_start);
3243 spin_lock_init(&ch->lock);
3244 INIT_LIST_HEAD(&ch->free_tx);
3245 ret = srp_new_cm_id(ch);
3247 goto err_disconnect;
3249 ret = srp_create_ch_ib(ch);
3251 goto err_disconnect;
3253 ret = srp_alloc_req_data(ch);
3255 goto err_disconnect;
3257 ret = srp_connect_ch(ch, multich);
3259 shost_printk(KERN_ERR, target->scsi_host,
3260 PFX "Connection %d/%d failed\n",
3263 if (node_idx == 0 && cpu_idx == 0) {
3264 goto err_disconnect;
3266 srp_free_ch_ib(target, ch);
3267 srp_free_req_data(target, ch);
3268 target->ch_count = ch - target->ch;
3280 target->scsi_host->nr_hw_queues = target->ch_count;
3282 ret = srp_add_target(host, target);
3284 goto err_disconnect;
3286 if (target->state != SRP_TARGET_REMOVED) {
3287 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3288 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3289 be64_to_cpu(target->id_ext),
3290 be64_to_cpu(target->ioc_guid),
3291 be16_to_cpu(target->pkey),
3292 be64_to_cpu(target->service_id),
3293 target->sgid.raw, target->orig_dgid.raw);
3299 mutex_unlock(&host->add_target_mutex);
3301 scsi_host_put(target->scsi_host);
3302 if (ret < 0)
3303 scsi_host_put(target->scsi_host);
3308 srp_disconnect_target(target);
3310 for (i = 0; i < target->ch_count; i++) {
3311 ch = &target->ch[i];
3312 srp_free_ch_ib(target, ch);
3313 srp_free_req_data(target, ch);
3320 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3322 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3325 struct srp_host *host = container_of(dev, struct srp_host, dev);
3327 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3330 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3332 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3335 struct srp_host *host = container_of(dev, struct srp_host, dev);
3337 return sprintf(buf, "%d\n", host->port);
3340 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3342 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3344 struct srp_host *host;
3346 host = kzalloc(sizeof *host, GFP_KERNEL);
3350 INIT_LIST_HEAD(&host->target_list);
3351 spin_lock_init(&host->target_lock);
3352 init_completion(&host->released);
3353 mutex_init(&host->add_target_mutex);
3354 host->srp_dev = device;
3357 host->dev.class = &srp_class;
3358 host->dev.parent = device->dev->dma_device;
3359 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3361 if (device_register(&host->dev))
3363 if (device_create_file(&host->dev, &dev_attr_add_target))
3365 if (device_create_file(&host->dev, &dev_attr_ibdev))
3367 if (device_create_file(&host->dev, &dev_attr_port))
3373 device_unregister(&host->dev);
3381 static void srp_add_one(struct ib_device *device)
3383 struct srp_device *srp_dev;
3384 struct ib_device_attr *dev_attr;
3385 struct srp_host *host;
3386 int mr_page_shift, p;
3387 u64 max_pages_per_mr;
3389 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
3393 if (ib_query_device(device, dev_attr)) {
3394 pr_warn("Query device failed for %s\n", device->name);
3398 srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
3402 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3403 device->map_phys_fmr && device->unmap_fmr);
3404 srp_dev->has_fr = (dev_attr->device_cap_flags &
3405 IB_DEVICE_MEM_MGT_EXTENSIONS);
3406 if (!srp_dev->has_fmr && !srp_dev->has_fr)
3407 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3409 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3410 (!srp_dev->has_fmr || prefer_fr));
3413 * Use the smallest page size supported by the HCA, down to a
3414 * minimum of 4096 bytes. We're unlikely to build large sglists
3415 * out of smaller entries.
3417 mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
3418 srp_dev->mr_page_size = 1 << mr_page_shift;
3419 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
3420 max_pages_per_mr = dev_attr->max_mr_size;
3421 do_div(max_pages_per_mr, srp_dev->mr_page_size);
3422 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3423 max_pages_per_mr);
3424 if (srp_dev->use_fast_reg) {
3425 srp_dev->max_pages_per_mr =
3426 min_t(u32, srp_dev->max_pages_per_mr,
3427 dev_attr->max_fast_reg_page_list_len);
3429 srp_dev->mr_max_size = srp_dev->mr_page_size *
3430 srp_dev->max_pages_per_mr;
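/*
 * Worked example with assumed HCA capabilities: page_size_cap = 0xfffff000
 * gives ffs() - 1 = 12, so mr_page_size = 4096 and mr_page_mask = ~0xfffULL.
 * A max_mr_size of 4 GiB then allows 1M pages per MR, which is clamped to
 * SRP_MAX_PAGES_PER_MR (and, with fast registration, to
 * max_fast_reg_page_list_len); assuming a limit of 512 pages this gives
 * mr_max_size = 512 * 4096 = 2 MiB.
 */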
3431 pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3432 device->name, mr_page_shift, dev_attr->max_mr_size,
3433 dev_attr->max_fast_reg_page_list_len,
3434 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3436 INIT_LIST_HEAD(&srp_dev->dev_list);
3438 srp_dev->dev = device;
3439 srp_dev->pd = ib_alloc_pd(device);
3440 if (IS_ERR(srp_dev->pd))
3443 srp_dev->mr = ib_get_dma_mr(srp_dev->pd,
3444 IB_ACCESS_LOCAL_WRITE |
3445 IB_ACCESS_REMOTE_READ |
3446 IB_ACCESS_REMOTE_WRITE);
3447 if (IS_ERR(srp_dev->mr))
3450 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3451 host = srp_add_port(srp_dev, p);
3453 list_add_tail(&host->list, &srp_dev->dev_list);
3456 ib_set_client_data(device, &srp_client, srp_dev);
3461 ib_dealloc_pd(srp_dev->pd);
3470 static void srp_remove_one(struct ib_device *device, void *client_data)
3472 struct srp_device *srp_dev;
3473 struct srp_host *host, *tmp_host;
3474 struct srp_target_port *target;
3476 srp_dev = client_data;
3480 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3481 device_unregister(&host->dev);
3483 * Wait for the sysfs entry to go away, so that no new
3484 * target ports can be created.
3486 wait_for_completion(&host->released);
3489 * Remove all target ports.
3491 spin_lock(&host->target_lock);
3492 list_for_each_entry(target, &host->target_list, list)
3493 srp_queue_remove_work(target);
3494 spin_unlock(&host->target_lock);
3497 * Wait for tl_err and target port removal tasks.
3499 flush_workqueue(system_long_wq);
3500 flush_workqueue(srp_remove_wq);
3505 ib_dereg_mr(srp_dev->mr);
3506 ib_dealloc_pd(srp_dev->pd);
3511 static struct srp_function_template ib_srp_transport_functions = {
3512 .has_rport_state = true,
3513 .reset_timer_if_blocked = true,
3514 .reconnect_delay = &srp_reconnect_delay,
3515 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
3516 .dev_loss_tmo = &srp_dev_loss_tmo,
3517 .reconnect = srp_rport_reconnect,
3518 .rport_delete = srp_rport_delete,
3519 .terminate_rport_io = srp_terminate_io,
3522 static int __init srp_init_module(void)
3526 BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
3528 if (srp_sg_tablesize) {
3529 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3530 if (!cmd_sg_entries)
3531 cmd_sg_entries = srp_sg_tablesize;
3534 if (!cmd_sg_entries)
3535 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3537 if (cmd_sg_entries > 255) {
3538 pr_warn("Clamping cmd_sg_entries to 255\n");
3539 cmd_sg_entries = 255;
3542 if (!indirect_sg_entries)
3543 indirect_sg_entries = cmd_sg_entries;
3544 else if (indirect_sg_entries < cmd_sg_entries) {
3545 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3547 indirect_sg_entries = cmd_sg_entries;
3550 srp_remove_wq = create_workqueue("srp_remove");
3551 if (!srp_remove_wq) {
3557 ib_srp_transport_template =
3558 srp_attach_transport(&ib_srp_transport_functions);
3559 if (!ib_srp_transport_template)
3562 ret = class_register(&srp_class);
3564 pr_err("couldn't register class infiniband_srp\n");
3568 ib_sa_register_client(&srp_sa_client);
3570 ret = ib_register_client(&srp_client);
3572 pr_err("couldn't register IB client\n");
3580 ib_sa_unregister_client(&srp_sa_client);
3581 class_unregister(&srp_class);
3584 srp_release_transport(ib_srp_transport_template);
3587 destroy_workqueue(srp_remove_wq);
3591 static void __exit srp_cleanup_module(void)
3593 ib_unregister_client(&srp_client);
3594 ib_sa_unregister_client(&srp_sa_client);
3595 class_unregister(&srp_class);
3596 srp_release_transport(ib_srp_transport_template);
3597 destroy_workqueue(srp_remove_wq);
3600 module_init(srp_init_module);
3601 module_exit(srp_cleanup_module);