/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */

19 #include "scif_main.h"
22 * scif_recv_mark: Handle SCIF_MARK request
23 * @msg: Interrupt message
25 * The peer has requested a mark.
27 void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg)
29 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
32 err = _scif_fence_mark(ep, &mark);
34 msg->uop = SCIF_MARK_NACK;
36 msg->uop = SCIF_MARK_ACK;
37 msg->payload[0] = ep->remote_ep;
38 msg->payload[2] = mark;
39 scif_nodeqp_send(ep->remote_dev, msg);
43 * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages.
44 * @msg: Interrupt message
46 * The peer has responded to a SCIF_MARK message.
48 void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg)
50 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
51 struct scif_fence_info *fence_req =
52 (struct scif_fence_info *)msg->payload[1];
54 mutex_lock(&ep->rma_info.rma_lock);
55 if (msg->uop == SCIF_MARK_ACK) {
56 fence_req->state = OP_COMPLETED;
57 fence_req->dma_mark = (int)msg->payload[2];
59 fence_req->state = OP_FAILED;
61 mutex_unlock(&ep->rma_info.rma_lock);
62 complete(&fence_req->comp);
66 * scif_recv_wait: Handle SCIF_WAIT request
67 * @msg: Interrupt message
69 * The peer has requested waiting on a fence.
71 void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg)
73 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
74 struct scif_remote_fence_info *fence;
77 * Allocate structure for remote fence information and
78 * send a NACK if the allocation failed. The peer will
79 * return ENOMEM upon receiving a NACK.
81 fence = kmalloc(sizeof(*fence), GFP_KERNEL);
83 msg->payload[0] = ep->remote_ep;
84 msg->uop = SCIF_WAIT_NACK;
85 scif_nodeqp_send(ep->remote_dev, msg);
89 /* Prepare the fence request */
90 memcpy(&fence->msg, msg, sizeof(struct scifmsg));
91 INIT_LIST_HEAD(&fence->list);
93 /* Insert to the global remote fence request list */
94 mutex_lock(&scif_info.fencelock);
95 atomic_inc(&ep->rma_info.fence_refcount);
96 list_add_tail(&fence->list, &scif_info.fence);
97 mutex_unlock(&scif_info.fencelock);
99 schedule_work(&scif_info.misc_work);
103 * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages.
104 * @msg: Interrupt message
106 * The peer has responded to a SCIF_WAIT message.
108 void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg)
110 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
111 struct scif_fence_info *fence_req =
112 (struct scif_fence_info *)msg->payload[1];
114 mutex_lock(&ep->rma_info.rma_lock);
115 if (msg->uop == SCIF_WAIT_ACK)
116 fence_req->state = OP_COMPLETED;
118 fence_req->state = OP_FAILED;
119 mutex_unlock(&ep->rma_info.rma_lock);
120 complete(&fence_req->comp);
124 * scif_recv_sig_local: Handle SCIF_SIG_LOCAL request
125 * @msg: Interrupt message
127 * The peer has requested a signal on a local offset.
129 void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg)
131 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
134 err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
137 msg->uop = SCIF_SIG_NACK;
139 msg->uop = SCIF_SIG_ACK;
140 msg->payload[0] = ep->remote_ep;
141 scif_nodeqp_send(ep->remote_dev, msg);
145 * scif_recv_sig_remote: Handle SCIF_SIGNAL_REMOTE request
146 * @msg: Interrupt message
148 * The peer has requested a signal on a remote offset.
150 void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg)
152 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
155 err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
158 msg->uop = SCIF_SIG_NACK;
160 msg->uop = SCIF_SIG_ACK;
161 msg->payload[0] = ep->remote_ep;
162 scif_nodeqp_send(ep->remote_dev, msg);
166 * scif_recv_sig_resp: Handle SCIF_SIG_(N)ACK messages.
167 * @msg: Interrupt message
169 * The peer has responded to a signal request.
171 void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg)
173 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
174 struct scif_fence_info *fence_req =
175 (struct scif_fence_info *)msg->payload[3];
177 mutex_lock(&ep->rma_info.rma_lock);
178 if (msg->uop == SCIF_SIG_ACK)
179 fence_req->state = OP_COMPLETED;
181 fence_req->state = OP_FAILED;
182 mutex_unlock(&ep->rma_info.rma_lock);
183 complete(&fence_req->comp);
186 static inline void *scif_get_local_va(off_t off, struct scif_window *window)
188 struct page **pages = window->pinned_pages->pages;
189 int page_nr = (off - window->offset) >> PAGE_SHIFT;
190 off_t page_off = off & ~PAGE_MASK;
192 return page_address(pages[page_nr]) + page_off;
195 static void scif_prog_signal_cb(void *arg)
197 struct scif_status *status = arg;
199 dma_pool_free(status->ep->remote_dev->signal_pool, status,
200 status->src_dma_addr);
203 static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val)
205 struct scif_endpt *ep = (struct scif_endpt *)epd;
206 struct dma_chan *chan = ep->rma_info.dma_chan;
207 struct dma_device *ddev = chan->device;
208 bool x100 = !is_dma_copy_aligned(chan->device, 1, 1, 1);
209 struct dma_async_tx_descriptor *tx;
210 struct scif_status *status = NULL;
215 tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
218 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
219 __func__, __LINE__, err);
222 cookie = tx->tx_submit(tx);
223 if (dma_submit_error(cookie)) {
225 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
226 __func__, __LINE__, err);
229 dma_async_issue_pending(chan);
232 * For X100 use the status descriptor to write the value to
235 tx = ddev->device_prep_dma_imm_data(chan, dst, val, 0);
237 status = dma_pool_alloc(ep->remote_dev->signal_pool, GFP_KERNEL,
241 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
242 __func__, __LINE__, err);
246 status->src_dma_addr = src;
248 src += offsetof(struct scif_status, val);
249 tx = ddev->device_prep_dma_memcpy(chan, dst, src, sizeof(val),
254 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
255 __func__, __LINE__, err);
259 tx->callback = scif_prog_signal_cb;
260 tx->callback_param = status;
262 cookie = tx->tx_submit(tx);
263 if (dma_submit_error(cookie)) {
265 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
266 __func__, __LINE__, err);
269 dma_async_issue_pending(chan);
273 dma_pool_free(ep->remote_dev->signal_pool, status,
274 status->src_dma_addr);
281 * @epd - Endpoint Descriptor
282 * @offset - registered address to write @val to
283 * @val - Value to be written at @offset
284 * @type - Type of the window.
286 * Arrange to write a value to the registered offset after ensuring that the
287 * offset provided is indeed valid.
289 int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
290 enum scif_window_type type)
292 struct scif_endpt *ep = (struct scif_endpt *)epd;
293 struct scif_window *window = NULL;
294 struct scif_rma_req req;
295 dma_addr_t dst_dma_addr;
298 mutex_lock(&ep->rma_info.rma_lock);
299 req.out_window = &window;
301 req.nr_bytes = sizeof(u64);
302 req.prot = SCIF_PROT_WRITE;
303 req.type = SCIF_WINDOW_SINGLE;
304 if (type == SCIF_WINDOW_SELF)
305 req.head = &ep->rma_info.reg_list;
307 req.head = &ep->rma_info.remote_reg_list;
308 /* Does a valid window exist? */
309 err = scif_query_window(&req);
311 dev_err(scif_info.mdev.this_device,
312 "%s %d err %d\n", __func__, __LINE__, err);
316 if (scif_is_mgmt_node() && scifdev_self(ep->remote_dev)) {
319 if (type == SCIF_WINDOW_SELF)
320 dst_virt = scif_get_local_va(offset, window);
323 scif_get_local_va(offset, (struct scif_window *)
324 window->peer_window);
327 dst_dma_addr = __scif_off_to_dma_addr(window, offset);
328 err = _scif_prog_signal(epd, dst_dma_addr, val);
331 mutex_unlock(&ep->rma_info.rma_lock);
335 static int _scif_fence_wait(scif_epd_t epd, int mark)
337 struct scif_endpt *ep = (struct scif_endpt *)epd;
338 dma_cookie_t cookie = mark & ~SCIF_REMOTE_FENCE;
341 /* Wait for DMA callback in scif_fence_mark_cb(..) */
342 err = wait_event_interruptible_timeout(ep->rma_info.markwq,
343 dma_async_is_tx_complete(
344 ep->rma_info.dma_chan,
345 cookie, NULL, NULL) ==
347 SCIF_NODE_ALIVE_TIMEOUT);
356 * scif_rma_handle_remote_fences:
358 * This routine services remote fence requests.
360 void scif_rma_handle_remote_fences(void)
362 struct list_head *item, *tmp;
363 struct scif_remote_fence_info *fence;
364 struct scif_endpt *ep;
368 mutex_lock(&scif_info.fencelock);
369 list_for_each_safe(item, tmp, &scif_info.fence) {
370 fence = list_entry(item, struct scif_remote_fence_info,
372 /* Remove fence from global list */
373 list_del(&fence->list);
375 /* Initiate the fence operation */
376 ep = (struct scif_endpt *)fence->msg.payload[0];
377 mark = fence->msg.payload[2];
378 err = _scif_fence_wait(ep, mark);
380 fence->msg.uop = SCIF_WAIT_NACK;
382 fence->msg.uop = SCIF_WAIT_ACK;
383 fence->msg.payload[0] = ep->remote_ep;
384 scif_nodeqp_send(ep->remote_dev, &fence->msg);
386 if (!atomic_sub_return(1, &ep->rma_info.fence_refcount))
387 schedule_work(&scif_info.misc_work);
389 mutex_unlock(&scif_info.fencelock);
392 static int _scif_send_fence(scif_epd_t epd, int uop, int mark, int *out_mark)
396 struct scif_fence_info *fence_req;
397 struct scif_endpt *ep = (struct scif_endpt *)epd;
399 fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
405 fence_req->state = OP_IN_PROGRESS;
406 init_completion(&fence_req->comp);
410 msg.payload[0] = ep->remote_ep;
411 msg.payload[1] = (u64)fence_req;
412 if (uop == SCIF_WAIT)
413 msg.payload[2] = mark;
414 spin_lock(&ep->lock);
415 if (ep->state == SCIFEP_CONNECTED)
416 err = scif_nodeqp_send(ep->remote_dev, &msg);
419 spin_unlock(&ep->lock);
423 /* Wait for a SCIF_WAIT_(N)ACK message */
424 err = wait_for_completion_timeout(&fence_req->comp,
425 SCIF_NODE_ALIVE_TIMEOUT);
426 if (!err && scifdev_alive(ep))
432 mutex_lock(&ep->rma_info.rma_lock);
434 if (fence_req->state == OP_IN_PROGRESS)
435 fence_req->state = OP_FAILED;
437 if (fence_req->state == OP_FAILED && !err)
439 if (uop == SCIF_MARK && fence_req->state == OP_COMPLETED)
440 *out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark;
441 mutex_unlock(&ep->rma_info.rma_lock);
449 * scif_send_fence_mark:
450 * @epd: end point descriptor.
451 * @out_mark: Output DMA mark reported by peer.
453 * Send a remote fence mark request.
455 static int scif_send_fence_mark(scif_epd_t epd, int *out_mark)
457 return _scif_send_fence(epd, SCIF_MARK, 0, out_mark);
461 * scif_send_fence_wait:
462 * @epd: end point descriptor.
463 * @mark: DMA mark to wait for.
465 * Send a remote fence wait request.
467 static int scif_send_fence_wait(scif_epd_t epd, int mark)
469 return _scif_send_fence(epd, SCIF_WAIT, mark, NULL);
472 static int _scif_send_fence_signal_wait(struct scif_endpt *ep,
473 struct scif_fence_info *fence_req)
478 /* Wait for a SCIF_SIG_(N)ACK message */
479 err = wait_for_completion_timeout(&fence_req->comp,
480 SCIF_NODE_ALIVE_TIMEOUT);
481 if (!err && scifdev_alive(ep))
488 mutex_lock(&ep->rma_info.rma_lock);
489 if (fence_req->state == OP_IN_PROGRESS)
490 fence_req->state = OP_FAILED;
491 mutex_unlock(&ep->rma_info.rma_lock);
493 if (fence_req->state == OP_FAILED && !err)
499 * scif_send_fence_signal:
500 * @epd - endpoint descriptor
501 * @loff - local offset
502 * @lval - local value to write to loffset
503 * @roff - remote offset
504 * @rval - remote value to write to roffset
507 * Sends a remote fence signal request
509 static int scif_send_fence_signal(scif_epd_t epd, off_t roff, u64 rval,
510 off_t loff, u64 lval, int flags)
514 struct scif_fence_info *fence_req;
515 struct scif_endpt *ep = (struct scif_endpt *)epd;
517 fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
523 fence_req->state = OP_IN_PROGRESS;
524 init_completion(&fence_req->comp);
526 if (flags & SCIF_SIGNAL_LOCAL) {
527 msg.uop = SCIF_SIG_LOCAL;
528 msg.payload[0] = ep->remote_ep;
529 msg.payload[1] = roff;
530 msg.payload[2] = rval;
531 msg.payload[3] = (u64)fence_req;
532 spin_lock(&ep->lock);
533 if (ep->state == SCIFEP_CONNECTED)
534 err = scif_nodeqp_send(ep->remote_dev, &msg);
537 spin_unlock(&ep->lock);
540 err = _scif_send_fence_signal_wait(ep, fence_req);
544 fence_req->state = OP_IN_PROGRESS;
546 if (flags & SCIF_SIGNAL_REMOTE) {
547 msg.uop = SCIF_SIG_REMOTE;
548 msg.payload[0] = ep->remote_ep;
549 msg.payload[1] = loff;
550 msg.payload[2] = lval;
551 msg.payload[3] = (u64)fence_req;
552 spin_lock(&ep->lock);
553 if (ep->state == SCIFEP_CONNECTED)
554 err = scif_nodeqp_send(ep->remote_dev, &msg);
557 spin_unlock(&ep->lock);
560 err = _scif_send_fence_signal_wait(ep, fence_req);
568 static void scif_fence_mark_cb(void *arg)
570 struct scif_endpt *ep = (struct scif_endpt *)arg;
572 wake_up_interruptible(&ep->rma_info.markwq);
573 atomic_dec(&ep->rma_info.fence_refcount);
579 * @epd - endpoint descriptor
580 * Set up a mark for this endpoint and return the value of the mark.
582 int _scif_fence_mark(scif_epd_t epd, int *mark)
584 struct scif_endpt *ep = (struct scif_endpt *)epd;
585 struct dma_chan *chan = ep->rma_info.dma_chan;
586 struct dma_device *ddev = chan->device;
587 struct dma_async_tx_descriptor *tx;
591 tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
594 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
595 __func__, __LINE__, err);
598 cookie = tx->tx_submit(tx);
599 if (dma_submit_error(cookie)) {
601 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
602 __func__, __LINE__, err);
605 dma_async_issue_pending(chan);
606 tx = ddev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
609 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
610 __func__, __LINE__, err);
613 tx->callback = scif_fence_mark_cb;
614 tx->callback_param = ep;
615 *mark = cookie = tx->tx_submit(tx);
616 if (dma_submit_error(cookie)) {
618 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
619 __func__, __LINE__, err);
622 atomic_inc(&ep->rma_info.fence_refcount);
623 dma_async_issue_pending(chan);
627 #define SCIF_LOOPB_MAGIC_MARK 0xdead
629 int scif_fence_mark(scif_epd_t epd, int flags, int *mark)
631 struct scif_endpt *ep = (struct scif_endpt *)epd;
634 dev_dbg(scif_info.mdev.this_device,
635 "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x\n",
637 err = scif_verify_epd(ep);
642 if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))
645 /* At least one of init self or peer RMA should be set */
646 if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
649 /* Exactly one of init self or peer RMA should be set but not both */
650 if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
654 * Management node loopback does not need to use DMA.
655 * Return a valid mark to be symmetric.
657 if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
658 *mark = SCIF_LOOPB_MAGIC_MARK;
662 if (flags & SCIF_FENCE_INIT_SELF)
663 err = _scif_fence_mark(epd, mark);
665 err = scif_send_fence_mark(ep, mark);
668 dev_err(scif_info.mdev.this_device,
669 "%s %d err %d\n", __func__, __LINE__, err);
670 dev_dbg(scif_info.mdev.this_device,
671 "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x err %d\n",
672 ep, flags, *mark, err);
675 EXPORT_SYMBOL_GPL(scif_fence_mark);
677 int scif_fence_wait(scif_epd_t epd, int mark)
679 struct scif_endpt *ep = (struct scif_endpt *)epd;
682 dev_dbg(scif_info.mdev.this_device,
683 "SCIFAPI fence_wait: ep %p mark 0x%x\n",
685 err = scif_verify_epd(ep);
689 * Management node loopback does not need to use DMA.
690 * The only valid mark provided is 0 so simply
691 * return success if the mark is valid.
693 if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
694 if (mark == SCIF_LOOPB_MAGIC_MARK)
699 if (mark & SCIF_REMOTE_FENCE)
700 err = scif_send_fence_wait(epd, mark);
702 err = _scif_fence_wait(epd, mark);
704 dev_err(scif_info.mdev.this_device,
705 "%s %d err %d\n", __func__, __LINE__, err);
708 EXPORT_SYMBOL_GPL(scif_fence_wait);
710 int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval,
711 off_t roff, u64 rval, int flags)
713 struct scif_endpt *ep = (struct scif_endpt *)epd;
716 dev_dbg(scif_info.mdev.this_device,
717 "SCIFAPI fence_signal: ep %p loff 0x%lx lval 0x%llx roff 0x%lx rval 0x%llx flags 0x%x\n",
718 ep, loff, lval, roff, rval, flags);
719 err = scif_verify_epd(ep);
724 if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER |
725 SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))
728 /* At least one of init self or peer RMA should be set */
729 if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
732 /* Exactly one of init self or peer RMA should be set but not both */
733 if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
736 /* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */
737 if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)))
740 /* Only Dword offsets allowed */
741 if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(u32) - 1)))
744 /* Only Dword aligned offsets allowed */
745 if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(u32) - 1)))
748 if (flags & SCIF_FENCE_INIT_PEER) {
749 err = scif_send_fence_signal(epd, roff, rval, loff,
752 /* Local Signal in Local RAS */
753 if (flags & SCIF_SIGNAL_LOCAL) {
754 err = scif_prog_signal(epd, loff, lval,
760 /* Signal in Remote RAS */
761 if (flags & SCIF_SIGNAL_REMOTE)
762 err = scif_prog_signal(epd, roff,
763 rval, SCIF_WINDOW_PEER);
767 dev_err(scif_info.mdev.this_device,
768 "%s %d err %d\n", __func__, __LINE__, err);
771 EXPORT_SYMBOL_GPL(scif_fence_signal);