drivers/misc/mic/scif/scif_rma.c
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Intel SCIF driver.
 *
 */
#include <linux/dma_remapping.h>
#include <linux/pagemap.h>
#include "scif_main.h"
#include "scif_map.h"

/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
#define SCIF_MAP_ULIMIT 0x40

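/*
 * When true, registrations that request it (SCIF_MAP_ULIMIT) are
 * charged against the pinning process's RLIMIT_MEMLOCK; see
 * __scif_check_inc_pinned_vm() below.
 */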
bool scif_ulimit_check = true;

/**
 * scif_rma_ep_init:
 * @ep: end point
 *
 * Initialize RMA per EP data structures.
 */
void scif_rma_ep_init(struct scif_endpt *ep)
{
        struct scif_endpt_rma_info *rma = &ep->rma_info;

        mutex_init(&rma->rma_lock);
        init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN,
                         SCIF_DMA_64BIT_PFN);
        spin_lock_init(&rma->tc_lock);
        mutex_init(&rma->mmn_lock);
        INIT_LIST_HEAD(&rma->reg_list);
        INIT_LIST_HEAD(&rma->remote_reg_list);
        atomic_set(&rma->tw_refcount, 0);
        atomic_set(&rma->tcw_refcount, 0);
        atomic_set(&rma->tcw_total_pages, 0);
        atomic_set(&rma->fence_refcount, 0);

        rma->async_list_del = 0;
        rma->dma_chan = NULL;
        INIT_LIST_HEAD(&rma->mmn_list);
        INIT_LIST_HEAD(&rma->vma_list);
        init_waitqueue_head(&rma->markwq);
}

/**
 * scif_rma_ep_can_uninit:
 * @ep: end point
 *
 * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
 */
int scif_rma_ep_can_uninit(struct scif_endpt *ep)
{
        int ret = 0;

        mutex_lock(&ep->rma_info.rma_lock);
        /* Destroy RMA info only if all lists are empty and no refs are held */
        if (list_empty(&ep->rma_info.reg_list) &&
            list_empty(&ep->rma_info.remote_reg_list) &&
            list_empty(&ep->rma_info.mmn_list) &&
            !atomic_read(&ep->rma_info.tw_refcount) &&
            !atomic_read(&ep->rma_info.tcw_refcount) &&
            !atomic_read(&ep->rma_info.fence_refcount))
                ret = 1;
        mutex_unlock(&ep->rma_info.rma_lock);
        return ret;
}

/**
 * scif_create_pinned_pages:
 * @nr_pages: number of pages in window
 * @prot: read/write protection
 *
 * Allocate and prepare a set of pinned pages.
 */
static struct scif_pinned_pages *
scif_create_pinned_pages(int nr_pages, int prot)
{
        struct scif_pinned_pages *pin;

        might_sleep();
        pin = scif_zalloc(sizeof(*pin));
        if (!pin)
                goto error;

        pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
        if (!pin->pages)
                goto error_free_pinned_pages;

        pin->prot = prot;
        pin->magic = SCIFEP_MAGIC;
        return pin;

error_free_pinned_pages:
        scif_free(pin, sizeof(*pin));
error:
        return NULL;
}

/**
 * scif_destroy_pinned_pages:
 * @pin: A set of pinned pages.
 *
 * Deallocate resources for pinned pages.
 */
static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
{
        int j;
        int writeable = pin->prot & SCIF_PROT_WRITE;
        int kernel = SCIF_MAP_KERNEL & pin->map_flags;

        for (j = 0; j < pin->nr_pages; j++) {
                if (pin->pages[j] && !kernel) {
                        if (writeable)
                                SetPageDirty(pin->pages[j]);
                        put_page(pin->pages[j]);
                }
        }

        scif_free(pin->pages,
                  pin->nr_pages * sizeof(*pin->pages));
        scif_free(pin, sizeof(*pin));
        return 0;
}

/**
 * scif_create_window:
 * @ep: end point
 * @nr_pages: number of pages
 * @offset: registration offset
 * @temp: true if a temporary window is being created
 *
 * Allocate and prepare a self registration window.
 */
struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
                                       s64 offset, bool temp)
{
        struct scif_window *window;

        might_sleep();
        window = scif_zalloc(sizeof(*window));
        if (!window)
                goto error;

        window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
        if (!window->dma_addr)
                goto error_free_window;

        window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
        if (!window->num_pages)
                goto error_free_window;

        window->offset = offset;
        window->ep = (u64)ep;
        window->magic = SCIFEP_MAGIC;
        window->reg_state = OP_IDLE;
        init_waitqueue_head(&window->regwq);
        window->unreg_state = OP_IDLE;
        init_waitqueue_head(&window->unregwq);
        INIT_LIST_HEAD(&window->list);
        window->type = SCIF_WINDOW_SELF;
        window->temp = temp;
        return window;

error_free_window:
        scif_free(window->dma_addr,
                  nr_pages * sizeof(*window->dma_addr));
        scif_free(window, sizeof(*window));
error:
        return NULL;
}

/**
 * scif_destroy_incomplete_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for self window.
 */
static void scif_destroy_incomplete_window(struct scif_endpt *ep,
                                           struct scif_window *window)
{
        int err;
        int nr_pages = window->nr_pages;
        struct scif_allocmsg *alloc = &window->alloc_handle;
        struct scifmsg msg;

retry:
        /* Wait for a SCIF_ALLOC_GNT/REJ message */
        err = wait_event_timeout(alloc->allocwq,
                                 alloc->state != OP_IN_PROGRESS,
                                 SCIF_NODE_ALIVE_TIMEOUT);
        if (!err && scifdev_alive(ep))
                goto retry;

        mutex_lock(&ep->rma_info.rma_lock);
        if (alloc->state == OP_COMPLETED) {
                msg.uop = SCIF_FREE_VIRT;
                msg.src = ep->port;
                msg.payload[0] = ep->remote_ep;
                msg.payload[1] = window->alloc_handle.vaddr;
                msg.payload[2] = (u64)window;
                msg.payload[3] = SCIF_REGISTER;
                _scif_nodeqp_send(ep->remote_dev, &msg);
        }
        mutex_unlock(&ep->rma_info.rma_lock);

        scif_free_window_offset(ep, window, window->offset);
        scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
        scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
        scif_free(window, sizeof(*window));
}

/**
 * scif_unmap_window:
 * @remote_dev: SCIF remote device
 * @window: registration window
 *
 * Delete any DMA mappings created for a registered self window.
 */
void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
{
        int j;

        if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
                if (window->st) {
                        dma_unmap_sg(&remote_dev->sdev->dev,
                                     window->st->sgl, window->st->nents,
                                     DMA_BIDIRECTIONAL);
                        sg_free_table(window->st);
                        kfree(window->st);
                        window->st = NULL;
                }
        } else {
                for (j = 0; j < window->nr_contig_chunks; j++) {
                        if (window->dma_addr[j]) {
                                scif_unmap_single(window->dma_addr[j],
                                                  remote_dev,
                                                  window->num_pages[j] <<
                                                  PAGE_SHIFT);
                                window->dma_addr[j] = 0x0;
                        }
                }
        }
}

static inline struct mm_struct *__scif_acquire_mm(void)
{
        if (scif_ulimit_check)
                return get_task_mm(current);
        return NULL;
}

static inline void __scif_release_mm(struct mm_struct *mm)
{
        if (mm)
                mmput(mm);
}

static inline int
__scif_dec_pinned_vm_lock(struct mm_struct *mm,
                          int nr_pages, bool try_lock)
{
        if (!mm || !nr_pages || !scif_ulimit_check)
                return 0;
        if (try_lock) {
                if (!down_write_trylock(&mm->mmap_sem)) {
                        dev_err(scif_info.mdev.this_device,
                                "%s %d err\n", __func__, __LINE__);
                        return -1;
                }
        } else {
                down_write(&mm->mmap_sem);
        }
        mm->pinned_vm -= nr_pages;
        up_write(&mm->mmap_sem);
        return 0;
}

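/*
 * Pinned pages are charged against RLIMIT_MEMLOCK under mmap_sem,
 * mirroring the accounting other page-pinning drivers perform;
 * CAP_IPC_LOCK bypasses the limit.
 */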
static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
                                             int nr_pages)
{
        unsigned long locked, lock_limit;

        if (!mm || !nr_pages || !scif_ulimit_check)
                return 0;

        locked = nr_pages;
        locked += mm->pinned_vm;
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
                dev_err(scif_info.mdev.this_device,
                        "locked(%lu) > lock_limit(%lu)\n",
                        locked, lock_limit);
                return -ENOMEM;
        }
        mm->pinned_vm = locked;
        return 0;
}

/**
 * scif_destroy_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for self window.
 */
int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
{
        int j;
        struct scif_pinned_pages *pinned_pages = window->pinned_pages;
        int nr_pages = window->nr_pages;

        might_sleep();
        if (!window->temp && window->mm) {
                __scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
                __scif_release_mm(window->mm);
                window->mm = NULL;
        }

        scif_free_window_offset(ep, window, window->offset);
        scif_unmap_window(ep->remote_dev, window);
        /*
         * Decrement references for this set of pinned pages from
         * this window.
         */
        j = atomic_sub_return(1, &pinned_pages->ref_count);
        if (j < 0)
                dev_err(scif_info.mdev.this_device,
                        "%s %d incorrect ref count %d\n",
                        __func__, __LINE__, j);
        /*
         * If the ref count for pinned_pages is zero then someone
         * has already called scif_unpin_pages() for it and we should
         * destroy the page cache.
         */
        if (!j)
                scif_destroy_pinned_pages(window->pinned_pages);
        scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
        scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
        window->magic = 0;
        scif_free(window, sizeof(*window));
        return 0;
}

/**
 * scif_create_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Allocate and prepare lookup entries for the remote
 * end to copy over the physical addresses.
 * Returns 0 on success and appropriate errno on failure.
 */
static int scif_create_remote_lookup(struct scif_dev *remote_dev,
                                     struct scif_window *window)
{
        int i, j, err = 0;
        int nr_pages = window->nr_pages;
        bool vmalloc_dma_phys, vmalloc_num_pages;

        might_sleep();
        /* Map window */
        err = scif_map_single(&window->mapped_offset,
                              window, remote_dev, sizeof(*window));
        if (err)
                goto error_window;

        /* Compute the number of lookup entries. 21 == 2MB Shift */
        window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
                                        ((2) * 1024 * 1024)) >> 21;

        window->dma_addr_lookup.lookup =
                scif_alloc_coherent(&window->dma_addr_lookup.offset,
                                    remote_dev, window->nr_lookup *
                                    sizeof(*window->dma_addr_lookup.lookup),
                                    GFP_KERNEL | __GFP_ZERO);
        if (!window->dma_addr_lookup.lookup) {
                err = -ENOMEM;
                goto error_window;
        }

        window->num_pages_lookup.lookup =
                scif_alloc_coherent(&window->num_pages_lookup.offset,
                                    remote_dev, window->nr_lookup *
                                    sizeof(*window->num_pages_lookup.lookup),
                                    GFP_KERNEL | __GFP_ZERO);
        if (!window->num_pages_lookup.lookup) {
                err = -ENOMEM;
                goto error_window;
        }

        vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
        vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);

        /* Now map each of the pages containing physical addresses */
        for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
                err = scif_map_page(&window->dma_addr_lookup.lookup[j],
                                    vmalloc_dma_phys ?
                                    vmalloc_to_page(&window->dma_addr[i]) :
                                    virt_to_page(&window->dma_addr[i]),
                                    remote_dev);
                if (err)
                        goto error_window;
                err = scif_map_page(&window->num_pages_lookup.lookup[j],
                                    vmalloc_num_pages ?
                                    vmalloc_to_page(&window->num_pages[i]) :
                                    virt_to_page(&window->num_pages[i]),
                                    remote_dev);
                if (err)
                        goto error_window;
        }
        return 0;
error_window:
        return err;
}

/**
 * scif_destroy_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Destroy lookup entries used for the remote
 * end to copy over the physical addresses.
 */
static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
                                       struct scif_window *window)
{
        int i, j;

        if (window->nr_lookup) {
                struct scif_rma_lookup *lup = &window->dma_addr_lookup;
                struct scif_rma_lookup *npup = &window->num_pages_lookup;

                for (i = 0, j = 0; i < window->nr_pages;
                        i += SCIF_NR_ADDR_IN_PAGE, j++) {
                        if (lup->lookup && lup->lookup[j])
                                scif_unmap_single(lup->lookup[j],
                                                  remote_dev,
                                                  PAGE_SIZE);
                        if (npup->lookup && npup->lookup[j])
                                scif_unmap_single(npup->lookup[j],
                                                  remote_dev,
                                                  PAGE_SIZE);
                }
                if (lup->lookup)
                        scif_free_coherent(lup->lookup, lup->offset,
                                           remote_dev, window->nr_lookup *
                                           sizeof(*lup->lookup));
                if (npup->lookup)
                        scif_free_coherent(npup->lookup, npup->offset,
                                           remote_dev, window->nr_lookup *
                                           sizeof(*npup->lookup));
                if (window->mapped_offset)
                        scif_unmap_single(window->mapped_offset,
                                          remote_dev, sizeof(*window));
                window->nr_lookup = 0;
        }
}

/**
 * scif_create_remote_window:
 * @scifdev: SCIF device
 * @nr_pages: number of pages in window
 *
 * Allocate and prepare a remote registration window.
 */
static struct scif_window *
scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
{
        struct scif_window *window;

        might_sleep();
        window = scif_zalloc(sizeof(*window));
        if (!window)
                goto error_ret;

        window->magic = SCIFEP_MAGIC;
        window->nr_pages = nr_pages;

        window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
        if (!window->dma_addr)
                goto error_window;

        window->num_pages = scif_zalloc(nr_pages *
                                        sizeof(*window->num_pages));
        if (!window->num_pages)
                goto error_window;

        if (scif_create_remote_lookup(scifdev, window))
                goto error_window;

        window->type = SCIF_WINDOW_PEER;
        window->unreg_state = OP_IDLE;
        INIT_LIST_HEAD(&window->list);
        return window;
error_window:
        /* Unmap any partially created lookup entries before freeing */
        scif_destroy_remote_lookup(scifdev, window);
        scif_destroy_remote_window(window);
error_ret:
        return NULL;
}

/**
 * scif_destroy_remote_window:
 * @window: remote registration window
 *
 * Deallocate resources for remote window.
 */
void
scif_destroy_remote_window(struct scif_window *window)
{
        scif_free(window->dma_addr, window->nr_pages *
                  sizeof(*window->dma_addr));
        scif_free(window->num_pages, window->nr_pages *
                  sizeof(*window->num_pages));
        window->magic = 0;
        scif_free(window, sizeof(*window));
}

/**
 * scif_iommu_map: create DMA mappings if the IOMMU is enabled
 * @remote_dev: SCIF remote device
 * @window: remote registration window
 *
 * Map the physical pages using dma_map_sg(..) and then detect the number
 * of contiguous DMA mappings allocated
 */
static int scif_iommu_map(struct scif_dev *remote_dev,
                          struct scif_window *window)
{
        struct scatterlist *sg;
        int i, err;
        scif_pinned_pages_t pin = window->pinned_pages;

        window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
        if (!window->st)
                return -ENOMEM;

        err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
        if (err) {
                kfree(window->st);
                window->st = NULL;
                return err;
        }

        for_each_sg(window->st->sgl, sg, window->st->nents, i)
                sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);

        err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
                         window->st->nents, DMA_BIDIRECTIONAL);
        if (!err) {
                sg_free_table(window->st);
                kfree(window->st);
                window->st = NULL;
                return -ENOMEM;
        }
        /* Detect contiguous ranges of DMA mappings */
        sg = window->st->sgl;
        for (i = 0; sg; i++) {
                dma_addr_t last_da;

                window->dma_addr[i] = sg_dma_address(sg);
                window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
                last_da = sg_dma_address(sg) + sg_dma_len(sg);
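                /* Fold subsequent entries into this chunk while their DMA
                 * addresses remain contiguous with the end of the chunk.
                 */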
                while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
                        window->num_pages[i] +=
                                (sg_dma_len(sg) >> PAGE_SHIFT);
                        last_da = sg_dma_address(sg) +
                                sg_dma_len(sg);
                }
                window->nr_contig_chunks++;
        }
        return 0;
}

/**
 * scif_map_window:
 * @remote_dev: SCIF remote device
 * @window: self registration window
 *
 * Map pages of a window into the aperture/PCI.
 * Also determine addresses required for DMA.
 */
int
scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
{
        int i, j, k, err = 0, nr_contig_pages;
        scif_pinned_pages_t pin;
        phys_addr_t phys_prev, phys_curr;

        might_sleep();

        pin = window->pinned_pages;

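        /* With an IOMMU present, delegate building the DMA ranges to
         * dma_map_sg() via scif_iommu_map().
         */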
        if (intel_iommu_enabled && !scifdev_self(remote_dev))
                return scif_iommu_map(remote_dev, window);

        for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
                phys_prev = page_to_phys(pin->pages[i]);
                nr_contig_pages = 1;

                /* Detect physically contiguous chunks */
                for (k = i + 1; k < window->nr_pages; k++) {
                        phys_curr = page_to_phys(pin->pages[k]);
                        if (phys_curr != (phys_prev + PAGE_SIZE))
                                break;
                        phys_prev = phys_curr;
                        nr_contig_pages++;
                }
                window->num_pages[j] = nr_contig_pages;
                window->nr_contig_chunks++;
                if (scif_is_mgmt_node()) {
                        /*
                         * Management node has to deal with SMPT on X100 and
                         * hence the DMA mapping is required
                         */
                        err = scif_map_single(&window->dma_addr[j],
                                              phys_to_virt(page_to_phys(
                                                           pin->pages[i])),
                                              remote_dev,
                                              nr_contig_pages << PAGE_SHIFT);
                        if (err)
                                return err;
                } else {
                        window->dma_addr[j] = page_to_phys(pin->pages[i]);
                }
        }
        return err;
}

/**
 * scif_send_scif_unregister:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_UNREGISTER message.
 */
static int scif_send_scif_unregister(struct scif_endpt *ep,
                                     struct scif_window *window)
{
        struct scifmsg msg;

        msg.uop = SCIF_UNREGISTER;
        msg.src = ep->port;
        msg.payload[0] = window->alloc_handle.vaddr;
        msg.payload[1] = (u64)window;
        return scif_nodeqp_send(ep->remote_dev, &msg);
}

/**
 * scif_unregister_window:
 * @window: self registration window
 *
 * Send an unregistration request and wait for a response.
 * Called with the RMA lock held; the lock is dropped and reacquired
 * while waiting.
 */
int scif_unregister_window(struct scif_window *window)
{
        int err = 0;
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
        bool send_msg = false;

        might_sleep();
        switch (window->unreg_state) {
        case OP_IDLE:
        {
                window->unreg_state = OP_IN_PROGRESS;
                send_msg = true;
                /* fall through */
        }
        case OP_IN_PROGRESS:
        {
                scif_get_window(window, 1);
                mutex_unlock(&ep->rma_info.rma_lock);
                if (send_msg) {
                        err = scif_send_scif_unregister(ep, window);
                        if (err) {
                                window->unreg_state = OP_COMPLETED;
                                goto done;
                        }
                } else {
                        /* Return ENXIO since unregistration is in progress */
                        mutex_lock(&ep->rma_info.rma_lock);
                        return -ENXIO;
                }
retry:
                /* Wait for a SCIF_UNREGISTER_(N)ACK message */
                err = wait_event_timeout(window->unregwq,
                                         window->unreg_state != OP_IN_PROGRESS,
                                         SCIF_NODE_ALIVE_TIMEOUT);
                if (!err && scifdev_alive(ep))
                        goto retry;
                if (!err) {
                        err = -ENODEV;
                        window->unreg_state = OP_COMPLETED;
                        dev_err(scif_info.mdev.this_device,
                                "%s %d err %d\n", __func__, __LINE__, err);
                }
                if (err > 0)
                        err = 0;
done:
                mutex_lock(&ep->rma_info.rma_lock);
                scif_put_window(window, 1);
                break;
        }
        case OP_FAILED:
        {
                if (!scifdev_alive(ep)) {
                        err = -ENODEV;
                        window->unreg_state = OP_COMPLETED;
                }
                break;
        }
        case OP_COMPLETED:
                break;
        default:
                err = -ENODEV;
        }

        if (window->unreg_state == OP_COMPLETED && window->ref_count)
                scif_put_window(window, window->nr_pages);

        if (!window->ref_count) {
                atomic_inc(&ep->rma_info.tw_refcount);
                list_del_init(&window->list);
                scif_free_window_offset(ep, window, window->offset);
                mutex_unlock(&ep->rma_info.rma_lock);
                if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
                    scifdev_alive(ep)) {
                        scif_drain_dma_intr(ep->remote_dev->sdev,
                                            ep->rma_info.dma_chan);
                } else {
                        if (!__scif_dec_pinned_vm_lock(window->mm,
                                                       window->nr_pages, 1)) {
                                __scif_release_mm(window->mm);
                                window->mm = NULL;
                        }
                }
                scif_queue_for_cleanup(window, &scif_info.rma);
                mutex_lock(&ep->rma_info.rma_lock);
        }
        return err;
}

/**
 * scif_send_alloc_request:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request
 */
static int scif_send_alloc_request(struct scif_endpt *ep,
                                   struct scif_window *window)
{
        struct scifmsg msg;
        struct scif_allocmsg *alloc = &window->alloc_handle;

        /* Set up the Alloc Handle */
        alloc->state = OP_IN_PROGRESS;
        init_waitqueue_head(&alloc->allocwq);

        /* Send out an allocation request */
        msg.uop = SCIF_ALLOC_REQ;
        msg.payload[1] = window->nr_pages;
        msg.payload[2] = (u64)&window->alloc_handle;
        return _scif_nodeqp_send(ep->remote_dev, &msg);
}

/**
 * scif_prep_remote_window:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request, wait for an allocation response,
 * and prepare the remote window by copying over the page lists.
 */
static int scif_prep_remote_window(struct scif_endpt *ep,
                                   struct scif_window *window)
{
        struct scifmsg msg;
        struct scif_window *remote_window;
        struct scif_allocmsg *alloc = &window->alloc_handle;
        dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
        int i = 0, j = 0;
        int nr_contig_chunks, loop_nr_contig_chunks;
        int remaining_nr_contig_chunks, nr_lookup;
        int err, map_err;

        map_err = scif_map_window(ep->remote_dev, window);
        if (map_err)
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s %d map_err %d\n", __func__, __LINE__, map_err);
        remaining_nr_contig_chunks = window->nr_contig_chunks;
        nr_contig_chunks = window->nr_contig_chunks;
retry:
        /* Wait for a SCIF_ALLOC_GNT/REJ message */
        err = wait_event_timeout(alloc->allocwq,
                                 alloc->state != OP_IN_PROGRESS,
                                 SCIF_NODE_ALIVE_TIMEOUT);
        mutex_lock(&ep->rma_info.rma_lock);
        /* Synchronize with the thread waking up allocwq */
        mutex_unlock(&ep->rma_info.rma_lock);
        if (!err && scifdev_alive(ep))
                goto retry;

        if (!err)
                err = -ENODEV;

        if (err > 0)
                err = 0;
        else
                return err;

        /* Bail out. The remote end rejected this request */
        if (alloc->state == OP_FAILED)
                return -ENOMEM;

        if (map_err) {
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s %d err %d\n", __func__, __LINE__, map_err);
                msg.uop = SCIF_FREE_VIRT;
                msg.src = ep->port;
                msg.payload[0] = ep->remote_ep;
                msg.payload[1] = window->alloc_handle.vaddr;
                msg.payload[2] = (u64)window;
                msg.payload[3] = SCIF_REGISTER;
                spin_lock(&ep->lock);
                if (ep->state == SCIFEP_CONNECTED)
                        err = _scif_nodeqp_send(ep->remote_dev, &msg);
                else
                        err = -ENOTCONN;
                spin_unlock(&ep->lock);
                return err;
        }

        remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
                                     ep->remote_dev);

        /* Compute the number of lookup entries. 21 == 2MB Shift */
        nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
                          >> ilog2(SCIF_NR_ADDR_IN_PAGE);

        dma_phys_lookup =
                scif_ioremap(remote_window->dma_addr_lookup.offset,
                             nr_lookup *
                             sizeof(*remote_window->dma_addr_lookup.lookup),
                             ep->remote_dev);
        num_pages_lookup =
                scif_ioremap(remote_window->num_pages_lookup.offset,
                             nr_lookup *
                             sizeof(*remote_window->num_pages_lookup.lookup),
                             ep->remote_dev);

        while (remaining_nr_contig_chunks) {
                loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
                                              (int)SCIF_NR_ADDR_IN_PAGE);
                /* #1/2 - Copy physical addresses over to the remote side */

                /* #2/2 - Copy DMA addresses (addresses that are fed into the
                 * DMA engine) We transfer bus addresses which are then
                 * converted into a MIC physical address on the remote
                 * side if it is a MIC, if the remote node is a mgmt node we
                 * transfer the MIC physical address
                 */
                tmp = scif_ioremap(dma_phys_lookup[j],
                                   loop_nr_contig_chunks *
                                   sizeof(*window->dma_addr),
                                   ep->remote_dev);
                tmp1 = scif_ioremap(num_pages_lookup[j],
                                    loop_nr_contig_chunks *
                                    sizeof(*window->num_pages),
                                    ep->remote_dev);
                if (scif_is_mgmt_node()) {
                        memcpy_toio((void __force __iomem *)tmp,
                                    &window->dma_addr[i], loop_nr_contig_chunks
                                    * sizeof(*window->dma_addr));
                        memcpy_toio((void __force __iomem *)tmp1,
                                    &window->num_pages[i], loop_nr_contig_chunks
                                    * sizeof(*window->num_pages));
                } else {
                        if (scifdev_is_p2p(ep->remote_dev)) {
                                /*
                                 * add remote node's base address for this node
                                 * to convert it into a MIC address
                                 */
                                int m;
                                dma_addr_t dma_addr;

                                for (m = 0; m < loop_nr_contig_chunks; m++) {
                                        dma_addr = window->dma_addr[i + m] +
                                                ep->remote_dev->base_addr;
                                        writeq(dma_addr,
                                               (void __force __iomem *)&tmp[m]);
                                }
                                memcpy_toio((void __force __iomem *)tmp1,
                                            &window->num_pages[i],
                                            loop_nr_contig_chunks
                                            * sizeof(*window->num_pages));
                        } else {
                                /* Mgmt node or loopback - transfer DMA
                                 * addresses as is, this is the same as a
                                 * MIC physical address (we use the dma_addr
                                 * and not the phys_addr array since the
                                 * phys_addr is only setup if there is a mmap()
                                 * request from the mgmt node)
                                 */
                                memcpy_toio((void __force __iomem *)tmp,
                                            &window->dma_addr[i],
                                            loop_nr_contig_chunks *
                                            sizeof(*window->dma_addr));
                                memcpy_toio((void __force __iomem *)tmp1,
                                            &window->num_pages[i],
                                            loop_nr_contig_chunks *
                                            sizeof(*window->num_pages));
                        }
                }
                remaining_nr_contig_chunks -= loop_nr_contig_chunks;
                i += loop_nr_contig_chunks;
                j++;
                scif_iounmap(tmp, loop_nr_contig_chunks *
                             sizeof(*window->dma_addr), ep->remote_dev);
                scif_iounmap(tmp1, loop_nr_contig_chunks *
                             sizeof(*window->num_pages), ep->remote_dev);
        }

        /* Prepare the remote window for the peer */
        remote_window->peer_window = (u64)window;
        remote_window->offset = window->offset;
        remote_window->prot = window->prot;
        remote_window->nr_contig_chunks = nr_contig_chunks;
        remote_window->ep = ep->remote_ep;
        scif_iounmap(num_pages_lookup,
                     nr_lookup *
                     sizeof(*remote_window->num_pages_lookup.lookup),
                     ep->remote_dev);
        scif_iounmap(dma_phys_lookup,
                     nr_lookup *
                     sizeof(*remote_window->dma_addr_lookup.lookup),
                     ep->remote_dev);
        scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
        window->peer_window = alloc->vaddr;
        return err;
}

/**
 * scif_send_scif_register:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_REGISTER message if the EP is connected and wait for a
 * SCIF_REGISTER_(N)ACK message; otherwise send a SCIF_FREE_VIRT
 * message so that the peer can free its remote window allocated earlier.
 */
static int scif_send_scif_register(struct scif_endpt *ep,
                                   struct scif_window *window)
{
        int err = 0;
        struct scifmsg msg;

        msg.src = ep->port;
        msg.payload[0] = ep->remote_ep;
        msg.payload[1] = window->alloc_handle.vaddr;
        msg.payload[2] = (u64)window;
        spin_lock(&ep->lock);
        if (ep->state == SCIFEP_CONNECTED) {
                msg.uop = SCIF_REGISTER;
                window->reg_state = OP_IN_PROGRESS;
                err = _scif_nodeqp_send(ep->remote_dev, &msg);
                spin_unlock(&ep->lock);
                if (!err) {
retry:
                        /* Wait for a SCIF_REGISTER_(N)ACK message */
                        err = wait_event_timeout(window->regwq,
                                                 window->reg_state !=
                                                 OP_IN_PROGRESS,
                                                 SCIF_NODE_ALIVE_TIMEOUT);
                        if (!err && scifdev_alive(ep))
                                goto retry;
                        err = !err ? -ENODEV : 0;
                        if (window->reg_state == OP_FAILED)
                                err = -ENOTCONN;
                }
        } else {
                msg.uop = SCIF_FREE_VIRT;
                msg.payload[3] = SCIF_REGISTER;
                err = _scif_nodeqp_send(ep->remote_dev, &msg);
                spin_unlock(&ep->lock);
                if (!err)
                        err = -ENOTCONN;
        }
        return err;
}

/**
 * scif_get_window_offset:
 * @ep: end point descriptor
 * @flags: flags
 * @offset: offset hint
 * @num_pages: number of pages
 * @out_offset: computed offset returned by reference.
 *
 * Compute/Claim a new offset for this EP.
 */
int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
                           int num_pages, s64 *out_offset)
{
        s64 page_index;
        struct iova *iova_ptr;
        int err = 0;

        if (flags & SCIF_MAP_FIXED) {
                page_index = SCIF_IOVA_PFN(offset);
                iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
                                        page_index + num_pages - 1);
                if (!iova_ptr)
                        err = -EADDRINUSE;
        } else {
                iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
                                      SCIF_DMA_63BIT_PFN - 1, 0);
                if (!iova_ptr)
                        err = -ENOMEM;
        }
        if (!err)
                *out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
        return err;
}

/**
 * scif_free_window_offset:
 * @ep: end point descriptor
 * @window: registration window
 * @offset: Offset to be freed
 *
 * Free offset for this EP. The caller is supposed to grab
 * the RMA mutex before calling this API.
 */
void scif_free_window_offset(struct scif_endpt *ep,
                             struct scif_window *window, s64 offset)
{
        if ((window && !window->offset_freed) || !window) {
                free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
                if (window)
                        window->offset_freed = true;
        }
}

/**
 * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Remote side is requesting a memory allocation.
 */
void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
{
        int err;
        struct scif_window *window = NULL;
        int nr_pages = msg->payload[1];

        window = scif_create_remote_window(scifdev, nr_pages);
        if (!window) {
                err = -ENOMEM;
                goto error;
        }

        /* The peer's allocation request is granted */
        msg->uop = SCIF_ALLOC_GNT;
        msg->payload[0] = (u64)window;
        msg->payload[1] = window->mapped_offset;
        err = scif_nodeqp_send(scifdev, msg);
        if (err)
                scif_destroy_remote_window(window);
        return;
error:
        /* The peer's allocation request is rejected */
        dev_err(&scifdev->sdev->dev,
                "%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
                __func__, __LINE__, err, window, nr_pages);
        msg->uop = SCIF_ALLOC_REJ;
        scif_nodeqp_send(scifdev, msg);
}

/**
 * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Remote side responded to a memory allocation.
 */
void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
        struct scif_window *window = container_of(handle, struct scif_window,
                                                  alloc_handle);
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        handle->vaddr = msg->payload[0];
        handle->phys_addr = msg->payload[1];
        if (msg->uop == SCIF_ALLOC_GNT)
                handle->state = OP_COMPLETED;
        else
                handle->state = OP_FAILED;
        wake_up(&handle->allocwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Free up memory kmalloc'd earlier.
 */
void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window = (struct scif_window *)msg->payload[1];

        scif_destroy_remote_window(window);
}

static void
scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
{
        int j;
        struct scif_hw_dev *sdev = dev->sdev;
        phys_addr_t apt_base = 0;

        /*
         * Add the aperture base if the DMA address is not card relative
         * since the DMA addresses need to be an offset into the bar
         */
        if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
            sdev->aper && !sdev->card_rel_da)
                apt_base = sdev->aper->pa;
        else
                return;

        for (j = 0; j < window->nr_contig_chunks; j++) {
                if (window->num_pages[j])
                        window->dma_addr[j] += apt_base;
                else
                        break;
        }
}

/**
 * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Update remote window list with a new registered window.
 */
void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
        struct scif_window *window =
                (struct scif_window *)msg->payload[1];

        mutex_lock(&ep->rma_info.rma_lock);
        spin_lock(&ep->lock);
        if (ep->state == SCIFEP_CONNECTED) {
                msg->uop = SCIF_REGISTER_ACK;
                scif_nodeqp_send(ep->remote_dev, msg);
                scif_fixup_aper_base(ep->remote_dev, window);
                /* No further failures expected. Insert new window */
                scif_insert_window(window, &ep->rma_info.remote_reg_list);
        } else {
                msg->uop = SCIF_REGISTER_NACK;
                scif_nodeqp_send(ep->remote_dev, msg);
        }
        spin_unlock(&ep->lock);
        mutex_unlock(&ep->rma_info.rma_lock);
        /* free up any lookup resources now that page lists are transferred */
        scif_destroy_remote_lookup(ep->remote_dev, window);
        /*
         * If the window was not inserted into the remote list
         * (i.e. a NACK was sent), it still needs to be destroyed here.
         */
        if (msg->uop == SCIF_REGISTER_NACK)
                scif_destroy_remote_window(window);
}

/**
 * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Remove window from remote registration list.
 */
void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_rma_req req;
        struct scif_window *window = NULL;
        struct scif_window *recv_window =
                (struct scif_window *)msg->payload[0];
        struct scif_endpt *ep;
        int del_window = 0;

        ep = (struct scif_endpt *)recv_window->ep;
        req.out_window = &window;
        req.offset = recv_window->offset;
        req.prot = 0;
        req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
        req.type = SCIF_WINDOW_FULL;
        req.head = &ep->rma_info.remote_reg_list;
        msg->payload[0] = ep->remote_ep;

        mutex_lock(&ep->rma_info.rma_lock);
        /* Does a valid window exist? */
        if (scif_query_window(&req)) {
                dev_err(&scifdev->sdev->dev,
                        "%s %d -ENXIO\n", __func__, __LINE__);
                msg->uop = SCIF_UNREGISTER_ACK;
                goto error;
        }
        if (window) {
                if (window->ref_count)
                        scif_put_window(window, window->nr_pages);
                else
                        dev_err(&scifdev->sdev->dev,
                                "%s %d ref count should be +ve\n",
                                __func__, __LINE__);
                window->unreg_state = OP_COMPLETED;
                if (!window->ref_count) {
                        msg->uop = SCIF_UNREGISTER_ACK;
                        atomic_inc(&ep->rma_info.tw_refcount);
                        ep->rma_info.async_list_del = 1;
                        list_del_init(&window->list);
                        del_window = 1;
                } else {
                        /* NACK! There are valid references to this window */
                        msg->uop = SCIF_UNREGISTER_NACK;
                }
        } else {
                /* The window did not make its way to the list at all. ACK */
                msg->uop = SCIF_UNREGISTER_ACK;
                scif_destroy_remote_window(recv_window);
        }
error:
        mutex_unlock(&ep->rma_info.rma_lock);
        if (del_window)
                scif_drain_dma_intr(ep->remote_dev->sdev,
                                    ep->rma_info.dma_chan);
        scif_nodeqp_send(ep->remote_dev, msg);
        if (del_window)
                scif_queue_for_cleanup(window, &scif_info.rma);
}

/**
 * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to complete registration.
 */
void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window =
                (struct scif_window *)msg->payload[2];
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        window->reg_state = OP_COMPLETED;
        wake_up(&window->regwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to inform it that registration
 * cannot be completed.
 */
void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window =
                (struct scif_window *)msg->payload[2];
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        window->reg_state = OP_FAILED;
        wake_up(&window->regwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to complete unregistration.
 */
void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window =
                (struct scif_window *)msg->payload[1];
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        window->unreg_state = OP_COMPLETED;
        wake_up(&window->unregwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
 * @scifdev:    SCIF device
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to inform it that unregistration
 * cannot be completed immediately.
 */
void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
        struct scif_window *window =
                (struct scif_window *)msg->payload[1];
        struct scif_endpt *ep = (struct scif_endpt *)window->ep;

        mutex_lock(&ep->rma_info.rma_lock);
        window->unreg_state = OP_FAILED;
        wake_up(&window->unregwq);
        mutex_unlock(&ep->rma_info.rma_lock);
}
1317
1318 int __scif_pin_pages(void *addr, size_t len, int *out_prot,
1319                      int map_flags, scif_pinned_pages_t *pages)
1320 {
1321         struct scif_pinned_pages *pinned_pages;
1322         int nr_pages, err = 0, i;
1323         bool vmalloc_addr = false;
1324         bool try_upgrade = false;
1325         int prot = *out_prot;
1326         int ulimit = 0;
1327         struct mm_struct *mm = NULL;
1328
1329         /* Unsupported flags */
1330         if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
1331                 return -EINVAL;
1332         ulimit = !!(map_flags & SCIF_MAP_ULIMIT);
1333
1334         /* Unsupported protection requested */
1335         if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
1336                 return -EINVAL;
1337
1338         /* addr/len must be page aligned. len should be non zero */
1339         if (!len ||
1340             (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
1341             (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
1342                 return -EINVAL;
1343
1344         might_sleep();
1345
1346         nr_pages = len >> PAGE_SHIFT;
1347
1348         /* Allocate a set of pinned pages */
1349         pinned_pages = scif_create_pinned_pages(nr_pages, prot);
1350         if (!pinned_pages)
1351                 return -ENOMEM;
1352
1353         if (map_flags & SCIF_MAP_KERNEL) {
1354                 if (is_vmalloc_addr(addr))
1355                         vmalloc_addr = true;
1356
1357                 for (i = 0; i < nr_pages; i++) {
1358                         if (vmalloc_addr)
1359                                 pinned_pages->pages[i] =
1360                                         vmalloc_to_page(addr + (i * PAGE_SIZE));
1361                         else
1362                                 pinned_pages->pages[i] =
1363                                         virt_to_page(addr + (i * PAGE_SIZE));
1364                 }
1365                 pinned_pages->nr_pages = nr_pages;
1366                 pinned_pages->map_flags = SCIF_MAP_KERNEL;
1367         } else {
1368                 /*
1369                  * SCIF supports registration caching. If a registration has
1370                  * been requested with read only permissions, then we try
1371                  * to pin the pages with RW permissions so that a subsequent
1372                  * transfer with RW permission can hit the cache instead of
1373                  * invalidating it. If the upgrade fails with RW then we
1374                  * revert back to R permission and retry
1375                  */
1376                 if (prot == SCIF_PROT_READ)
1377                         try_upgrade = true;
1378                 prot |= SCIF_PROT_WRITE;
1379 retry:
1380                 mm = current->mm;
1381                 down_write(&mm->mmap_sem);
1382                 if (ulimit) {
1383                         err = __scif_check_inc_pinned_vm(mm, nr_pages);
1384                         if (err) {
1385                                 up_write(&mm->mmap_sem);
1386                                 pinned_pages->nr_pages = 0;
1387                                 goto error_unmap;
1388                         }
1389                 }
1390
1391                 pinned_pages->nr_pages = get_user_pages(
1392                                 current,
1393                                 mm,
1394                                 (u64)addr,
1395                                 nr_pages,
1396                                 !!(prot & SCIF_PROT_WRITE),
1397                                 0,
1398                                 pinned_pages->pages,
1399                                 NULL);
1400                 up_write(&mm->mmap_sem);
1401                 if (nr_pages != pinned_pages->nr_pages) {
1402                         if (try_upgrade) {
1403                                 if (ulimit)
1404                                         __scif_dec_pinned_vm_lock(mm,
1405                                                                   nr_pages, 0);
1406                                 /* Roll back any pinned pages */
1407                                 for (i = 0; i < pinned_pages->nr_pages; i++) {
1408                                         if (pinned_pages->pages[i])
1409                                                 put_page(
1410                                                         pinned_pages->pages[i]);
1411                                 }
1412                                 prot &= ~SCIF_PROT_WRITE;
1413                                 try_upgrade = false;
1414                                 goto retry;
1415                         }
1416                 }
1417                 pinned_pages->map_flags = 0;
1418         }
1419
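             /*
              * Fail if fewer pages than requested could be pinned;
              * get_user_pages() may also have returned an error here.
              * nr_pages is restored so that the cleanup path walks the
              * whole page array.
              */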
1420         if (pinned_pages->nr_pages < nr_pages) {
1421                 err = -EFAULT;
1422                 pinned_pages->nr_pages = nr_pages;
1423                 goto dec_pinned;
1424         }
1425
1426         *out_prot = prot;
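             /*
              * The initial reference is dropped by scif_unpin_pages();
              * each registration of these pages takes an additional one.
              */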
1427         atomic_set(&pinned_pages->ref_count, 1);
1428         *pages = pinned_pages;
1429         return err;
1430 dec_pinned:
1431         if (ulimit)
1432                 __scif_dec_pinned_vm_lock(mm, nr_pages, 0);
1433         /* Something went wrong! Roll back */
1434 error_unmap:
1435         pinned_pages->nr_pages = nr_pages;
1436         scif_destroy_pinned_pages(pinned_pages);
1437         *pages = NULL;
1438         dev_dbg(scif_info.mdev.this_device,
1439                 "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
1440         return err;
1441 }
1442
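     /**
      * scif_pin_pages:
      * @addr: starting virtual address, page aligned
      * @len: length of the range in bytes, page aligned and non-zero
      * @prot: SCIF_PROT_READ and/or SCIF_PROT_WRITE
      * @map_flags: SCIF_MAP_KERNEL if addr is a kernel virtual address
      * @pages: returns the handle for the pinned set of pages
      *
      * Pin the pages backing [addr, addr + len) so that they can later
      * be registered with scif_register_pinned_pages().
      */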
1443 int scif_pin_pages(void *addr, size_t len, int prot,
1444                    int map_flags, scif_pinned_pages_t *pages)
1445 {
1446         return __scif_pin_pages(addr, len, &prot, map_flags, pages);
1447 }
1448 EXPORT_SYMBOL_GPL(scif_pin_pages);
1449
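     /**
      * scif_unpin_pages:
      * @pinned_pages: handle returned by scif_pin_pages()
      *
      * Drop the caller's reference on a set of pinned pages. The pages
      * themselves are released once no registered window is backed by
      * them any more.
      */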
1450 int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
1451 {
1452         int err = 0, ret;
1453
1454         if (!pinned_pages || pinned_pages->magic != SCIFEP_MAGIC)
1455                 return -EINVAL;
1456
1457         ret = atomic_sub_return(1, &pinned_pages->ref_count);
1458         if (ret < 0) {
1459                 dev_err(scif_info.mdev.this_device,
1460                         "%s %d scif_unpin_pages called without pinning? rc %d\n",
1461                         __func__, __LINE__, ret);
1462                 return -EINVAL;
1463         }
1464         /*
1465          * Destroy the set of pinned pages if its ref count has dropped
1466          * to zero. If the count is still positive, a valid registered
1467          * window is backed by these pages and they will be freed once
1468          * all such windows have been unregistered.
1469          */
1470         if (!ret)
1471                 err = scif_destroy_pinned_pages(pinned_pages);
1472
1473         return err;
1474 }
1475 EXPORT_SYMBOL_GPL(scif_unpin_pages);
1476
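     /**
      * scif_insert_local_window:
      * @window: window to insert
      * @ep: end point
      *
      * Add a new window to the endpoint's self registration list while
      * holding the RMA lock.
      */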
1477 static inline void
1478 scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
1479 {
1480         mutex_lock(&ep->rma_info.rma_lock);
1481         scif_insert_window(window, &ep->rma_info.reg_list);
1482         mutex_unlock(&ep->rma_info.rma_lock);
1483 }
1484
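     /**
      * scif_register_pinned_pages:
      * @epd: end point descriptor
      * @pinned_pages: handle returned by scif_pin_pages()
      * @offset: requested registration offset, honored with SCIF_MAP_FIXED
      * @map_flags: SCIF_MAP_FIXED or 0
      *
      * Create a registered window backed by an existing set of pinned
      * pages and advertise it to the peer. Returns the registered offset
      * on success, a negative errno on failure.
      *
      * A minimal usage sketch; the connected endpoint "epd" and the
      * page-aligned kernel buffer "buf" of "len" bytes are assumptions,
      * not part of this driver:
      *
      *	scif_pinned_pages_t pp;
      *	off_t off;
      *	int err;
      *
      *	err = scif_pin_pages(buf, len, SCIF_PROT_READ | SCIF_PROT_WRITE,
      *			     SCIF_MAP_KERNEL, &pp);
      *	if (err)
      *		return err;
      *	off = scif_register_pinned_pages(epd, pp, 0, 0);
      *	if (off < 0)
      *		scif_unpin_pages(pp);
      */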
1485 off_t scif_register_pinned_pages(scif_epd_t epd,
1486                                  scif_pinned_pages_t pinned_pages,
1487                                  off_t offset, int map_flags)
1488 {
1489         struct scif_endpt *ep = (struct scif_endpt *)epd;
1490         s64 computed_offset;
1491         struct scif_window *window;
1492         int err;
1493         size_t len;
1494         struct device *spdev;
1495
1496         /* Unsupported flags */
1497         if (map_flags & ~SCIF_MAP_FIXED)
1498                 return -EINVAL;
1499
1500         len = pinned_pages->nr_pages << PAGE_SHIFT;
1501
1502         /*
1503          * With SCIF_MAP_FIXED, fail if the offset is not page aligned,
1504          * is negative, or if offset + len wraps around.
1505          */
1506         if ((map_flags & SCIF_MAP_FIXED) &&
1507             ((ALIGN(offset, PAGE_SIZE) != offset) ||
1508             (offset < 0) ||
1509             (offset + (off_t)len < offset)))
1510                 return -EINVAL;
1511
1512         might_sleep();
1513
1514         err = scif_verify_epd(ep);
1515         if (err)
1516                 return err;
1517         /*
1518          * It is an error to pass pinned_pages to scif_register_pinned_pages()
1519          * after calling scif_unpin_pages().
1520          */
1521         if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
1522                 return -EINVAL;
1523
1524         /* Compute the offset for this registration */
1525         err = scif_get_window_offset(ep, map_flags, offset,
1526                                      len, &computed_offset);
1527         if (err) {
1528                 atomic_sub(1, &pinned_pages->ref_count);
1529                 return err;
1530         }
1531
1532         /* Allocate and prepare self registration window */
1533         window = scif_create_window(ep, pinned_pages->nr_pages,
1534                                     computed_offset, false);
1535         if (!window) {
1536                 atomic_sub(1, &pinned_pages->ref_count);
1537                 scif_free_window_offset(ep, NULL, computed_offset);
1538                 return -ENOMEM;
1539         }
1540
1541         window->pinned_pages = pinned_pages;
1542         window->nr_pages = pinned_pages->nr_pages;
1543         window->prot = pinned_pages->prot;
1544
1545         spdev = scif_get_peer_dev(ep->remote_dev);
1546         if (IS_ERR(spdev)) {
1547                 err = PTR_ERR(spdev);
1548                 scif_destroy_window(ep, window);
1549                 return err;
1550         }
1551         err = scif_send_alloc_request(ep, window);
1552         if (err) {
1553                 dev_err(&ep->remote_dev->sdev->dev,
1554                         "%s %d err %d\n", __func__, __LINE__, err);
1555                 goto error_unmap;
1556         }
1557
1558         /* Prepare the remote registration window */
1559         err = scif_prep_remote_window(ep, window);
1560         if (err) {
1561                 dev_err(&ep->remote_dev->sdev->dev,
1562                         "%s %d err %d\n", __func__, __LINE__, err);
1563                 goto error_unmap;
1564         }
1565
1566         /* Tell the peer about the new window */
1567         err = scif_send_scif_register(ep, window);
1568         if (err) {
1569                 dev_err(&ep->remote_dev->sdev->dev,
1570                         "%s %d err %d\n", __func__, __LINE__, err);
1571                 goto error_unmap;
1572         }
1573
1574         scif_put_peer_dev(spdev);
1575         /* No further failures expected. Insert new window */
1576         scif_insert_local_window(window, ep);
1577         return computed_offset;
1578 error_unmap:
1579         scif_destroy_window(ep, window);
1580         scif_put_peer_dev(spdev);
1581         dev_err(&ep->remote_dev->sdev->dev,
1582                 "%s %d err %d\n", __func__, __LINE__, err);
1583         return err;
1584 }
1585 EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
1586
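     /**
      * scif_register:
      * @epd: end point descriptor
      * @addr: starting virtual address, page aligned
      * @len: length of the range in bytes, page aligned and non-zero
      * @offset: requested registration offset, honored with SCIF_MAP_FIXED
      * @prot: SCIF_PROT_READ and/or SCIF_PROT_WRITE
      * @map_flags: SCIF_MAP_FIXED and/or SCIF_MAP_KERNEL
      *
      * Pin the pages backing [addr, addr + len), create a registered
      * window for them and advertise it to the peer. Returns the
      * registered offset on success, a negative errno on failure.
      *
      * A minimal usage sketch; the connected endpoint "epd" and the
      * page-aligned length "len" are assumptions, not part of this
      * driver:
      *
      *	void *buf = vmalloc(len);
      *	off_t off;
      *
      *	if (!buf)
      *		return -ENOMEM;
      *	off = scif_register(epd, buf, len, 0,
      *			    SCIF_PROT_READ | SCIF_PROT_WRITE,
      *			    SCIF_MAP_KERNEL);
      *	if (off < 0)
      *		vfree(buf);
      */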
1587 off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
1588                     int prot, int map_flags)
1589 {
1590         scif_pinned_pages_t pinned_pages;
1591         off_t err;
1592         struct scif_endpt *ep = (struct scif_endpt *)epd;
1593         s64 computed_offset;
1594         struct scif_window *window;
1595         struct mm_struct *mm = NULL;
1596         struct device *spdev;
1597
1598         dev_dbg(scif_info.mdev.this_device,
1599                 "SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
1600                 epd, addr, len, offset, prot, map_flags);
1601         /* Unsupported flags */
1602         if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
1603                 return -EINVAL;
1604
1605         /*
1606          * With SCIF_MAP_FIXED, fail if the offset is not page aligned,
1607          * is negative, or if offset + len wraps around.
1608          */
1609         if ((map_flags & SCIF_MAP_FIXED) &&
1610             ((ALIGN(offset, PAGE_SIZE) != offset) ||
1611             (offset < 0) ||
1612             (offset + (off_t)len < offset)))
1613                 return -EINVAL;
1614
1615         /* Unsupported protection requested */
1616         if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
1617                 return -EINVAL;
1618
1619         /* addr and len must be page-aligned; len must be non-zero */
1620         if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
1621             (ALIGN(len, PAGE_SIZE) != len))
1622                 return -EINVAL;
1623
1624         might_sleep();
1625
1626         err = scif_verify_epd(ep);
1627         if (err)
1628                 return err;
1629
1630         /* Compute the offset for this registration */
1631         err = scif_get_window_offset(ep, map_flags, offset,
1632                                      len >> PAGE_SHIFT, &computed_offset);
1633         if (err)
1634                 return err;
1635
1636         spdev = scif_get_peer_dev(ep->remote_dev);
1637         if (IS_ERR(spdev)) {
1638                 err = PTR_ERR(spdev);
1639                 scif_free_window_offset(ep, NULL, computed_offset);
1640                 return err;
1641         }
1642         /* Allocate and prepare self registration window */
1643         window = scif_create_window(ep, len >> PAGE_SHIFT,
1644                                     computed_offset, false);
1645         if (!window) {
1646                 scif_free_window_offset(ep, NULL, computed_offset);
1647                 scif_put_peer_dev(spdev);
1648                 return -ENOMEM;
1649         }
1650
1651         window->nr_pages = len >> PAGE_SHIFT;
1652
1653         err = scif_send_alloc_request(ep, window);
1654         if (err) {
1655                 scif_destroy_incomplete_window(ep, window);
1656                 scif_put_peer_dev(spdev);
1657                 return err;
1658         }
1659
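             /*
              * User mode registrations are charged against the pinned-vm
              * ulimit and keep a reference to the task's mm for later
              * teardown.
              */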
1660         if (!(map_flags & SCIF_MAP_KERNEL)) {
1661                 mm = __scif_acquire_mm();
1662                 map_flags |= SCIF_MAP_ULIMIT;
1663         }
1664         /* Pin down the pages */
1665         err = __scif_pin_pages(addr, len, &prot,
1666                                map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
1667                                &pinned_pages);
1668         if (err) {
1669                 scif_destroy_incomplete_window(ep, window);
1670                 __scif_release_mm(mm);
1671                 goto error;
1672         }
1673
1674         window->pinned_pages = pinned_pages;
1675         window->prot = pinned_pages->prot;
1676         window->mm = mm;
1677
1678         /* Prepare the remote registration window */
1679         err = scif_prep_remote_window(ep, window);
1680         if (err) {
1681                 dev_err(&ep->remote_dev->sdev->dev,
1682                         "%s %d err %ld\n", __func__, __LINE__, err);
1683                 goto error_unmap;
1684         }
1685
1686         /* Tell the peer about the new window */
1687         err = scif_send_scif_register(ep, window);
1688         if (err) {
1689                 dev_err(&ep->remote_dev->sdev->dev,
1690                         "%s %d err %ld\n", __func__, __LINE__, err);
1691                 goto error_unmap;
1692         }
1693
1694         scif_put_peer_dev(spdev);
1695         /* No further failures expected. Insert new window */
1696         scif_insert_local_window(window, ep);
1697         dev_dbg(&ep->remote_dev->sdev->dev,
1698                 "SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
1699                 epd, addr, len, computed_offset);
1700         return computed_offset;
1701 error_unmap:
1702         scif_destroy_window(ep, window);
1703 error:
1704         scif_put_peer_dev(spdev);
1705         dev_err(&ep->remote_dev->sdev->dev,
1706                 "%s %d err %ld\n", __func__, __LINE__, err);
1707         return err;
1708 }
1709 EXPORT_SYMBOL_GPL(scif_register);
1710
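     /**
      * scif_unregister:
      * @epd: end point descriptor
      * @offset: start of the registered range, page aligned
      * @len: length of the range in bytes, page aligned and non-zero
      *
      * Unregister the windows spanning [offset, offset + len) on the
      * endpoint's self registration list. The backing pinned pages are
      * released once the last reference to them is dropped.
      */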
1711 int
1712 scif_unregister(scif_epd_t epd, off_t offset, size_t len)
1713 {
1714         struct scif_endpt *ep = (struct scif_endpt *)epd;
1715         struct scif_window *window = NULL;
1716         struct scif_rma_req req;
1717         int nr_pages, err;
1718         struct device *spdev;
1719
1720         dev_dbg(scif_info.mdev.this_device,
1721                 "SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
1722                 ep, offset, len);
1723         /* len must be page-aligned and non-zero */
1724         if (!len ||
1725             (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
1726                 return -EINVAL;
1727
1728         /* Fail if the offset is not page aligned or offset + len wraps around */
1729         if ((ALIGN(offset, PAGE_SIZE) != offset) ||
1730             (offset + (off_t)len < offset))
1731                 return -EINVAL;
1732
1733         err = scif_verify_epd(ep);
1734         if (err)
1735                 return err;
1736
1737         might_sleep();
1738         nr_pages = len >> PAGE_SHIFT;
1739
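             /* Set up a query for a window covering the entire range */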
1740         req.out_window = &window;
1741         req.offset = offset;
1742         req.prot = 0;
1743         req.nr_bytes = len;
1744         req.type = SCIF_WINDOW_FULL;
1745         req.head = &ep->rma_info.reg_list;
1746
1747         spdev = scif_get_peer_dev(ep->remote_dev);
1748         if (IS_ERR(spdev)) {
1749                 err = PTR_ERR(spdev);
1750                 return err;
1751         }
1752         mutex_lock(&ep->rma_info.rma_lock);
1753         /* Does a valid window exist? */
1754         err = scif_query_window(&req);
1755         if (err) {
1756                 dev_err(&ep->remote_dev->sdev->dev,
1757                         "%s %d err %d\n", __func__, __LINE__, err);
1758                 goto error;
1759         }
1760         /* Unregister all the windows in this range */
1761         err = scif_rma_list_unregister(window, offset, nr_pages);
1762         if (err)
1763                 dev_err(&ep->remote_dev->sdev->dev,
1764                         "%s %d err %d\n", __func__, __LINE__, err);
1765 error:
1766         mutex_unlock(&ep->rma_info.rma_lock);
1767         scif_put_peer_dev(spdev);
1768         return err;
1769 }
1770 EXPORT_SYMBOL_GPL(scif_unregister);