Merge branch 'sfi' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux into...
[firefly-linux-kernel-4.4.55.git] / drivers / infiniband / hw / ehca / ehca_mrmw.c
1 /*
2  *  IBM eServer eHCA Infiniband device driver for Linux on POWER
3  *
4  *  MR/MW functions
5  *
6  *  Authors: Dietmar Decker <ddecker@de.ibm.com>
7  *           Christoph Raisch <raisch@de.ibm.com>
8  *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
9  *
10  *  Copyright (c) 2005 IBM Corporation
11  *
12  *  All rights reserved.
13  *
14  *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
15  *  BSD.
16  *
17  * OpenIB BSD License
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions are met:
21  *
22  * Redistributions of source code must retain the above copyright notice, this
23  * list of conditions and the following disclaimer.
24  *
25  * Redistributions in binary form must reproduce the above copyright notice,
26  * this list of conditions and the following disclaimer in the documentation
27  * and/or other materials
28  * provided with the distribution.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
34  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
37  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
38  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40  * POSSIBILITY OF SUCH DAMAGE.
41  */
42
43 #include <linux/slab.h>
44 #include <rdma/ib_umem.h>
45
46 #include "ehca_iverbs.h"
47 #include "ehca_mrmw.h"
48 #include "hcp_if.h"
49 #include "hipz_hw.h"
50
/*
 * NUM_CHUNKS() - number of chunk_size sized chunks needed to cover length,
 * i.e. length / chunk_size rounded up.
 *
 * Both arguments are fully parenthesized so that compound expressions
 * (e.g. "1 << shift") expand with the intended precedence.
 * Note that chunk_size is evaluated twice; do not pass expressions with
 * side effects.
 */
#define NUM_CHUNKS(length, chunk_size) \
        (((length) + ((chunk_size) - 1)) / (chunk_size))
53
/* upper bound of rpages handed over per register_rpages hcall */
#define MAX_RPAGES 512

/*
 * DMEM toleration management
 */

/* memory section granularity, taken from the kernel's SECTION_SIZE_BITS */
#define EHCA_SECTSHIFT        SECTION_SIZE_BITS
#define EHCA_SECTSIZE          (1UL << EHCA_SECTSHIFT)

/* huge page: 2^34 bytes; the PFN mask covers the page frames inside one */
#define EHCA_HUGEPAGESHIFT     34
#define EHCA_HUGEPAGE_SIZE     (1UL << EHCA_HUGEPAGESHIFT)
#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)

/* all-ones marker for an invalid address */
#define EHCA_INVAL_ADDR        0xFFFFFFFFFFFFFFFFULL

/* map indexing: 13 index bits per level, i.e. 8k entries in a 64k block */
#define EHCA_DIR_INDEX_SHIFT 13
#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2)
#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT)
#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1)

/* per-level map sizes in bytes (currently fixed) */
#define EHCA_TOP_MAP_SIZE (0x10000)
#define EHCA_DIR_MAP_SIZE (0x10000)
#define EHCA_ENT_MAP_SIZE (0x10000)

/* file-local length value; its users are outside this part of the file */
static unsigned long ehca_mr_len;
73
74 /*
75  * Memory map data structures
76  */
77 struct ehca_dir_bmap {
78         u64 ent[EHCA_MAP_ENTRIES];
79 };
80 struct ehca_top_bmap {
81         struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES];
82 };
83 struct ehca_bmap {
84         struct ehca_top_bmap *top[EHCA_MAP_ENTRIES];
85 };
86
/* root of the memory map (struct ehca_bmap) */
static struct ehca_bmap *ehca_bmap;

/* slab caches backing ehca_mr_new()/ehca_mr_delete() and the MW pendants */
static struct kmem_cache *mr_cache;
static struct kmem_cache *mw_cache;
91
/* MR page sizes in bytes: 4 KiB, 64 KiB, 1 MiB and 16 MiB */
enum ehca_mr_pgsize {
        EHCA_MR_PGSIZE4K  = 0x1000L,      /* 1 << 12 */
        EHCA_MR_PGSIZE64K = 0x10000L,     /* 1 << 16 */
        EHCA_MR_PGSIZE1M  = 0x100000L,    /* 1 << 20 */
        EHCA_MR_PGSIZE16M = 0x1000000L    /* 1 << 24 */
};
98
/* log2 of the page sizes listed in enum ehca_mr_pgsize */
#define EHCA_MR_PGSHIFT4K  12
#define EHCA_MR_PGSHIFT64K 16
#define EHCA_MR_PGSHIFT1M  20
#define EHCA_MR_PGSHIFT16M 24
103
104 static u64 ehca_map_vaddr(void *caddr);
105
106 static u32 ehca_encode_hwpage_size(u32 pgsize)
107 {
108         int log = ilog2(pgsize);
109         WARN_ON(log < 12 || log > 24 || log & 3);
110         return (log - 12) / 4;
111 }
112
113 static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
114 {
115         return rounddown_pow_of_two(shca->hca_cap_mr_pgsize);
116 }
117
118 static struct ehca_mr *ehca_mr_new(void)
119 {
120         struct ehca_mr *me;
121
122         me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
123         if (me)
124                 spin_lock_init(&me->mrlock);
125         else
126                 ehca_gen_err("alloc failed");
127
128         return me;
129 }
130
131 static void ehca_mr_delete(struct ehca_mr *me)
132 {
133         kmem_cache_free(mr_cache, me);
134 }
135
136 static struct ehca_mw *ehca_mw_new(void)
137 {
138         struct ehca_mw *me;
139
140         me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
141         if (me)
142                 spin_lock_init(&me->mwlock);
143         else
144                 ehca_gen_err("alloc failed");
145
146         return me;
147 }
148
149 static void ehca_mw_delete(struct ehca_mw *me)
150 {
151         kmem_cache_free(mw_cache, me);
152 }
153
154 /*----------------------------------------------------------------------*/
155
156 struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
157 {
158         struct ib_mr *ib_mr;
159         int ret;
160         struct ehca_mr *e_maxmr;
161         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
162         struct ehca_shca *shca =
163                 container_of(pd->device, struct ehca_shca, ib_device);
164
165         if (shca->maxmr) {
166                 e_maxmr = ehca_mr_new();
167                 if (!e_maxmr) {
168                         ehca_err(&shca->ib_device, "out of memory");
169                         ib_mr = ERR_PTR(-ENOMEM);
170                         goto get_dma_mr_exit0;
171                 }
172
173                 ret = ehca_reg_maxmr(shca, e_maxmr,
174                                      (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)),
175                                      mr_access_flags, e_pd,
176                                      &e_maxmr->ib.ib_mr.lkey,
177                                      &e_maxmr->ib.ib_mr.rkey);
178                 if (ret) {
179                         ehca_mr_delete(e_maxmr);
180                         ib_mr = ERR_PTR(ret);
181                         goto get_dma_mr_exit0;
182                 }
183                 ib_mr = &e_maxmr->ib.ib_mr;
184         } else {
185                 ehca_err(&shca->ib_device, "no internal max-MR exist!");
186                 ib_mr = ERR_PTR(-EINVAL);
187                 goto get_dma_mr_exit0;
188         }
189
190 get_dma_mr_exit0:
191         if (IS_ERR(ib_mr))
192                 ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
193                          PTR_ERR(ib_mr), pd, mr_access_flags);
194         return ib_mr;
195 } /* end ehca_get_dma_mr() */
196
197 /*----------------------------------------------------------------------*/
198
199 struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
200                                struct ib_phys_buf *phys_buf_array,
201                                int num_phys_buf,
202                                int mr_access_flags,
203                                u64 *iova_start)
204 {
205         struct ib_mr *ib_mr;
206         int ret;
207         struct ehca_mr *e_mr;
208         struct ehca_shca *shca =
209                 container_of(pd->device, struct ehca_shca, ib_device);
210         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
211
212         u64 size;
213
214         if ((num_phys_buf <= 0) || !phys_buf_array) {
215                 ehca_err(pd->device, "bad input values: num_phys_buf=%x "
216                          "phys_buf_array=%p", num_phys_buf, phys_buf_array);
217                 ib_mr = ERR_PTR(-EINVAL);
218                 goto reg_phys_mr_exit0;
219         }
220         if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
221              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
222             ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
223              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
224                 /*
225                  * Remote Write Access requires Local Write Access
226                  * Remote Atomic Access requires Local Write Access
227                  */
228                 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
229                          mr_access_flags);
230                 ib_mr = ERR_PTR(-EINVAL);
231                 goto reg_phys_mr_exit0;
232         }
233
234         /* check physical buffer list and calculate size */
235         ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
236                                             iova_start, &size);
237         if (ret) {
238                 ib_mr = ERR_PTR(ret);
239                 goto reg_phys_mr_exit0;
240         }
241         if ((size == 0) ||
242             (((u64)iova_start + size) < (u64)iova_start)) {
243                 ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
244                          size, iova_start);
245                 ib_mr = ERR_PTR(-EINVAL);
246                 goto reg_phys_mr_exit0;
247         }
248
249         e_mr = ehca_mr_new();
250         if (!e_mr) {
251                 ehca_err(pd->device, "out of memory");
252                 ib_mr = ERR_PTR(-ENOMEM);
253                 goto reg_phys_mr_exit0;
254         }
255
256         /* register MR on HCA */
257         if (ehca_mr_is_maxmr(size, iova_start)) {
258                 e_mr->flags |= EHCA_MR_FLAG_MAXMR;
259                 ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
260                                      e_pd, &e_mr->ib.ib_mr.lkey,
261                                      &e_mr->ib.ib_mr.rkey);
262                 if (ret) {
263                         ib_mr = ERR_PTR(ret);
264                         goto reg_phys_mr_exit1;
265                 }
266         } else {
267                 struct ehca_mr_pginfo pginfo;
268                 u32 num_kpages;
269                 u32 num_hwpages;
270                 u64 hw_pgsize;
271
272                 num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
273                                         PAGE_SIZE);
274                 /* for kernel space we try most possible pgsize */
275                 hw_pgsize = ehca_get_max_hwpage_size(shca);
276                 num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
277                                          hw_pgsize);
278                 memset(&pginfo, 0, sizeof(pginfo));
279                 pginfo.type = EHCA_MR_PGI_PHYS;
280                 pginfo.num_kpages = num_kpages;
281                 pginfo.hwpage_size = hw_pgsize;
282                 pginfo.num_hwpages = num_hwpages;
283                 pginfo.u.phy.num_phys_buf = num_phys_buf;
284                 pginfo.u.phy.phys_buf_array = phys_buf_array;
285                 pginfo.next_hwpage =
286                         ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
287
288                 ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
289                                   e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
290                                   &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
291                 if (ret) {
292                         ib_mr = ERR_PTR(ret);
293                         goto reg_phys_mr_exit1;
294                 }
295         }
296
297         /* successful registration of all pages */
298         return &e_mr->ib.ib_mr;
299
300 reg_phys_mr_exit1:
301         ehca_mr_delete(e_mr);
302 reg_phys_mr_exit0:
303         if (IS_ERR(ib_mr))
304                 ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
305                          "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
306                          PTR_ERR(ib_mr), pd, phys_buf_array,
307                          num_phys_buf, mr_access_flags, iova_start);
308         return ib_mr;
309 } /* end ehca_reg_phys_mr() */
310
311 /*----------------------------------------------------------------------*/
312
313 struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
314                                u64 virt, int mr_access_flags,
315                                struct ib_udata *udata)
316 {
317         struct ib_mr *ib_mr;
318         struct ehca_mr *e_mr;
319         struct ehca_shca *shca =
320                 container_of(pd->device, struct ehca_shca, ib_device);
321         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
322         struct ehca_mr_pginfo pginfo;
323         int ret, page_shift;
324         u32 num_kpages;
325         u32 num_hwpages;
326         u64 hwpage_size;
327
328         if (!pd) {
329                 ehca_gen_err("bad pd=%p", pd);
330                 return ERR_PTR(-EFAULT);
331         }
332
333         if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
334              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
335             ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
336              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
337                 /*
338                  * Remote Write Access requires Local Write Access
339                  * Remote Atomic Access requires Local Write Access
340                  */
341                 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
342                          mr_access_flags);
343                 ib_mr = ERR_PTR(-EINVAL);
344                 goto reg_user_mr_exit0;
345         }
346
347         if (length == 0 || virt + length < virt) {
348                 ehca_err(pd->device, "bad input values: length=%llx "
349                          "virt_base=%llx", length, virt);
350                 ib_mr = ERR_PTR(-EINVAL);
351                 goto reg_user_mr_exit0;
352         }
353
354         e_mr = ehca_mr_new();
355         if (!e_mr) {
356                 ehca_err(pd->device, "out of memory");
357                 ib_mr = ERR_PTR(-ENOMEM);
358                 goto reg_user_mr_exit0;
359         }
360
361         e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
362                                  mr_access_flags, 0);
363         if (IS_ERR(e_mr->umem)) {
364                 ib_mr = (void *)e_mr->umem;
365                 goto reg_user_mr_exit1;
366         }
367
368         if (e_mr->umem->page_size != PAGE_SIZE) {
369                 ehca_err(pd->device, "page size not supported, "
370                          "e_mr->umem->page_size=%x", e_mr->umem->page_size);
371                 ib_mr = ERR_PTR(-EINVAL);
372                 goto reg_user_mr_exit2;
373         }
374
375         /* determine number of MR pages */
376         num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
377         /* select proper hw_pgsize */
378         page_shift = PAGE_SHIFT;
379         if (e_mr->umem->hugetlb) {
380                 /* determine page_shift, clamp between 4K and 16M */
381                 page_shift = (fls64(length - 1) + 3) & ~3;
382                 page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
383                                  EHCA_MR_PGSHIFT16M);
384         }
385         hwpage_size = 1UL << page_shift;
386
387         /* now that we have the desired page size, shift until it's
388          * supported, too. 4K is always supported, so this terminates.
389          */
390         while (!(hwpage_size & shca->hca_cap_mr_pgsize))
391                 hwpage_size >>= 4;
392
393 reg_user_mr_fallback:
394         num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
395         /* register MR on HCA */
396         memset(&pginfo, 0, sizeof(pginfo));
397         pginfo.type = EHCA_MR_PGI_USER;
398         pginfo.hwpage_size = hwpage_size;
399         pginfo.num_kpages = num_kpages;
400         pginfo.num_hwpages = num_hwpages;
401         pginfo.u.usr.region = e_mr->umem;
402         pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size;
403         pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
404         ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
405                           e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
406                           &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
407         if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
408                 ehca_warn(pd->device, "failed to register mr "
409                           "with hwpage_size=%llx", hwpage_size);
410                 ehca_info(pd->device, "try to register mr with "
411                           "kpage_size=%lx", PAGE_SIZE);
412                 /*
413                  * this means kpages are not contiguous for a hw page
414                  * try kernel page size as fallback solution
415                  */
416                 hwpage_size = PAGE_SIZE;
417                 goto reg_user_mr_fallback;
418         }
419         if (ret) {
420                 ib_mr = ERR_PTR(ret);
421                 goto reg_user_mr_exit2;
422         }
423
424         /* successful registration of all pages */
425         return &e_mr->ib.ib_mr;
426
427 reg_user_mr_exit2:
428         ib_umem_release(e_mr->umem);
429 reg_user_mr_exit1:
430         ehca_mr_delete(e_mr);
431 reg_user_mr_exit0:
432         if (IS_ERR(ib_mr))
433                 ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
434                          PTR_ERR(ib_mr), pd, mr_access_flags, udata);
435         return ib_mr;
436 } /* end ehca_reg_user_mr() */
437
438 /*----------------------------------------------------------------------*/
439
440 int ehca_rereg_phys_mr(struct ib_mr *mr,
441                        int mr_rereg_mask,
442                        struct ib_pd *pd,
443                        struct ib_phys_buf *phys_buf_array,
444                        int num_phys_buf,
445                        int mr_access_flags,
446                        u64 *iova_start)
447 {
448         int ret;
449
450         struct ehca_shca *shca =
451                 container_of(mr->device, struct ehca_shca, ib_device);
452         struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
453         u64 new_size;
454         u64 *new_start;
455         u32 new_acl;
456         struct ehca_pd *new_pd;
457         u32 tmp_lkey, tmp_rkey;
458         unsigned long sl_flags;
459         u32 num_kpages = 0;
460         u32 num_hwpages = 0;
461         struct ehca_mr_pginfo pginfo;
462
463         if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
464                 /* TODO not supported, because PHYP rereg hCall needs pages */
465                 ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
466                          "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
467                 ret = -EINVAL;
468                 goto rereg_phys_mr_exit0;
469         }
470
471         if (mr_rereg_mask & IB_MR_REREG_PD) {
472                 if (!pd) {
473                         ehca_err(mr->device, "rereg with bad pd, pd=%p "
474                                  "mr_rereg_mask=%x", pd, mr_rereg_mask);
475                         ret = -EINVAL;
476                         goto rereg_phys_mr_exit0;
477                 }
478         }
479
480         if ((mr_rereg_mask &
481              ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
482             (mr_rereg_mask == 0)) {
483                 ret = -EINVAL;
484                 goto rereg_phys_mr_exit0;
485         }
486
487         /* check other parameters */
488         if (e_mr == shca->maxmr) {
489                 /* should be impossible, however reject to be sure */
490                 ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
491                          "shca->maxmr=%p mr->lkey=%x",
492                          mr, shca->maxmr, mr->lkey);
493                 ret = -EINVAL;
494                 goto rereg_phys_mr_exit0;
495         }
496         if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
497                 if (e_mr->flags & EHCA_MR_FLAG_FMR) {
498                         ehca_err(mr->device, "not supported for FMR, mr=%p "
499                                  "flags=%x", mr, e_mr->flags);
500                         ret = -EINVAL;
501                         goto rereg_phys_mr_exit0;
502                 }
503                 if (!phys_buf_array || num_phys_buf <= 0) {
504                         ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
505                                  " phys_buf_array=%p num_phys_buf=%x",
506                                  mr_rereg_mask, phys_buf_array, num_phys_buf);
507                         ret = -EINVAL;
508                         goto rereg_phys_mr_exit0;
509                 }
510         }
511         if ((mr_rereg_mask & IB_MR_REREG_ACCESS) &&     /* change ACL */
512             (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
513               !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
514              ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
515               !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
516                 /*
517                  * Remote Write Access requires Local Write Access
518                  * Remote Atomic Access requires Local Write Access
519                  */
520                 ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
521                          "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
522                 ret = -EINVAL;
523                 goto rereg_phys_mr_exit0;
524         }
525
526         /* set requested values dependent on rereg request */
527         spin_lock_irqsave(&e_mr->mrlock, sl_flags);
528         new_start = e_mr->start;
529         new_size = e_mr->size;
530         new_acl = e_mr->acl;
531         new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
532
533         if (mr_rereg_mask & IB_MR_REREG_TRANS) {
534                 u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
535
536                 new_start = iova_start; /* change address */
537                 /* check physical buffer list and calculate size */
538                 ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
539                                                     num_phys_buf, iova_start,
540                                                     &new_size);
541                 if (ret)
542                         goto rereg_phys_mr_exit1;
543                 if ((new_size == 0) ||
544                     (((u64)iova_start + new_size) < (u64)iova_start)) {
545                         ehca_err(mr->device, "bad input values: new_size=%llx "
546                                  "iova_start=%p", new_size, iova_start);
547                         ret = -EINVAL;
548                         goto rereg_phys_mr_exit1;
549                 }
550                 num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
551                                         new_size, PAGE_SIZE);
552                 num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
553                                          new_size, hw_pgsize);
554                 memset(&pginfo, 0, sizeof(pginfo));
555                 pginfo.type = EHCA_MR_PGI_PHYS;
556                 pginfo.num_kpages = num_kpages;
557                 pginfo.hwpage_size = hw_pgsize;
558                 pginfo.num_hwpages = num_hwpages;
559                 pginfo.u.phy.num_phys_buf = num_phys_buf;
560                 pginfo.u.phy.phys_buf_array = phys_buf_array;
561                 pginfo.next_hwpage =
562                         ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
563         }
564         if (mr_rereg_mask & IB_MR_REREG_ACCESS)
565                 new_acl = mr_access_flags;
566         if (mr_rereg_mask & IB_MR_REREG_PD)
567                 new_pd = container_of(pd, struct ehca_pd, ib_pd);
568
569         ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
570                             new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
571         if (ret)
572                 goto rereg_phys_mr_exit1;
573
574         /* successful reregistration */
575         if (mr_rereg_mask & IB_MR_REREG_PD)
576                 mr->pd = pd;
577         mr->lkey = tmp_lkey;
578         mr->rkey = tmp_rkey;
579
580 rereg_phys_mr_exit1:
581         spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
582 rereg_phys_mr_exit0:
583         if (ret)
584                 ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
585                          "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
586                          "iova_start=%p",
587                          ret, mr, mr_rereg_mask, pd, phys_buf_array,
588                          num_phys_buf, mr_access_flags, iova_start);
589         return ret;
590 } /* end ehca_rereg_phys_mr() */
591
592 /*----------------------------------------------------------------------*/
593
594 int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
595 {
596         int ret = 0;
597         u64 h_ret;
598         struct ehca_shca *shca =
599                 container_of(mr->device, struct ehca_shca, ib_device);
600         struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
601         unsigned long sl_flags;
602         struct ehca_mr_hipzout_parms hipzout;
603
604         if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
605                 ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
606                          "e_mr->flags=%x", mr, e_mr, e_mr->flags);
607                 ret = -EINVAL;
608                 goto query_mr_exit0;
609         }
610
611         memset(mr_attr, 0, sizeof(struct ib_mr_attr));
612         spin_lock_irqsave(&e_mr->mrlock, sl_flags);
613
614         h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
615         if (h_ret != H_SUCCESS) {
616                 ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
617                          "hca_hndl=%llx mr_hndl=%llx lkey=%x",
618                          h_ret, mr, shca->ipz_hca_handle.handle,
619                          e_mr->ipz_mr_handle.handle, mr->lkey);
620                 ret = ehca2ib_return_code(h_ret);
621                 goto query_mr_exit1;
622         }
623         mr_attr->pd = mr->pd;
624         mr_attr->device_virt_addr = hipzout.vaddr;
625         mr_attr->size = hipzout.len;
626         mr_attr->lkey = hipzout.lkey;
627         mr_attr->rkey = hipzout.rkey;
628         ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
629
630 query_mr_exit1:
631         spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
632 query_mr_exit0:
633         if (ret)
634                 ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
635                          ret, mr, mr_attr);
636         return ret;
637 } /* end ehca_query_mr() */
638
639 /*----------------------------------------------------------------------*/
640
641 int ehca_dereg_mr(struct ib_mr *mr)
642 {
643         int ret = 0;
644         u64 h_ret;
645         struct ehca_shca *shca =
646                 container_of(mr->device, struct ehca_shca, ib_device);
647         struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
648
649         if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
650                 ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
651                          "e_mr->flags=%x", mr, e_mr, e_mr->flags);
652                 ret = -EINVAL;
653                 goto dereg_mr_exit0;
654         } else if (e_mr == shca->maxmr) {
655                 /* should be impossible, however reject to be sure */
656                 ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
657                          "shca->maxmr=%p mr->lkey=%x",
658                          mr, shca->maxmr, mr->lkey);
659                 ret = -EINVAL;
660                 goto dereg_mr_exit0;
661         }
662
663         /* TODO: BUSY: MR still has bound window(s) */
664         h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
665         if (h_ret != H_SUCCESS) {
666                 ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p "
667                          "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x",
668                          h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
669                          e_mr->ipz_mr_handle.handle, mr->lkey);
670                 ret = ehca2ib_return_code(h_ret);
671                 goto dereg_mr_exit0;
672         }
673
674         if (e_mr->umem)
675                 ib_umem_release(e_mr->umem);
676
677         /* successful deregistration */
678         ehca_mr_delete(e_mr);
679
680 dereg_mr_exit0:
681         if (ret)
682                 ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
683         return ret;
684 } /* end ehca_dereg_mr() */
685
686 /*----------------------------------------------------------------------*/
687
688 struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
689 {
690         struct ib_mw *ib_mw;
691         u64 h_ret;
692         struct ehca_mw *e_mw;
693         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
694         struct ehca_shca *shca =
695                 container_of(pd->device, struct ehca_shca, ib_device);
696         struct ehca_mw_hipzout_parms hipzout;
697
698         if (type != IB_MW_TYPE_1)
699                 return ERR_PTR(-EINVAL);
700
701         e_mw = ehca_mw_new();
702         if (!e_mw) {
703                 ib_mw = ERR_PTR(-ENOMEM);
704                 goto alloc_mw_exit0;
705         }
706
707         h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
708                                          e_pd->fw_pd, &hipzout);
709         if (h_ret != H_SUCCESS) {
710                 ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli "
711                          "shca=%p hca_hndl=%llx mw=%p",
712                          h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
713                 ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
714                 goto alloc_mw_exit1;
715         }
716         /* successful MW allocation */
717         e_mw->ipz_mw_handle = hipzout.handle;
718         e_mw->ib_mw.rkey    = hipzout.rkey;
719         return &e_mw->ib_mw;
720
721 alloc_mw_exit1:
722         ehca_mw_delete(e_mw);
723 alloc_mw_exit0:
724         if (IS_ERR(ib_mw))
725                 ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
726         return ib_mw;
727 } /* end ehca_alloc_mw() */
728
729 /*----------------------------------------------------------------------*/
730
731 int ehca_bind_mw(struct ib_qp *qp,
732                  struct ib_mw *mw,
733                  struct ib_mw_bind *mw_bind)
734 {
735         /* TODO: not supported up to now */
736         ehca_gen_err("bind MW currently not supported by HCAD");
737
738         return -EPERM;
739 } /* end ehca_bind_mw() */
740
741 /*----------------------------------------------------------------------*/
742
743 int ehca_dealloc_mw(struct ib_mw *mw)
744 {
745         u64 h_ret;
746         struct ehca_shca *shca =
747                 container_of(mw->device, struct ehca_shca, ib_device);
748         struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
749
750         h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
751         if (h_ret != H_SUCCESS) {
752                 ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p "
753                          "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx",
754                          h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
755                          e_mw->ipz_mw_handle.handle);
756                 return ehca2ib_return_code(h_ret);
757         }
758         /* successful deallocation */
759         ehca_mw_delete(e_mw);
760         return 0;
761 } /* end ehca_dealloc_mw() */
762
763 /*----------------------------------------------------------------------*/
764
765 struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
766                               int mr_access_flags,
767                               struct ib_fmr_attr *fmr_attr)
768 {
769         struct ib_fmr *ib_fmr;
770         struct ehca_shca *shca =
771                 container_of(pd->device, struct ehca_shca, ib_device);
772         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
773         struct ehca_mr *e_fmr;
774         int ret;
775         u32 tmp_lkey, tmp_rkey;
776         struct ehca_mr_pginfo pginfo;
777         u64 hw_pgsize;
778
779         /* check other parameters */
780         if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
781              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
782             ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
783              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
784                 /*
785                  * Remote Write Access requires Local Write Access
786                  * Remote Atomic Access requires Local Write Access
787                  */
788                 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
789                          mr_access_flags);
790                 ib_fmr = ERR_PTR(-EINVAL);
791                 goto alloc_fmr_exit0;
792         }
793         if (mr_access_flags & IB_ACCESS_MW_BIND) {
794                 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
795                          mr_access_flags);
796                 ib_fmr = ERR_PTR(-EINVAL);
797                 goto alloc_fmr_exit0;
798         }
799         if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
800                 ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
801                          "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
802                          fmr_attr->max_pages, fmr_attr->max_maps,
803                          fmr_attr->page_shift);
804                 ib_fmr = ERR_PTR(-EINVAL);
805                 goto alloc_fmr_exit0;
806         }
807
808         hw_pgsize = 1 << fmr_attr->page_shift;
809         if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
810                 ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
811                          fmr_attr->page_shift);
812                 ib_fmr = ERR_PTR(-EINVAL);
813                 goto alloc_fmr_exit0;
814         }
815
816         e_fmr = ehca_mr_new();
817         if (!e_fmr) {
818                 ib_fmr = ERR_PTR(-ENOMEM);
819                 goto alloc_fmr_exit0;
820         }
821         e_fmr->flags |= EHCA_MR_FLAG_FMR;
822
823         /* register MR on HCA */
824         memset(&pginfo, 0, sizeof(pginfo));
825         pginfo.hwpage_size = hw_pgsize;
826         /*
827          * pginfo.num_hwpages==0, ie register_rpages() will not be called
828          * but deferred to map_phys_fmr()
829          */
830         ret = ehca_reg_mr(shca, e_fmr, NULL,
831                           fmr_attr->max_pages * (1 << fmr_attr->page_shift),
832                           mr_access_flags, e_pd, &pginfo,
833                           &tmp_lkey, &tmp_rkey, EHCA_REG_MR);
834         if (ret) {
835                 ib_fmr = ERR_PTR(ret);
836                 goto alloc_fmr_exit1;
837         }
838
839         /* successful */
840         e_fmr->hwpage_size = hw_pgsize;
841         e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
842         e_fmr->fmr_max_pages = fmr_attr->max_pages;
843         e_fmr->fmr_max_maps = fmr_attr->max_maps;
844         e_fmr->fmr_map_cnt = 0;
845         return &e_fmr->ib.ib_fmr;
846
847 alloc_fmr_exit1:
848         ehca_mr_delete(e_fmr);
849 alloc_fmr_exit0:
850         return ib_fmr;
851 } /* end ehca_alloc_fmr() */
852
853 /*----------------------------------------------------------------------*/
854
855 int ehca_map_phys_fmr(struct ib_fmr *fmr,
856                       u64 *page_list,
857                       int list_len,
858                       u64 iova)
859 {
860         int ret;
861         struct ehca_shca *shca =
862                 container_of(fmr->device, struct ehca_shca, ib_device);
863         struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
864         struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
865         struct ehca_mr_pginfo pginfo;
866         u32 tmp_lkey, tmp_rkey;
867
868         if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
869                 ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
870                          e_fmr, e_fmr->flags);
871                 ret = -EINVAL;
872                 goto map_phys_fmr_exit0;
873         }
874         ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
875         if (ret)
876                 goto map_phys_fmr_exit0;
877         if (iova % e_fmr->fmr_page_size) {
878                 /* only whole-numbered pages */
879                 ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x",
880                          iova, e_fmr->fmr_page_size);
881                 ret = -EINVAL;
882                 goto map_phys_fmr_exit0;
883         }
884         if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
885                 /* HCAD does not limit the maps, however trace this anyway */
886                 ehca_info(fmr->device, "map limit exceeded, fmr=%p "
887                           "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
888                           fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
889         }
890
891         memset(&pginfo, 0, sizeof(pginfo));
892         pginfo.type = EHCA_MR_PGI_FMR;
893         pginfo.num_kpages = list_len;
894         pginfo.hwpage_size = e_fmr->hwpage_size;
895         pginfo.num_hwpages =
896                 list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
897         pginfo.u.fmr.page_list = page_list;
898         pginfo.next_hwpage =
899                 (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
900         pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
901
902         ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
903                             list_len * e_fmr->fmr_page_size,
904                             e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
905         if (ret)
906                 goto map_phys_fmr_exit0;
907
908         /* successful reregistration */
909         e_fmr->fmr_map_cnt++;
910         e_fmr->ib.ib_fmr.lkey = tmp_lkey;
911         e_fmr->ib.ib_fmr.rkey = tmp_rkey;
912         return 0;
913
914 map_phys_fmr_exit0:
915         if (ret)
916                 ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
917                          "iova=%llx", ret, fmr, page_list, list_len, iova);
918         return ret;
919 } /* end ehca_map_phys_fmr() */
920
921 /*----------------------------------------------------------------------*/
922
923 int ehca_unmap_fmr(struct list_head *fmr_list)
924 {
925         int ret = 0;
926         struct ib_fmr *ib_fmr;
927         struct ehca_shca *shca = NULL;
928         struct ehca_shca *prev_shca;
929         struct ehca_mr *e_fmr;
930         u32 num_fmr = 0;
931         u32 unmap_fmr_cnt = 0;
932
933         /* check all FMR belong to same SHCA, and check internal flag */
934         list_for_each_entry(ib_fmr, fmr_list, list) {
935                 prev_shca = shca;
936                 shca = container_of(ib_fmr->device, struct ehca_shca,
937                                     ib_device);
938                 e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
939                 if ((shca != prev_shca) && prev_shca) {
940                         ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
941                                  "prev_shca=%p e_fmr=%p",
942                                  shca, prev_shca, e_fmr);
943                         ret = -EINVAL;
944                         goto unmap_fmr_exit0;
945                 }
946                 if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
947                         ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
948                                  "e_fmr->flags=%x", e_fmr, e_fmr->flags);
949                         ret = -EINVAL;
950                         goto unmap_fmr_exit0;
951                 }
952                 num_fmr++;
953         }
954
955         /* loop over all FMRs to unmap */
956         list_for_each_entry(ib_fmr, fmr_list, list) {
957                 unmap_fmr_cnt++;
958                 e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
959                 shca = container_of(ib_fmr->device, struct ehca_shca,
960                                     ib_device);
961                 ret = ehca_unmap_one_fmr(shca, e_fmr);
962                 if (ret) {
963                         /* unmap failed, stop unmapping of rest of FMRs */
964                         ehca_err(&shca->ib_device, "unmap of one FMR failed, "
965                                  "stop rest, e_fmr=%p num_fmr=%x "
966                                  "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
967                                  unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
968                         goto unmap_fmr_exit0;
969                 }
970         }
971
972 unmap_fmr_exit0:
973         if (ret)
974                 ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
975                              ret, fmr_list, num_fmr, unmap_fmr_cnt);
976         return ret;
977 } /* end ehca_unmap_fmr() */
978
979 /*----------------------------------------------------------------------*/
980
981 int ehca_dealloc_fmr(struct ib_fmr *fmr)
982 {
983         int ret;
984         u64 h_ret;
985         struct ehca_shca *shca =
986                 container_of(fmr->device, struct ehca_shca, ib_device);
987         struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
988
989         if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
990                 ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
991                          e_fmr, e_fmr->flags);
992                 ret = -EINVAL;
993                 goto free_fmr_exit0;
994         }
995
996         h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
997         if (h_ret != H_SUCCESS) {
998                 ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p "
999                          "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x",
1000                          h_ret, e_fmr, shca->ipz_hca_handle.handle,
1001                          e_fmr->ipz_mr_handle.handle, fmr->lkey);
1002                 ret = ehca2ib_return_code(h_ret);
1003                 goto free_fmr_exit0;
1004         }
1005         /* successful deregistration */
1006         ehca_mr_delete(e_fmr);
1007         return 0;
1008
1009 free_fmr_exit0:
1010         if (ret)
1011                 ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
1012         return ret;
1013 } /* end ehca_dealloc_fmr() */
1014
1015 /*----------------------------------------------------------------------*/
1016
1017 static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
1018                                    struct ehca_mr *e_mr,
1019                                    struct ehca_mr_pginfo *pginfo);
1020
1021 int ehca_reg_mr(struct ehca_shca *shca,
1022                 struct ehca_mr *e_mr,
1023                 u64 *iova_start,
1024                 u64 size,
1025                 int acl,
1026                 struct ehca_pd *e_pd,
1027                 struct ehca_mr_pginfo *pginfo,
1028                 u32 *lkey, /*OUT*/
1029                 u32 *rkey, /*OUT*/
1030                 enum ehca_reg_type reg_type)
1031 {
1032         int ret;
1033         u64 h_ret;
1034         u32 hipz_acl;
1035         struct ehca_mr_hipzout_parms hipzout;
1036
1037         ehca_mrmw_map_acl(acl, &hipz_acl);
1038         ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
1039         if (ehca_use_hp_mr == 1)
1040                 hipz_acl |= 0x00000001;
1041
1042         h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
1043                                          (u64)iova_start, size, hipz_acl,
1044                                          e_pd->fw_pd, &hipzout);
1045         if (h_ret != H_SUCCESS) {
1046                 ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli "
1047                          "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle);
1048                 ret = ehca2ib_return_code(h_ret);
1049                 goto ehca_reg_mr_exit0;
1050         }
1051
1052         e_mr->ipz_mr_handle = hipzout.handle;
1053
1054         if (reg_type == EHCA_REG_BUSMAP_MR)
1055                 ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo);
1056         else if (reg_type == EHCA_REG_MR)
1057                 ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
1058         else
1059                 ret = -EINVAL;
1060
1061         if (ret)
1062                 goto ehca_reg_mr_exit1;
1063
1064         /* successful registration */
1065         e_mr->num_kpages = pginfo->num_kpages;
1066         e_mr->num_hwpages = pginfo->num_hwpages;
1067         e_mr->hwpage_size = pginfo->hwpage_size;
1068         e_mr->start = iova_start;
1069         e_mr->size = size;
1070         e_mr->acl = acl;
1071         *lkey = hipzout.lkey;
1072         *rkey = hipzout.rkey;
1073         return 0;
1074
1075 ehca_reg_mr_exit1:
1076         h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
1077         if (h_ret != H_SUCCESS) {
1078                 ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p "
1079                          "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x "
1080                          "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i",
1081                          h_ret, shca, e_mr, iova_start, size, acl, e_pd,
1082                          hipzout.lkey, pginfo, pginfo->num_kpages,
1083                          pginfo->num_hwpages, ret);
1084                 ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
1085                          "not recoverable");
1086         }
1087 ehca_reg_mr_exit0:
1088         if (ret)
1089                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
1090                          "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
1091                          "num_kpages=%llx num_hwpages=%llx",
1092                          ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
1093                          pginfo->num_kpages, pginfo->num_hwpages);
1094         return ret;
1095 } /* end ehca_reg_mr() */
1096
1097 /*----------------------------------------------------------------------*/
1098
1099 int ehca_reg_mr_rpages(struct ehca_shca *shca,
1100                        struct ehca_mr *e_mr,
1101                        struct ehca_mr_pginfo *pginfo)
1102 {
1103         int ret = 0;
1104         u64 h_ret;
1105         u32 rnum;
1106         u64 rpage;
1107         u32 i;
1108         u64 *kpage;
1109
1110         if (!pginfo->num_hwpages) /* in case of fmr */
1111                 return 0;
1112
1113         kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
1114         if (!kpage) {
1115                 ehca_err(&shca->ib_device, "kpage alloc failed");
1116                 ret = -ENOMEM;
1117                 goto ehca_reg_mr_rpages_exit0;
1118         }
1119
1120         /* max MAX_RPAGES ehca mr pages per register call */
1121         for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {
1122
1123                 if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
1124                         rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
1125                         if (rnum == 0)
1126                                 rnum = MAX_RPAGES;      /* last shot is full */
1127                 } else
1128                         rnum = MAX_RPAGES;
1129
1130                 ret = ehca_set_pagebuf(pginfo, rnum, kpage);
1131                 if (ret) {
1132                         ehca_err(&shca->ib_device, "ehca_set_pagebuf "
1133                                  "bad rc, ret=%i rnum=%x kpage=%p",
1134                                  ret, rnum, kpage);
1135                         goto ehca_reg_mr_rpages_exit1;
1136                 }
1137
1138                 if (rnum > 1) {
1139                         rpage = __pa(kpage);
1140                         if (!rpage) {
1141                                 ehca_err(&shca->ib_device, "kpage=%p i=%x",
1142                                          kpage, i);
1143                                 ret = -EFAULT;
1144                                 goto ehca_reg_mr_rpages_exit1;
1145                         }
1146                 } else
1147                         rpage = *kpage;
1148
1149                 h_ret = hipz_h_register_rpage_mr(
1150                         shca->ipz_hca_handle, e_mr,
1151                         ehca_encode_hwpage_size(pginfo->hwpage_size),
1152                         0, rpage, rnum);
1153
1154                 if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
1155                         /*
1156                          * check for 'registration complete'==H_SUCCESS
1157                          * and for 'page registered'==H_PAGE_REGISTERED
1158                          */
1159                         if (h_ret != H_SUCCESS) {
1160                                 ehca_err(&shca->ib_device, "last "
1161                                          "hipz_reg_rpage_mr failed, h_ret=%lli "
1162                                          "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx"
1163                                          " lkey=%x", h_ret, e_mr, i,
1164                                          shca->ipz_hca_handle.handle,
1165                                          e_mr->ipz_mr_handle.handle,
1166                                          e_mr->ib.ib_mr.lkey);
1167                                 ret = ehca2ib_return_code(h_ret);
1168                                 break;
1169                         } else
1170                                 ret = 0;
1171                 } else if (h_ret != H_PAGE_REGISTERED) {
1172                         ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
1173                                  "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx "
1174                                  "mr_hndl=%llx", h_ret, e_mr, i,
1175                                  e_mr->ib.ib_mr.lkey,
1176                                  shca->ipz_hca_handle.handle,
1177                                  e_mr->ipz_mr_handle.handle);
1178                         ret = ehca2ib_return_code(h_ret);
1179                         break;
1180                 } else
1181                         ret = 0;
1182         } /* end for(i) */
1183
1184
1185 ehca_reg_mr_rpages_exit1:
1186         ehca_free_fw_ctrlblock(kpage);
1187 ehca_reg_mr_rpages_exit0:
1188         if (ret)
1189                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
1190                          "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr,
1191                          pginfo, pginfo->num_kpages, pginfo->num_hwpages);
1192         return ret;
1193 } /* end ehca_reg_mr_rpages() */
1194
1195 /*----------------------------------------------------------------------*/
1196
1197 inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
1198                                 struct ehca_mr *e_mr,
1199                                 u64 *iova_start,
1200                                 u64 size,
1201                                 u32 acl,
1202                                 struct ehca_pd *e_pd,
1203                                 struct ehca_mr_pginfo *pginfo,
1204                                 u32 *lkey, /*OUT*/
1205                                 u32 *rkey) /*OUT*/
1206 {
1207         int ret;
1208         u64 h_ret;
1209         u32 hipz_acl;
1210         u64 *kpage;
1211         u64 rpage;
1212         struct ehca_mr_pginfo pginfo_save;
1213         struct ehca_mr_hipzout_parms hipzout;
1214
1215         ehca_mrmw_map_acl(acl, &hipz_acl);
1216         ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
1217
1218         kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
1219         if (!kpage) {
1220                 ehca_err(&shca->ib_device, "kpage alloc failed");
1221                 ret = -ENOMEM;
1222                 goto ehca_rereg_mr_rereg1_exit0;
1223         }
1224
1225         pginfo_save = *pginfo;
1226         ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
1227         if (ret) {
1228                 ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
1229                          "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
1230                          "kpage=%p", e_mr, pginfo, pginfo->type,
1231                          pginfo->num_kpages, pginfo->num_hwpages, kpage);
1232                 goto ehca_rereg_mr_rereg1_exit1;
1233         }
1234         rpage = __pa(kpage);
1235         if (!rpage) {
1236                 ehca_err(&shca->ib_device, "kpage=%p", kpage);
1237                 ret = -EFAULT;
1238                 goto ehca_rereg_mr_rereg1_exit1;
1239         }
1240         h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
1241                                       (u64)iova_start, size, hipz_acl,
1242                                       e_pd->fw_pd, rpage, &hipzout);
1243         if (h_ret != H_SUCCESS) {
1244                 /*
1245                  * reregistration unsuccessful, try it again with the 3 hCalls,
1246                  * e.g. this is required in case H_MR_CONDITION
1247                  * (MW bound or MR is shared)
1248                  */
1249                 ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
1250                           "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
1251                 *pginfo = pginfo_save;
1252                 ret = -EAGAIN;
1253         } else if ((u64 *)hipzout.vaddr != iova_start) {
1254                 ehca_err(&shca->ib_device, "PHYP changed iova_start in "
1255                          "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
1256                          "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
1257                          hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
1258                          e_mr->ib.ib_mr.lkey, hipzout.lkey);
1259                 ret = -EFAULT;
1260         } else {
1261                 /*
1262                  * successful reregistration
1263                  * note: start and start_out are identical for eServer HCAs
1264                  */
1265                 e_mr->num_kpages = pginfo->num_kpages;
1266                 e_mr->num_hwpages = pginfo->num_hwpages;
1267                 e_mr->hwpage_size = pginfo->hwpage_size;
1268                 e_mr->start = iova_start;
1269                 e_mr->size = size;
1270                 e_mr->acl = acl;
1271                 *lkey = hipzout.lkey;
1272                 *rkey = hipzout.rkey;
1273         }
1274
1275 ehca_rereg_mr_rereg1_exit1:
1276         ehca_free_fw_ctrlblock(kpage);
1277 ehca_rereg_mr_rereg1_exit0:
1278         if ( ret && (ret != -EAGAIN) )
1279                 ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
1280                          "pginfo=%p num_kpages=%llx num_hwpages=%llx",
1281                          ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
1282                          pginfo->num_hwpages);
1283         return ret;
1284 } /* end ehca_rereg_mr_rereg1() */
1285
1286 /*----------------------------------------------------------------------*/
1287
1288 int ehca_rereg_mr(struct ehca_shca *shca,
1289                   struct ehca_mr *e_mr,
1290                   u64 *iova_start,
1291                   u64 size,
1292                   int acl,
1293                   struct ehca_pd *e_pd,
1294                   struct ehca_mr_pginfo *pginfo,
1295                   u32 *lkey,
1296                   u32 *rkey)
1297 {
1298         int ret = 0;
1299         u64 h_ret;
1300         int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
1301         int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
1302
1303         /* first determine reregistration hCall(s) */
1304         if ((pginfo->num_hwpages > MAX_RPAGES) ||
1305             (e_mr->num_hwpages > MAX_RPAGES) ||
1306             (pginfo->num_hwpages > e_mr->num_hwpages)) {
1307                 ehca_dbg(&shca->ib_device, "Rereg3 case, "
1308                          "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x",
1309                          pginfo->num_hwpages, e_mr->num_hwpages);
1310                 rereg_1_hcall = 0;
1311                 rereg_3_hcall = 1;
1312         }
1313
1314         if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */
1315                 rereg_1_hcall = 0;
1316                 rereg_3_hcall = 1;
1317                 e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
1318                 ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
1319                          e_mr);
1320         }
1321
1322         if (rereg_1_hcall) {
1323                 ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
1324                                            acl, e_pd, pginfo, lkey, rkey);
1325                 if (ret) {
1326                         if (ret == -EAGAIN)
1327                                 rereg_3_hcall = 1;
1328                         else
1329                                 goto ehca_rereg_mr_exit0;
1330                 }
1331         }
1332
1333         if (rereg_3_hcall) {
1334                 struct ehca_mr save_mr;
1335
1336                 /* first deregister old MR */
1337                 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
1338                 if (h_ret != H_SUCCESS) {
1339                         ehca_err(&shca->ib_device, "hipz_free_mr failed, "
1340                                  "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx "
1341                                  "mr->lkey=%x",
1342                                  h_ret, e_mr, shca->ipz_hca_handle.handle,
1343                                  e_mr->ipz_mr_handle.handle,
1344                                  e_mr->ib.ib_mr.lkey);
1345                         ret = ehca2ib_return_code(h_ret);
1346                         goto ehca_rereg_mr_exit0;
1347                 }
1348                 /* clean ehca_mr_t, without changing struct ib_mr and lock */
1349                 save_mr = *e_mr;
1350                 ehca_mr_deletenew(e_mr);
1351
1352                 /* set some MR values */
1353                 e_mr->flags = save_mr.flags;
1354                 e_mr->hwpage_size = save_mr.hwpage_size;
1355                 e_mr->fmr_page_size = save_mr.fmr_page_size;
1356                 e_mr->fmr_max_pages = save_mr.fmr_max_pages;
1357                 e_mr->fmr_max_maps = save_mr.fmr_max_maps;
1358                 e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
1359
1360                 ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
1361                                   e_pd, pginfo, lkey, rkey, EHCA_REG_MR);
1362                 if (ret) {
1363                         u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
1364                         memcpy(&e_mr->flags, &(save_mr.flags),
1365                                sizeof(struct ehca_mr) - offset);
1366                         goto ehca_rereg_mr_exit0;
1367                 }
1368         }
1369
1370 ehca_rereg_mr_exit0:
1371         if (ret)
1372                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
1373                          "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
1374                          "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x "
1375                          "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
1376                          acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
1377                          rereg_1_hcall, rereg_3_hcall);
1378         return ret;
1379 } /* end ehca_rereg_mr() */
1380
1381 /*----------------------------------------------------------------------*/
1382
1383 int ehca_unmap_one_fmr(struct ehca_shca *shca,
1384                        struct ehca_mr *e_fmr)
1385 {
1386         int ret = 0;
1387         u64 h_ret;
1388         struct ehca_pd *e_pd =
1389                 container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
1390         struct ehca_mr save_fmr;
1391         u32 tmp_lkey, tmp_rkey;
1392         struct ehca_mr_pginfo pginfo;
1393         struct ehca_mr_hipzout_parms hipzout;
1394         struct ehca_mr save_mr;
1395
1396         if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
1397                 /*
1398                  * note: after using rereg hcall with len=0,
1399                  * rereg hcall must be used again for registering pages
1400                  */
1401                 h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
1402                                               0, 0, e_pd->fw_pd, 0, &hipzout);
1403                 if (h_ret == H_SUCCESS) {
1404                         /* successful reregistration */
1405                         e_fmr->start = NULL;
1406                         e_fmr->size = 0;
1407                         tmp_lkey = hipzout.lkey;
1408                         tmp_rkey = hipzout.rkey;
1409                         return 0;
1410                 }
1411                 /*
1412                  * should not happen, because length checked above,
1413                  * FMRs are not shared and no MW bound to FMRs
1414                  */
1415                 ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
1416                          "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx "
1417                          "mr_hndl=%llx lkey=%x lkey_out=%x",
1418                          h_ret, e_fmr, shca->ipz_hca_handle.handle,
1419                          e_fmr->ipz_mr_handle.handle,
1420                          e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
1421                 /* try free and rereg */
1422         }
1423
1424         /* first free old FMR */
1425         h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
1426         if (h_ret != H_SUCCESS) {
1427                 ehca_err(&shca->ib_device, "hipz_free_mr failed, "
1428                          "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx "
1429                          "lkey=%x",
1430                          h_ret, e_fmr, shca->ipz_hca_handle.handle,
1431                          e_fmr->ipz_mr_handle.handle,
1432                          e_fmr->ib.ib_fmr.lkey);
1433                 ret = ehca2ib_return_code(h_ret);
1434                 goto ehca_unmap_one_fmr_exit0;
1435         }
1436         /* clean ehca_mr_t, without changing lock */
1437         save_fmr = *e_fmr;
1438         ehca_mr_deletenew(e_fmr);
1439
1440         /* set some MR values */
1441         e_fmr->flags = save_fmr.flags;
1442         e_fmr->hwpage_size = save_fmr.hwpage_size;
1443         e_fmr->fmr_page_size = save_fmr.fmr_page_size;
1444         e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
1445         e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
1446         e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
1447         e_fmr->acl = save_fmr.acl;
1448
1449         memset(&pginfo, 0, sizeof(pginfo));
1450         pginfo.type = EHCA_MR_PGI_FMR;
1451         ret = ehca_reg_mr(shca, e_fmr, NULL,
1452                           (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
1453                           e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
1454                           &tmp_rkey, EHCA_REG_MR);
1455         if (ret) {
1456                 u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
1457                 memcpy(&e_fmr->flags, &(save_mr.flags),
1458                        sizeof(struct ehca_mr) - offset);
1459         }
1460
1461 ehca_unmap_one_fmr_exit0:
1462         if (ret)
1463                 ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
1464                          "fmr_max_pages=%x",
1465                          ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
1466         return ret;
1467 } /* end ehca_unmap_one_fmr() */
1468
1469 /*----------------------------------------------------------------------*/
1470
1471 int ehca_reg_smr(struct ehca_shca *shca,
1472                  struct ehca_mr *e_origmr,
1473                  struct ehca_mr *e_newmr,
1474                  u64 *iova_start,
1475                  int acl,
1476                  struct ehca_pd *e_pd,
1477                  u32 *lkey, /*OUT*/
1478                  u32 *rkey) /*OUT*/
1479 {
1480         int ret = 0;
1481         u64 h_ret;
1482         u32 hipz_acl;
1483         struct ehca_mr_hipzout_parms hipzout;
1484
1485         ehca_mrmw_map_acl(acl, &hipz_acl);
1486         ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
1487
1488         h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
1489                                     (u64)iova_start, hipz_acl, e_pd->fw_pd,
1490                                     &hipzout);
1491         if (h_ret != H_SUCCESS) {
1492                 ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
1493                          "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
1494                          "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
1495                          h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
1496                          shca->ipz_hca_handle.handle,
1497                          e_origmr->ipz_mr_handle.handle,
1498                          e_origmr->ib.ib_mr.lkey);
1499                 ret = ehca2ib_return_code(h_ret);
1500                 goto ehca_reg_smr_exit0;
1501         }
1502         /* successful registration */
1503         e_newmr->num_kpages = e_origmr->num_kpages;
1504         e_newmr->num_hwpages = e_origmr->num_hwpages;
1505         e_newmr->hwpage_size   = e_origmr->hwpage_size;
1506         e_newmr->start = iova_start;
1507         e_newmr->size = e_origmr->size;
1508         e_newmr->acl = acl;
1509         e_newmr->ipz_mr_handle = hipzout.handle;
1510         *lkey = hipzout.lkey;
1511         *rkey = hipzout.rkey;
1512         return 0;
1513
1514 ehca_reg_smr_exit0:
1515         if (ret)
1516                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
1517                          "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
1518                          ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
1519         return ret;
1520 } /* end ehca_reg_smr() */
1521
1522 /*----------------------------------------------------------------------*/
1523 static inline void *ehca_calc_sectbase(int top, int dir, int idx)
1524 {
1525         unsigned long ret = idx;
1526         ret |= dir << EHCA_DIR_INDEX_SHIFT;
1527         ret |= top << EHCA_TOP_INDEX_SHIFT;
1528         return __va(ret << SECTION_SIZE_BITS);
1529 }
1530
/*
 * An entry counts as valid when it differs from EHCA_INVAL_ADDR.
 * The argument is parenthesized so the cast applies to the whole
 * expression passed in, whatever operators it contains.
 */
#define ehca_bmap_valid(entry) \
	((u64)(entry) != (u64)EHCA_INVAL_ADDR)
1533
1534 static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
1535                                struct ehca_shca *shca, struct ehca_mr *mr,
1536                                struct ehca_mr_pginfo *pginfo)
1537 {
1538         u64 h_ret = 0;
1539         unsigned long page = 0;
1540         u64 rpage = __pa(kpage);
1541         int page_count;
1542
1543         void *sectbase = ehca_calc_sectbase(top, dir, idx);
1544         if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) {
1545                 ehca_err(&shca->ib_device, "reg_mr_section will probably fail:"
1546                                            "hwpage_size does not fit to "
1547                                            "section start address");
1548         }
1549         page_count = EHCA_SECTSIZE / pginfo->hwpage_size;
1550
1551         while (page < page_count) {
1552                 u64 rnum;
1553                 for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
1554                      rnum++) {
1555                         void *pg = sectbase + ((page++) * pginfo->hwpage_size);
1556                         kpage[rnum] = __pa(pg);
1557                 }
1558
1559                 h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
1560                         ehca_encode_hwpage_size(pginfo->hwpage_size),
1561                         0, rpage, rnum);
1562
1563                 if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) {
1564                         ehca_err(&shca->ib_device, "register_rpage_mr failed");
1565                         return h_ret;
1566                 }
1567         }
1568         return h_ret;
1569 }
1570
1571 static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage,
1572                                 struct ehca_shca *shca, struct ehca_mr *mr,
1573                                 struct ehca_mr_pginfo *pginfo)
1574 {
1575         u64 hret = H_SUCCESS;
1576         int idx;
1577
1578         for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) {
1579                 if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx]))
1580                         continue;
1581
1582                 hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr,
1583                                            pginfo);
1584                 if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
1585                                 return hret;
1586         }
1587         return hret;
1588 }
1589
1590 static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca,
1591                                     struct ehca_mr *mr,
1592                                     struct ehca_mr_pginfo *pginfo)
1593 {
1594         u64 hret = H_SUCCESS;
1595         int dir;
1596
1597         for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
1598                 if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
1599                         continue;
1600
1601                 hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo);
1602                 if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
1603                                 return hret;
1604         }
1605         return hret;
1606 }
1607
1608 /* register internal max-MR to internal SHCA */
1609 int ehca_reg_internal_maxmr(
1610         struct ehca_shca *shca,
1611         struct ehca_pd *e_pd,
1612         struct ehca_mr **e_maxmr)  /*OUT*/
1613 {
1614         int ret;
1615         struct ehca_mr *e_mr;
1616         u64 *iova_start;
1617         u64 size_maxmr;
1618         struct ehca_mr_pginfo pginfo;
1619         struct ib_phys_buf ib_pbuf;
1620         u32 num_kpages;
1621         u32 num_hwpages;
1622         u64 hw_pgsize;
1623
1624         if (!ehca_bmap) {
1625                 ret = -EFAULT;
1626                 goto ehca_reg_internal_maxmr_exit0;
1627         }
1628
1629         e_mr = ehca_mr_new();
1630         if (!e_mr) {
1631                 ehca_err(&shca->ib_device, "out of memory");
1632                 ret = -ENOMEM;
1633                 goto ehca_reg_internal_maxmr_exit0;
1634         }
1635         e_mr->flags |= EHCA_MR_FLAG_MAXMR;
1636
1637         /* register internal max-MR on HCA */
1638         size_maxmr = ehca_mr_len;
1639         iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
1640         ib_pbuf.addr = 0;
1641         ib_pbuf.size = size_maxmr;
1642         num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
1643                                 PAGE_SIZE);
1644         hw_pgsize = ehca_get_max_hwpage_size(shca);
1645         num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
1646                                  hw_pgsize);
1647
1648         memset(&pginfo, 0, sizeof(pginfo));
1649         pginfo.type = EHCA_MR_PGI_PHYS;
1650         pginfo.num_kpages = num_kpages;
1651         pginfo.num_hwpages = num_hwpages;
1652         pginfo.hwpage_size = hw_pgsize;
1653         pginfo.u.phy.num_phys_buf = 1;
1654         pginfo.u.phy.phys_buf_array = &ib_pbuf;
1655
1656         ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
1657                           &pginfo, &e_mr->ib.ib_mr.lkey,
1658                           &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR);
1659         if (ret) {
1660                 ehca_err(&shca->ib_device, "reg of internal max MR failed, "
1661                          "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x "
1662                          "num_hwpages=%x", e_mr, iova_start, size_maxmr,
1663                          num_kpages, num_hwpages);
1664                 goto ehca_reg_internal_maxmr_exit1;
1665         }
1666
1667         /* successful registration of all pages */
1668         e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
1669         e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
1670         e_mr->ib.ib_mr.uobject = NULL;
1671         atomic_inc(&(e_pd->ib_pd.usecnt));
1672         atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
1673         *e_maxmr = e_mr;
1674         return 0;
1675
1676 ehca_reg_internal_maxmr_exit1:
1677         ehca_mr_delete(e_mr);
1678 ehca_reg_internal_maxmr_exit0:
1679         if (ret)
1680                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
1681                          ret, shca, e_pd, e_maxmr);
1682         return ret;
1683 } /* end ehca_reg_internal_maxmr() */
1684
1685 /*----------------------------------------------------------------------*/
1686
1687 int ehca_reg_maxmr(struct ehca_shca *shca,
1688                    struct ehca_mr *e_newmr,
1689                    u64 *iova_start,
1690                    int acl,
1691                    struct ehca_pd *e_pd,
1692                    u32 *lkey,
1693                    u32 *rkey)
1694 {
1695         u64 h_ret;
1696         struct ehca_mr *e_origmr = shca->maxmr;
1697         u32 hipz_acl;
1698         struct ehca_mr_hipzout_parms hipzout;
1699
1700         ehca_mrmw_map_acl(acl, &hipz_acl);
1701         ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
1702
1703         h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
1704                                     (u64)iova_start, hipz_acl, e_pd->fw_pd,
1705                                     &hipzout);
1706         if (h_ret != H_SUCCESS) {
1707                 ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
1708                          "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
1709                          h_ret, e_origmr, shca->ipz_hca_handle.handle,
1710                          e_origmr->ipz_mr_handle.handle,
1711                          e_origmr->ib.ib_mr.lkey);
1712                 return ehca2ib_return_code(h_ret);
1713         }
1714         /* successful registration */
1715         e_newmr->num_kpages = e_origmr->num_kpages;
1716         e_newmr->num_hwpages = e_origmr->num_hwpages;
1717         e_newmr->hwpage_size = e_origmr->hwpage_size;
1718         e_newmr->start = iova_start;
1719         e_newmr->size = e_origmr->size;
1720         e_newmr->acl = acl;
1721         e_newmr->ipz_mr_handle = hipzout.handle;
1722         *lkey = hipzout.lkey;
1723         *rkey = hipzout.rkey;
1724         return 0;
1725 } /* end ehca_reg_maxmr() */
1726
1727 /*----------------------------------------------------------------------*/
1728
1729 int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
1730 {
1731         int ret;
1732         struct ehca_mr *e_maxmr;
1733         struct ib_pd *ib_pd;
1734
1735         if (!shca->maxmr) {
1736                 ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
1737                 ret = -EINVAL;
1738                 goto ehca_dereg_internal_maxmr_exit0;
1739         }
1740
1741         e_maxmr = shca->maxmr;
1742         ib_pd = e_maxmr->ib.ib_mr.pd;
1743         shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
1744
1745         ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
1746         if (ret) {
1747                 ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
1748                          "ret=%i e_maxmr=%p shca=%p lkey=%x",
1749                          ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
1750                 shca->maxmr = e_maxmr;
1751                 goto ehca_dereg_internal_maxmr_exit0;
1752         }
1753
1754         atomic_dec(&ib_pd->usecnt);
1755
1756 ehca_dereg_internal_maxmr_exit0:
1757         if (ret)
1758                 ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
1759                          ret, shca, shca->maxmr);
1760         return ret;
1761 } /* end ehca_dereg_internal_maxmr() */
1762
1763 /*----------------------------------------------------------------------*/
1764
1765 /*
1766  * check physical buffer array of MR verbs for validness and
1767  * calculates MR size
1768  */
1769 int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
1770                                   int num_phys_buf,
1771                                   u64 *iova_start,
1772                                   u64 *size)
1773 {
1774         struct ib_phys_buf *pbuf = phys_buf_array;
1775         u64 size_count = 0;
1776         u32 i;
1777
1778         if (num_phys_buf == 0) {
1779                 ehca_gen_err("bad phys buf array len, num_phys_buf=0");
1780                 return -EINVAL;
1781         }
1782         /* check first buffer */
1783         if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
1784                 ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
1785                              "pbuf->addr=%llx pbuf->size=%llx",
1786                              iova_start, pbuf->addr, pbuf->size);
1787                 return -EINVAL;
1788         }
1789         if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
1790             (num_phys_buf > 1)) {
1791                 ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
1792                              "pbuf->size=%llx", pbuf->addr, pbuf->size);
1793                 return -EINVAL;
1794         }
1795
1796         for (i = 0; i < num_phys_buf; i++) {
1797                 if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
1798                         ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
1799                                      "pbuf->size=%llx",
1800                                      i, pbuf->addr, pbuf->size);
1801                         return -EINVAL;
1802                 }
1803                 if (((i > 0) && /* not 1st */
1804                      (i < (num_phys_buf - 1)) &&        /* not last */
1805                      (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
1806                         ehca_gen_err("bad size, i=%x pbuf->size=%llx",
1807                                      i, pbuf->size);
1808                         return -EINVAL;
1809                 }
1810                 size_count += pbuf->size;
1811                 pbuf++;
1812         }
1813
1814         *size = size_count;
1815         return 0;
1816 } /* end ehca_mr_chk_buf_and_calc_size() */
1817
1818 /*----------------------------------------------------------------------*/
1819
1820 /* check page list of map FMR verb for validness */
1821 int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
1822                              u64 *page_list,
1823                              int list_len)
1824 {
1825         u32 i;
1826         u64 *page;
1827
1828         if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
1829                 ehca_gen_err("bad list_len, list_len=%x "
1830                              "e_fmr->fmr_max_pages=%x fmr=%p",
1831                              list_len, e_fmr->fmr_max_pages, e_fmr);
1832                 return -EINVAL;
1833         }
1834
1835         /* each page must be aligned */
1836         page = page_list;
1837         for (i = 0; i < list_len; i++) {
1838                 if (*page % e_fmr->fmr_page_size) {
1839                         ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p "
1840                                      "fmr_page_size=%x", i, *page, page, e_fmr,
1841                                      e_fmr->fmr_page_size);
1842                         return -EINVAL;
1843                 }
1844                 page++;
1845         }
1846
1847         return 0;
1848 } /* end ehca_fmr_check_page_list() */
1849
1850 /*----------------------------------------------------------------------*/
1851
1852 /* PAGE_SIZE >= pginfo->hwpage_size */
1853 static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
1854                                   u32 number,
1855                                   u64 *kpage)
1856 {
1857         int ret = 0;
1858         u64 pgaddr;
1859         u32 j = 0;
1860         int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
1861         struct scatterlist **sg = &pginfo->u.usr.next_sg;
1862
1863         while (*sg != NULL) {
1864                 pgaddr = page_to_pfn(sg_page(*sg))
1865                         << PAGE_SHIFT;
1866                 *kpage = pgaddr + (pginfo->next_hwpage *
1867                                    pginfo->hwpage_size);
1868                 if (!(*kpage)) {
1869                         ehca_gen_err("pgaddr=%llx "
1870                                      "sg_dma_address=%llx "
1871                                      "entry=%llx next_hwpage=%llx",
1872                                      pgaddr, (u64)sg_dma_address(*sg),
1873                                      pginfo->u.usr.next_nmap,
1874                                      pginfo->next_hwpage);
1875                         return -EFAULT;
1876                 }
1877                 (pginfo->hwpage_cnt)++;
1878                 (pginfo->next_hwpage)++;
1879                 kpage++;
1880                 if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
1881                         (pginfo->kpage_cnt)++;
1882                         (pginfo->u.usr.next_nmap)++;
1883                         pginfo->next_hwpage = 0;
1884                         *sg = sg_next(*sg);
1885                 }
1886                 j++;
1887                 if (j >= number)
1888                         break;
1889         }
1890
1891         return ret;
1892 }
1893
1894 /*
1895  * check given pages for contiguous layout
1896  * last page addr is returned in prev_pgaddr for further check
1897  */
1898 static int ehca_check_kpages_per_ate(struct scatterlist **sg,
1899                                      int num_pages,
1900                                      u64 *prev_pgaddr)
1901 {
1902         for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) {
1903                 u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT;
1904                 if (ehca_debug_level >= 3)
1905                         ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
1906                                      *(u64 *)__va(pgaddr));
1907                 if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
1908                         ehca_gen_err("uncontiguous page found pgaddr=%llx "
1909                                      "prev_pgaddr=%llx entries_left_in_hwpage=%x",
1910                                      pgaddr, *prev_pgaddr, num_pages);
1911                         return -EINVAL;
1912                 }
1913                 *prev_pgaddr = pgaddr;
1914         }
1915         return 0;
1916 }
1917
1918 /* PAGE_SIZE < pginfo->hwpage_size */
1919 static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
1920                                   u32 number,
1921                                   u64 *kpage)
1922 {
1923         int ret = 0;
1924         u64 pgaddr, prev_pgaddr;
1925         u32 j = 0;
1926         int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
1927         int nr_kpages = kpages_per_hwpage;
1928         struct scatterlist **sg = &pginfo->u.usr.next_sg;
1929
1930         while (*sg != NULL) {
1931
1932                 if (nr_kpages == kpages_per_hwpage) {
1933                         pgaddr = (page_to_pfn(sg_page(*sg))
1934                                    << PAGE_SHIFT);
1935                         *kpage = pgaddr;
1936                         if (!(*kpage)) {
1937                                 ehca_gen_err("pgaddr=%llx entry=%llx",
1938                                              pgaddr, pginfo->u.usr.next_nmap);
1939                                 ret = -EFAULT;
1940                                 return ret;
1941                         }
1942                         /*
1943                          * The first page in a hwpage must be aligned;
1944                          * the first MR page is exempt from this rule.
1945                          */
1946                         if (pgaddr & (pginfo->hwpage_size - 1)) {
1947                                 if (pginfo->hwpage_cnt) {
1948                                         ehca_gen_err(
1949                                                 "invalid alignment "
1950                                                 "pgaddr=%llx entry=%llx "
1951                                                 "mr_pgsize=%llx",
1952                                                 pgaddr, pginfo->u.usr.next_nmap,
1953                                                 pginfo->hwpage_size);
1954                                         ret = -EFAULT;
1955                                         return ret;
1956                                 }
1957                                 /* first MR page */
1958                                 pginfo->kpage_cnt =
1959                                         (pgaddr &
1960                                          (pginfo->hwpage_size - 1)) >>
1961                                         PAGE_SHIFT;
1962                                 nr_kpages -= pginfo->kpage_cnt;
1963                                 *kpage = pgaddr &
1964                                          ~(pginfo->hwpage_size - 1);
1965                         }
1966                         if (ehca_debug_level >= 3) {
1967                                 u64 val = *(u64 *)__va(pgaddr);
1968                                 ehca_gen_dbg("kpage=%llx page=%llx "
1969                                              "value=%016llx",
1970                                              *kpage, pgaddr, val);
1971                         }
1972                         prev_pgaddr = pgaddr;
1973                         *sg = sg_next(*sg);
1974                         pginfo->kpage_cnt++;
1975                         pginfo->u.usr.next_nmap++;
1976                         nr_kpages--;
1977                         if (!nr_kpages)
1978                                 goto next_kpage;
1979                         continue;
1980                 }
1981
1982                 ret = ehca_check_kpages_per_ate(sg, nr_kpages,
1983                                                 &prev_pgaddr);
1984                 if (ret)
1985                         return ret;
1986                 pginfo->kpage_cnt += nr_kpages;
1987                 pginfo->u.usr.next_nmap += nr_kpages;
1988
1989 next_kpage:
1990                 nr_kpages = kpages_per_hwpage;
1991                 (pginfo->hwpage_cnt)++;
1992                 kpage++;
1993                 j++;
1994                 if (j >= number)
1995                         break;
1996         }
1997
1998         return ret;
1999 }
2000
2001 static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
2002                                  u32 number, u64 *kpage)
2003 {
2004         int ret = 0;
2005         struct ib_phys_buf *pbuf;
2006         u64 num_hw, offs_hw;
2007         u32 i = 0;
2008
2009         /* loop over desired phys_buf_array entries */
2010         while (i < number) {
2011                 pbuf   = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
2012                 num_hw  = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
2013                                      pbuf->size, pginfo->hwpage_size);
2014                 offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
2015                         pginfo->hwpage_size;
2016                 while (pginfo->next_hwpage < offs_hw + num_hw) {
2017                         /* sanity check */
2018                         if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
2019                             (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
2020                                 ehca_gen_err("kpage_cnt >= num_kpages, "
2021                                              "kpage_cnt=%llx num_kpages=%llx "
2022                                              "hwpage_cnt=%llx "
2023                                              "num_hwpages=%llx i=%x",
2024                                              pginfo->kpage_cnt,
2025                                              pginfo->num_kpages,
2026                                              pginfo->hwpage_cnt,
2027                                              pginfo->num_hwpages, i);
2028                                 return -EFAULT;
2029                         }
2030                         *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
2031                                  (pginfo->next_hwpage * pginfo->hwpage_size);
2032                         if ( !(*kpage) && pbuf->addr ) {
2033                                 ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
2034                                              "next_hwpage=%llx", pbuf->addr,
2035                                              pbuf->size, pginfo->next_hwpage);
2036                                 return -EFAULT;
2037                         }
2038                         (pginfo->hwpage_cnt)++;
2039                         (pginfo->next_hwpage)++;
2040                         if (PAGE_SIZE >= pginfo->hwpage_size) {
2041                                 if (pginfo->next_hwpage %
2042                                     (PAGE_SIZE / pginfo->hwpage_size) == 0)
2043                                         (pginfo->kpage_cnt)++;
2044                         } else
2045                                 pginfo->kpage_cnt += pginfo->hwpage_size /
2046                                         PAGE_SIZE;
2047                         kpage++;
2048                         i++;
2049                         if (i >= number) break;
2050                 }
2051                 if (pginfo->next_hwpage >= offs_hw + num_hw) {
2052                         (pginfo->u.phy.next_buf)++;
2053                         pginfo->next_hwpage = 0;
2054                 }
2055         }
2056         return ret;
2057 }
2058
2059 static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
2060                                 u32 number, u64 *kpage)
2061 {
2062         int ret = 0;
2063         u64 *fmrlist;
2064         u32 i;
2065
2066         /* loop over desired page_list entries */
2067         fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
2068         for (i = 0; i < number; i++) {
2069                 *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) +
2070                            pginfo->next_hwpage * pginfo->hwpage_size;
2071                 if ( !(*kpage) ) {
2072                         ehca_gen_err("*fmrlist=%llx fmrlist=%p "
2073                                      "next_listelem=%llx next_hwpage=%llx",
2074                                      *fmrlist, fmrlist,
2075                                      pginfo->u.fmr.next_listelem,
2076                                      pginfo->next_hwpage);
2077                         return -EFAULT;
2078                 }
2079                 (pginfo->hwpage_cnt)++;
2080                 if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
2081                         if (pginfo->next_hwpage %
2082                             (pginfo->u.fmr.fmr_pgsize /
2083                              pginfo->hwpage_size) == 0) {
2084                                 (pginfo->kpage_cnt)++;
2085                                 (pginfo->u.fmr.next_listelem)++;
2086                                 fmrlist++;
2087                                 pginfo->next_hwpage = 0;
2088                         } else
2089                                 (pginfo->next_hwpage)++;
2090                 } else {
2091                         unsigned int cnt_per_hwpage = pginfo->hwpage_size /
2092                                 pginfo->u.fmr.fmr_pgsize;
2093                         unsigned int j;
2094                         u64 prev = *kpage;
2095                         /* check if adrs are contiguous */
2096                         for (j = 1; j < cnt_per_hwpage; j++) {
2097                                 u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1);
2098                                 if (prev + pginfo->u.fmr.fmr_pgsize != p) {
2099                                         ehca_gen_err("uncontiguous fmr pages "
2100                                                      "found prev=%llx p=%llx "
2101                                                      "idx=%x", prev, p, i + j);
2102                                         return -EINVAL;
2103                                 }
2104                                 prev = p;
2105                         }
2106                         pginfo->kpage_cnt += cnt_per_hwpage;
2107                         pginfo->u.fmr.next_listelem += cnt_per_hwpage;
2108                         fmrlist += cnt_per_hwpage;
2109                 }
2110                 kpage++;
2111         }
2112         return ret;
2113 }
2114
2115 /* setup page buffer from page info */
2116 int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
2117                      u32 number,
2118                      u64 *kpage)
2119 {
2120         int ret;
2121
2122         switch (pginfo->type) {
2123         case EHCA_MR_PGI_PHYS:
2124                 ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
2125                 break;
2126         case EHCA_MR_PGI_USER:
2127                 ret = PAGE_SIZE >= pginfo->hwpage_size ?
2128                         ehca_set_pagebuf_user1(pginfo, number, kpage) :
2129                         ehca_set_pagebuf_user2(pginfo, number, kpage);
2130                 break;
2131         case EHCA_MR_PGI_FMR:
2132                 ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
2133                 break;
2134         default:
2135                 ehca_gen_err("bad pginfo->type=%x", pginfo->type);
2136                 ret = -EFAULT;
2137                 break;
2138         }
2139         return ret;
2140 } /* end ehca_set_pagebuf() */
2141
2142 /*----------------------------------------------------------------------*/
2143
2144 /*
2145  * check MR if it is a max-MR, i.e. uses whole memory
2146  * in case it's a max-MR 1 is returned, else 0
2147  */
2148 int ehca_mr_is_maxmr(u64 size,
2149                      u64 *iova_start)
2150 {
2151         /* a MR is treated as max-MR only if it fits following: */
2152         if ((size == ehca_mr_len) &&
2153             (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) {
2154                 ehca_gen_dbg("this is a max-MR");
2155                 return 1;
2156         } else
2157                 return 0;
2158 } /* end ehca_mr_is_maxmr() */
2159
2160 /*----------------------------------------------------------------------*/
2161
2162 /* map access control for MR/MW. This routine is used for MR and MW. */
2163 void ehca_mrmw_map_acl(int ib_acl,
2164                        u32 *hipz_acl)
2165 {
2166         *hipz_acl = 0;
2167         if (ib_acl & IB_ACCESS_REMOTE_READ)
2168                 *hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
2169         if (ib_acl & IB_ACCESS_REMOTE_WRITE)
2170                 *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
2171         if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
2172                 *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
2173         if (ib_acl & IB_ACCESS_LOCAL_WRITE)
2174                 *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
2175         if (ib_acl & IB_ACCESS_MW_BIND)
2176                 *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
2177 } /* end ehca_mrmw_map_acl() */
2178
2179 /*----------------------------------------------------------------------*/
2180
2181 /* sets page size in hipz access control for MR/MW. */
2182 void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
2183 {
2184         *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
2185 } /* end ehca_mrmw_set_pgsize_hipz_acl() */
2186
2187 /*----------------------------------------------------------------------*/
2188
2189 /*
2190  * reverse map access control for MR/MW.
2191  * This routine is used for MR and MW.
2192  */
2193 void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
2194                                int *ib_acl) /*OUT*/
2195 {
2196         *ib_acl = 0;
2197         if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
2198                 *ib_acl |= IB_ACCESS_REMOTE_READ;
2199         if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
2200                 *ib_acl |= IB_ACCESS_REMOTE_WRITE;
2201         if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
2202                 *ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
2203         if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
2204                 *ib_acl |= IB_ACCESS_LOCAL_WRITE;
2205         if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
2206                 *ib_acl |= IB_ACCESS_MW_BIND;
2207 } /* end ehca_mrmw_reverse_map_acl() */
2208
2209
2210 /*----------------------------------------------------------------------*/
2211
2212 /*
2213  * MR destructor and constructor
2214  * used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
2215  * except struct ib_mr and spinlock
2216  */
2217 void ehca_mr_deletenew(struct ehca_mr *mr)
2218 {
2219         mr->flags = 0;
2220         mr->num_kpages = 0;
2221         mr->num_hwpages = 0;
2222         mr->acl = 0;
2223         mr->start = NULL;
2224         mr->fmr_page_size = 0;
2225         mr->fmr_max_pages = 0;
2226         mr->fmr_max_maps = 0;
2227         mr->fmr_map_cnt = 0;
2228         memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
2229         memset(&mr->galpas, 0, sizeof(mr->galpas));
2230 } /* end ehca_mr_deletenew() */
2231
2232 int ehca_init_mrmw_cache(void)
2233 {
2234         mr_cache = kmem_cache_create("ehca_cache_mr",
2235                                      sizeof(struct ehca_mr), 0,
2236                                      SLAB_HWCACHE_ALIGN,
2237                                      NULL);
2238         if (!mr_cache)
2239                 return -ENOMEM;
2240         mw_cache = kmem_cache_create("ehca_cache_mw",
2241                                      sizeof(struct ehca_mw), 0,
2242                                      SLAB_HWCACHE_ALIGN,
2243                                      NULL);
2244         if (!mw_cache) {
2245                 kmem_cache_destroy(mr_cache);
2246                 mr_cache = NULL;
2247                 return -ENOMEM;
2248         }
2249         return 0;
2250 }
2251
2252 void ehca_cleanup_mrmw_cache(void)
2253 {
2254         if (mr_cache)
2255                 kmem_cache_destroy(mr_cache);
2256         if (mw_cache)
2257                 kmem_cache_destroy(mw_cache);
2258 }
2259
2260 static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap,
2261                                      int dir)
2262 {
2263         if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) {
2264                 ehca_top_bmap->dir[dir] =
2265                         kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL);
2266                 if (!ehca_top_bmap->dir[dir])
2267                         return -ENOMEM;
2268                 /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
2269                 memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE);
2270         }
2271         return 0;
2272 }
2273
2274 static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir)
2275 {
2276         if (!ehca_bmap_valid(ehca_bmap->top[top])) {
2277                 ehca_bmap->top[top] =
2278                         kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL);
2279                 if (!ehca_bmap->top[top])
2280                         return -ENOMEM;
2281                 /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
2282                 memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE);
2283         }
2284         return ehca_init_top_bmap(ehca_bmap->top[top], dir);
2285 }
2286
2287 static inline int ehca_calc_index(unsigned long i, unsigned long s)
2288 {
2289         return (i >> s) & EHCA_INDEX_MASK;
2290 }
2291
2292 void ehca_destroy_busmap(void)
2293 {
2294         int top, dir;
2295
2296         if (!ehca_bmap)
2297                 return;
2298
2299         for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
2300                 if (!ehca_bmap_valid(ehca_bmap->top[top]))
2301                         continue;
2302                 for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
2303                         if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
2304                                 continue;
2305
2306                         kfree(ehca_bmap->top[top]->dir[dir]);
2307                 }
2308
2309                 kfree(ehca_bmap->top[top]);
2310         }
2311
2312         kfree(ehca_bmap);
2313         ehca_bmap = NULL;
2314 }
2315
2316 static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
2317 {
2318         unsigned long i, start_section, end_section;
2319         int top, dir, idx;
2320
2321         if (!nr_pages)
2322                 return 0;
2323
2324         if (!ehca_bmap) {
2325                 ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL);
2326                 if (!ehca_bmap)
2327                         return -ENOMEM;
2328                 /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
2329                 memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
2330         }
2331
2332         start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE;
2333         end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
2334         for (i = start_section; i < end_section; i++) {
2335                 int ret;
2336                 top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
2337                 dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);
2338                 idx = i & EHCA_INDEX_MASK;
2339
2340                 ret = ehca_init_bmap(ehca_bmap, top, dir);
2341                 if (ret) {
2342                         ehca_destroy_busmap();
2343                         return ret;
2344                 }
2345                 ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len;
2346                 ehca_mr_len += EHCA_SECTSIZE;
2347         }
2348         return 0;
2349 }
2350
2351 static int ehca_is_hugepage(unsigned long pfn)
2352 {
2353         int page_order;
2354
2355         if (pfn & EHCA_HUGEPAGE_PFN_MASK)
2356                 return 0;
2357
2358         page_order = compound_order(pfn_to_page(pfn));
2359         if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT)
2360                 return 0;
2361
2362         return 1;
2363 }
2364
2365 static int ehca_create_busmap_callback(unsigned long initial_pfn,
2366                                        unsigned long total_nr_pages, void *arg)
2367 {
2368         int ret;
2369         unsigned long pfn, start_pfn, end_pfn, nr_pages;
2370
2371         if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE)
2372                 return ehca_update_busmap(initial_pfn, total_nr_pages);
2373
2374         /* Given chunk is >= 16GB -> check for hugepages */
2375         start_pfn = initial_pfn;
2376         end_pfn = initial_pfn + total_nr_pages;
2377         pfn = start_pfn;
2378
2379         while (pfn < end_pfn) {
2380                 if (ehca_is_hugepage(pfn)) {
2381                         /* Add mem found in front of the hugepage */
2382                         nr_pages = pfn - start_pfn;
2383                         ret = ehca_update_busmap(start_pfn, nr_pages);
2384                         if (ret)
2385                                 return ret;
2386                         /* Skip the hugepage */
2387                         pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE);
2388                         start_pfn = pfn;
2389                 } else
2390                         pfn += (EHCA_SECTSIZE / PAGE_SIZE);
2391         }
2392
2393         /* Add mem found behind the hugepage(s)  */
2394         nr_pages = pfn - start_pfn;
2395         return ehca_update_busmap(start_pfn, nr_pages);
2396 }
2397
2398 int ehca_create_busmap(void)
2399 {
2400         int ret;
2401
2402         ehca_mr_len = 0;
2403         ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
2404                                    ehca_create_busmap_callback);
2405         return ret;
2406 }
2407
2408 static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
2409                                    struct ehca_mr *e_mr,
2410                                    struct ehca_mr_pginfo *pginfo)
2411 {
2412         int top;
2413         u64 hret, *kpage;
2414
2415         kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
2416         if (!kpage) {
2417                 ehca_err(&shca->ib_device, "kpage alloc failed");
2418                 return -ENOMEM;
2419         }
2420         for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
2421                 if (!ehca_bmap_valid(ehca_bmap->top[top]))
2422                         continue;
2423                 hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo);
2424                 if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
2425                         break;
2426         }
2427
2428         ehca_free_fw_ctrlblock(kpage);
2429
2430         if (hret == H_SUCCESS)
2431                 return 0; /* Everything is fine */
2432         else {
2433                 ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, "
2434                                  "h_ret=%lli e_mr=%p top=%x lkey=%x "
2435                                  "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top,
2436                                  e_mr->ib.ib_mr.lkey,
2437                                  shca->ipz_hca_handle.handle,
2438                                  e_mr->ipz_mr_handle.handle);
2439                 return ehca2ib_return_code(hret);
2440         }
2441 }
2442
2443 static u64 ehca_map_vaddr(void *caddr)
2444 {
2445         int top, dir, idx;
2446         unsigned long abs_addr, offset;
2447         u64 entry;
2448
2449         if (!ehca_bmap)
2450                 return EHCA_INVAL_ADDR;
2451
2452         abs_addr = __pa(caddr);
2453         top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
2454         if (!ehca_bmap_valid(ehca_bmap->top[top]))
2455                 return EHCA_INVAL_ADDR;
2456
2457         dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
2458         if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
2459                 return EHCA_INVAL_ADDR;
2460
2461         idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);
2462
2463         entry = ehca_bmap->top[top]->dir[dir]->ent[idx];
2464         if (ehca_bmap_valid(entry)) {
2465                 offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1);
2466                 return entry | offset;
2467         } else
2468                 return EHCA_INVAL_ADDR;
2469 }
2470
2471 static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
2472 {
2473         return dma_addr == EHCA_INVAL_ADDR;
2474 }
2475
2476 static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr,
2477                                size_t size, enum dma_data_direction direction)
2478 {
2479         if (cpu_addr)
2480                 return ehca_map_vaddr(cpu_addr);
2481         else
2482                 return EHCA_INVAL_ADDR;
2483 }
2484
2485 static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
2486                                   enum dma_data_direction direction)
2487 {
2488         /* This is only a stub; nothing to be done here */
2489 }
2490
2491 static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page,
2492                              unsigned long offset, size_t size,
2493                              enum dma_data_direction direction)
2494 {
2495         u64 addr;
2496
2497         if (offset + size > PAGE_SIZE)
2498                 return EHCA_INVAL_ADDR;
2499
2500         addr = ehca_map_vaddr(page_address(page));
2501         if (!ehca_dma_mapping_error(dev, addr))
2502                 addr += offset;
2503
2504         return addr;
2505 }
2506
2507 static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
2508                                 enum dma_data_direction direction)
2509 {
2510         /* This is only a stub; nothing to be done here */
2511 }
2512
2513 static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl,
2514                            int nents, enum dma_data_direction direction)
2515 {
2516         struct scatterlist *sg;
2517         int i;
2518
2519         for_each_sg(sgl, sg, nents, i) {
2520                 u64 addr;
2521                 addr = ehca_map_vaddr(sg_virt(sg));
2522                 if (ehca_dma_mapping_error(dev, addr))
2523                         return 0;
2524
2525                 sg->dma_address = addr;
2526                 sg->dma_length = sg->length;
2527         }
2528         return nents;
2529 }
2530
2531 static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
2532                               int nents, enum dma_data_direction direction)
2533 {
2534         /* This is only a stub; nothing to be done here */
2535 }
2536
2537 static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
2538                                          size_t size,
2539                                          enum dma_data_direction dir)
2540 {
2541         dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
2542 }
2543
2544 static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr,
2545                                             size_t size,
2546                                             enum dma_data_direction dir)
2547 {
2548         dma_sync_single_for_device(dev->dma_device, addr, size, dir);
2549 }
2550
2551 static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size,
2552                                      u64 *dma_handle, gfp_t flag)
2553 {
2554         struct page *p;
2555         void *addr = NULL;
2556         u64 dma_addr;
2557
2558         p = alloc_pages(flag, get_order(size));
2559         if (p) {
2560                 addr = page_address(p);
2561                 dma_addr = ehca_map_vaddr(addr);
2562                 if (ehca_dma_mapping_error(dev, dma_addr)) {
2563                         free_pages((unsigned long)addr, get_order(size));
2564                         return NULL;
2565                 }
2566                 if (dma_handle)
2567                         *dma_handle = dma_addr;
2568                 return addr;
2569         }
2570         return NULL;
2571 }
2572
2573 static void ehca_dma_free_coherent(struct ib_device *dev, size_t size,
2574                                    void *cpu_addr, u64 dma_handle)
2575 {
2576         if (cpu_addr && size)
2577                 free_pages((unsigned long)cpu_addr, get_order(size));
2578 }
2579
2580
2581 struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
2582         .mapping_error          = ehca_dma_mapping_error,
2583         .map_single             = ehca_dma_map_single,
2584         .unmap_single           = ehca_dma_unmap_single,
2585         .map_page               = ehca_dma_map_page,
2586         .unmap_page             = ehca_dma_unmap_page,
2587         .map_sg                 = ehca_dma_map_sg,
2588         .unmap_sg               = ehca_dma_unmap_sg,
2589         .sync_single_for_cpu    = ehca_dma_sync_single_for_cpu,
2590         .sync_single_for_device = ehca_dma_sync_single_for_device,
2591         .alloc_coherent         = ehca_dma_alloc_coherent,
2592         .free_coherent          = ehca_dma_free_coherent,
2593 };