drivers/misc/vmw_vmci/vmci_queue_pair.c
/*
 * VMware VMCI Driver
 *
 * Copyright (C) 2012 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 */

#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>

#include "vmci_handle_array.h"
#include "vmci_queue_pair.h"
#include "vmci_datagram.h"
#include "vmci_resource.h"
#include "vmci_context.h"
#include "vmci_driver.h"
#include "vmci_event.h"
#include "vmci_route.h"

/*
 * In the following, we will distinguish between two kinds of VMX processes -
 * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized
 * VMCI page files in the VMX to support VM to VM communication, and the
 * newer ones that use the guest memory directly. We will in the following
 * refer to the older VMX versions as old-style VMX'en, and the newer ones as
 * new-style VMX'en.
 *
 * The state transition diagram is as follows (the VMCIQPB_ prefix has been
 * removed for readability) - see below for more details on the transitions:
 *
 *            --------------  NEW  -------------
 *            |                                |
 *           \_/                              \_/
 *     CREATED_NO_MEM <-----------------> CREATED_MEM
 *            |    |                           |
 *            |    o-----------------------o   |
 *            |                            |   |
 *           \_/                          \_/ \_/
 *     ATTACHED_NO_MEM <----------------> ATTACHED_MEM
 *            |                            |   |
 *            |     o----------------------o   |
 *            |     |                          |
 *           \_/   \_/                        \_/
 *     SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM
 *            |                                |
 *            |                                |
 *            -------------> gone <-------------
 *
 * In more detail. When a VMCI queue pair is first created, it will be in the
 * VMCIQPB_NEW state. It will then move into one of the following states:
 *
 * - VMCIQPB_CREATED_NO_MEM: this state indicates that either:
 *
 *     - the create was performed by a host endpoint, in which case there is
 *       no backing memory yet.
 *
 *     - the create was initiated by an old-style VMX, that uses
 *       vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at
 *       a later point in time. This state can be distinguished from the one
 *       above by the context ID of the creator. A host side is not allowed to
 *       attach until the page store has been set.
 *
 * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair
 *     is created by a VMX using the queue pair device backend that
 *     sets the UVAs of the queue pair immediately and stores the
 *     information for later attachers. At this point, it is ready for
 *     the host side to attach to it.
 *
 * Once the queue pair is in one of the created states (with the exception of
 * the case mentioned for older VMX'en above), it is possible to attach to the
 * queue pair. Again we have two new states possible:
 *
 * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following
 *   paths:
 *
 *     - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue
 *       pair, and attaches to a queue pair previously created by the host side.
 *
 *     - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair
 *       already created by a guest.
 *
 *     - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls
 *       vmci_qp_broker_set_page_store (see below).
 *
 * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the
 *     VMCIQPB_CREATED_NO_MEM due to a host side create, an old-style VMX will
 *     bring the queue pair into this state. Once vmci_qp_broker_set_page_store
 *     is called to register the user memory, the VMCIQPB_ATTACHED_MEM state
 *     will be entered.
 *
 * From the attached queue pair, the queue pair can enter the shutdown states
 * when either side of the queue pair detaches. If the guest side detaches
 * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where
 * the content of the queue pair will no longer be available. If the host
 * side detaches first, the queue pair will either enter the
 * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or
 * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped
 * (e.g., the host detaches while a guest is stunned).
 *
 * New-style VMX'en will also unmap guest memory, if the guest is
 * quiesced, e.g., during a snapshot operation. In that case, the guest
 * memory will no longer be available, and the queue pair will transition from
 * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more,
 * in which case the queue pair will transition from the *_NO_MEM state at that
 * point back to the *_MEM state. Note that the *_NO_MEM state may have changed,
 * since the peer may have either attached or detached in the meantime. The
 * values are laid out such that ++ on a state will move from a *_NO_MEM to a
 * *_MEM state, and vice versa (see the illustrative sketch following the
 * qp_broker_state definitions below).
 */

/*
 * VMCIMemcpy{To,From}QueueFunc() prototypes.  Functions of these
 * types are passed around to enqueue and dequeue routines.  Note that
 * often the functions passed are simply wrappers around memcpy
 * itself.
 *
 * Note: In order for the memcpy typedefs to be compatible with the VMKernel,
 * there's an unused last parameter for the hosted side.  In
 * ESX, that parameter holds a buffer type.
 */
typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue,
                                      u64 queue_offset, const void *src,
                                      size_t src_offset, size_t size);
typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset,
                                        const struct vmci_queue *queue,
                                        u64 queue_offset, size_t size);

/* The Kernel specific component of the struct vmci_queue structure. */
struct vmci_queue_kern_if {
        struct page **page;
        struct page **header_page;
        void *va;
        struct mutex __mutex;   /* Protects the queue. */
        struct mutex *mutex;    /* Shared by producer and consumer queues. */
        bool host;
        size_t num_pages;
        bool mapped;
};

/*
 * This structure is opaque to the clients.
 */
struct vmci_qp {
        struct vmci_handle handle;
        struct vmci_queue *produce_q;
        struct vmci_queue *consume_q;
        u64 produce_q_size;
        u64 consume_q_size;
        u32 peer;
        u32 flags;
        u32 priv_flags;
        bool guest_endpoint;
        unsigned int blocked;
        unsigned int generation;
        wait_queue_head_t event;
};

enum qp_broker_state {
        VMCIQPB_NEW,
        VMCIQPB_CREATED_NO_MEM,
        VMCIQPB_CREATED_MEM,
        VMCIQPB_ATTACHED_NO_MEM,
        VMCIQPB_ATTACHED_MEM,
        VMCIQPB_SHUTDOWN_NO_MEM,
        VMCIQPB_SHUTDOWN_MEM,
        VMCIQPB_GONE
};

#define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \
                                     _qpb->state == VMCIQPB_ATTACHED_MEM || \
                                     _qpb->state == VMCIQPB_SHUTDOWN_MEM)
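
/*
 * Illustration only, not part of the driver: as noted in the state
 * diagram above, the enum values are laid out so that incrementing a
 * *_NO_MEM state yields the corresponding *_MEM state, and vice
 * versa.  A minimal sketch of how an entry could toggle its memory
 * state (the real transitions happen in the set_page_store and
 * map/unmap paths):
 */
static inline void qp_broker_state_add_mem_sketch(enum qp_broker_state *state)
{
        /* e.g. VMCIQPB_ATTACHED_NO_MEM becomes VMCIQPB_ATTACHED_MEM */
        if (*state == VMCIQPB_CREATED_NO_MEM ||
            *state == VMCIQPB_ATTACHED_NO_MEM ||
            *state == VMCIQPB_SHUTDOWN_NO_MEM)
                (*state)++;
}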

/*
 * In the queue pair broker, we always use the guest point of view for
 * the produce and consume queue values and references, e.g., the
 * produce queue size stored is the guest's produce queue size. The
 * host endpoint will need to swap these around. The only exception is
 * the local queue pairs on the host, in which case the host endpoint
 * that creates the queue pair will have the right orientation, and
 * the attaching host endpoint will need to swap.
 */
struct qp_entry {
        struct list_head list_item;
        struct vmci_handle handle;
        u32 peer;
        u32 flags;
        u64 produce_size;
        u64 consume_size;
        u32 ref_count;
};

struct qp_broker_entry {
        struct vmci_resource resource;
        struct qp_entry qp;
        u32 create_id;
        u32 attach_id;
        enum qp_broker_state state;
        bool require_trusted_attach;
        bool created_by_trusted;
        bool vmci_page_files;   /* Created by VMX using VMCI page files */
        struct vmci_queue *produce_q;
        struct vmci_queue *consume_q;
        struct vmci_queue_header saved_produce_q;
        struct vmci_queue_header saved_consume_q;
        vmci_event_release_cb wakeup_cb;
        void *client_data;
        void *local_mem;        /* Kernel memory for local queue pair */
};

struct qp_guest_endpoint {
        struct vmci_resource resource;
        struct qp_entry qp;
        u64 num_ppns;
        void *produce_q;
        void *consume_q;
        struct ppn_set ppn_set;
};

struct qp_list {
        struct list_head head;
        struct mutex mutex;     /* Protect queue list. */
};

static struct qp_list qp_broker_list = {
        .head = LIST_HEAD_INIT(qp_broker_list.head),
        .mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex),
};

static struct qp_list qp_guest_endpoints = {
        .head = LIST_HEAD_INIT(qp_guest_endpoints.head),
        .mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex),
};

#define INVALID_VMCI_GUEST_MEM_ID  0
#define QPE_NUM_PAGES(_QPE) ((u32) \
                             (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \
                              DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2))
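
/*
 * Worked example of QPE_NUM_PAGES (illustration only, assuming 4 KiB
 * pages): an entry with produce_size == consume_size == 16 KiB yields
 * 4 + 4 + 2 == 10 pages -- one page per 4 KiB of queue data, plus one
 * header page for each of the two queues.
 */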


/*
 * Frees kernel VA space for a given queue and its queue header, and
 * frees physical data pages.
 */
static void qp_free_queue(void *q, u64 size)
{
        struct vmci_queue *queue = q;

        if (queue) {
                u64 i = DIV_ROUND_UP(size, PAGE_SIZE);

                if (queue->kernel_if->mapped) {
                        vunmap(queue->kernel_if->va);
                        queue->kernel_if->va = NULL;
                }

                while (i)
                        __free_page(queue->kernel_if->page[--i]);

                vfree(queue->q_header);
        }
}

/*
 * Allocates kernel VA space of specified size, plus space for the
 * queue structure/kernel interface and the queue header.  Allocates
 * physical pages for the queue data pages.
 *
 * PAGE m:      struct vmci_queue_header (struct vmci_queue->q_header)
 * PAGE m+1:    struct vmci_queue
 * PAGE m+1+q:  struct vmci_queue_kern_if (struct vmci_queue->kernel_if)
 * PAGE n-size: Data pages (struct vmci_queue->kernel_if->page[])
 */
static void *qp_alloc_queue(u64 size, u32 flags)
{
        u64 i;
        struct vmci_queue *queue;
        struct vmci_queue_header *q_header;
        const u64 num_data_pages = DIV_ROUND_UP(size, PAGE_SIZE);
        const uint queue_size =
            PAGE_SIZE +
            sizeof(*queue) + sizeof(*(queue->kernel_if)) +
            num_data_pages * sizeof(*(queue->kernel_if->page));

        q_header = vmalloc(queue_size);
        if (!q_header)
                return NULL;

        queue = (void *)q_header + PAGE_SIZE;
        queue->q_header = q_header;
        queue->saved_header = NULL;
        queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
        queue->kernel_if->header_page = NULL;   /* Unused in guest. */
        queue->kernel_if->page = (struct page **)(queue->kernel_if + 1);
        queue->kernel_if->host = false;
        queue->kernel_if->va = NULL;
        queue->kernel_if->mapped = false;

        for (i = 0; i < num_data_pages; i++) {
                queue->kernel_if->page[i] = alloc_pages(GFP_KERNEL, 0);
                if (!queue->kernel_if->page[i])
                        goto fail;
        }

        if (vmci_qp_pinned(flags)) {
                queue->kernel_if->va =
                    vmap(queue->kernel_if->page, num_data_pages, VM_MAP,
                         PAGE_KERNEL);
                if (!queue->kernel_if->va)
                        goto fail;

                queue->kernel_if->mapped = true;
        }

        return (void *)queue;

 fail:
        qp_free_queue(queue, i * PAGE_SIZE);
        return NULL;
}

/*
 * Copies from a given buffer or iovector to a VMCI Queue.  Uses
 * kmap()/kunmap() to dynamically map/unmap required portions of the queue
 * by traversing the offset -> page translation structure for the queue.
 * Assumes that offset + size does not wrap around in the queue.
 */
static int __qp_memcpy_to_queue(struct vmci_queue *queue,
                                u64 queue_offset,
                                const void *src,
                                size_t size,
                                bool is_iovec)
{
        struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
        size_t bytes_copied = 0;

        while (bytes_copied < size) {
                u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE;
                size_t page_offset =
                    (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
                void *va;
                size_t to_copy;

                if (!kernel_if->mapped)
                        va = kmap(kernel_if->page[page_index]);
                else
                        va = (void *)((u8 *)kernel_if->va +
                                      (page_index * PAGE_SIZE));

                if (size - bytes_copied > PAGE_SIZE - page_offset)
                        /* Enough payload to fill up from this page. */
                        to_copy = PAGE_SIZE - page_offset;
                else
                        to_copy = size - bytes_copied;

                if (is_iovec) {
                        struct iovec *iov = (struct iovec *)src;
                        int err;

                        /* The iovec will track bytes_copied internally. */
                        err = memcpy_fromiovec((u8 *)va + page_offset,
                                               iov, to_copy);
                        if (err != 0) {
                                if (!kernel_if->mapped)
                                        kunmap(kernel_if->page[page_index]);
                                return VMCI_ERROR_INVALID_ARGS;
                        }
                } else {
                        memcpy((u8 *)va + page_offset,
                               (u8 *)src + bytes_copied, to_copy);
                }

                bytes_copied += to_copy;
                if (!kernel_if->mapped)
                        kunmap(kernel_if->page[page_index]);
        }

        return VMCI_SUCCESS;
}
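
/*
 * Worked example of the offset arithmetic above (illustration only,
 * assuming 4 KiB pages): for queue_offset + bytes_copied == 5000,
 * page_index is 1 and page_offset is 904, so at most
 * PAGE_SIZE - 904 == 3192 bytes are copied to that page before the
 * loop advances to the next one.
 */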

/*
 * Copies to a given buffer or iovector from a VMCI Queue.  Uses
 * kmap()/kunmap() to dynamically map/unmap required portions of the queue
 * by traversing the offset -> page translation structure for the queue.
 * Assumes that offset + size does not wrap around in the queue.
 */
static int __qp_memcpy_from_queue(void *dest,
                                  const struct vmci_queue *queue,
                                  u64 queue_offset,
                                  size_t size,
                                  bool is_iovec)
{
        struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
        size_t bytes_copied = 0;

        while (bytes_copied < size) {
                u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE;
                size_t page_offset =
                    (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
                void *va;
                size_t to_copy;

                if (!kernel_if->mapped)
                        va = kmap(kernel_if->page[page_index]);
                else
                        va = (void *)((u8 *)kernel_if->va +
                                      (page_index * PAGE_SIZE));

                if (size - bytes_copied > PAGE_SIZE - page_offset)
                        /* Enough payload to fill up this page. */
                        to_copy = PAGE_SIZE - page_offset;
                else
                        to_copy = size - bytes_copied;

                if (is_iovec) {
                        struct iovec *iov = (struct iovec *)dest;
                        int err;

                        /* The iovec will track bytes_copied internally. */
                        err = memcpy_toiovec(iov, (u8 *)va + page_offset,
                                             to_copy);
                        if (err != 0) {
                                if (!kernel_if->mapped)
                                        kunmap(kernel_if->page[page_index]);
                                return VMCI_ERROR_INVALID_ARGS;
                        }
                } else {
                        memcpy((u8 *)dest + bytes_copied,
                               (u8 *)va + page_offset, to_copy);
                }

                bytes_copied += to_copy;
                if (!kernel_if->mapped)
                        kunmap(kernel_if->page[page_index]);
        }

        return VMCI_SUCCESS;
}

/*
 * Allocates two lists of PPNs --- one for the pages in the produce queue,
 * and the other for the pages in the consume queue. Initializes the list
 * of PPNs with the page frame numbers of the KVA for the two queues (and
 * the queue headers).
 */
static int qp_alloc_ppn_set(void *prod_q,
                            u64 num_produce_pages,
                            void *cons_q,
                            u64 num_consume_pages, struct ppn_set *ppn_set)
{
        u32 *produce_ppns;
        u32 *consume_ppns;
        struct vmci_queue *produce_q = prod_q;
        struct vmci_queue *consume_q = cons_q;
        u64 i;

        if (!produce_q || !num_produce_pages || !consume_q ||
            !num_consume_pages || !ppn_set)
                return VMCI_ERROR_INVALID_ARGS;

        if (ppn_set->initialized)
                return VMCI_ERROR_ALREADY_EXISTS;

        produce_ppns =
            kmalloc(num_produce_pages * sizeof(*produce_ppns), GFP_KERNEL);
        if (!produce_ppns)
                return VMCI_ERROR_NO_MEM;

        consume_ppns =
            kmalloc(num_consume_pages * sizeof(*consume_ppns), GFP_KERNEL);
        if (!consume_ppns) {
                kfree(produce_ppns);
                return VMCI_ERROR_NO_MEM;
        }

        produce_ppns[0] = page_to_pfn(vmalloc_to_page(produce_q->q_header));
        for (i = 1; i < num_produce_pages; i++) {
                unsigned long pfn;

                produce_ppns[i] =
                    page_to_pfn(produce_q->kernel_if->page[i - 1]);
                pfn = produce_ppns[i];

                /* Fail allocation if PFN isn't supported by hypervisor. */
                if (sizeof(pfn) > sizeof(*produce_ppns)
                    && pfn != produce_ppns[i])
                        goto ppn_error;
        }

        consume_ppns[0] = page_to_pfn(vmalloc_to_page(consume_q->q_header));
        for (i = 1; i < num_consume_pages; i++) {
                unsigned long pfn;

                consume_ppns[i] =
                    page_to_pfn(consume_q->kernel_if->page[i - 1]);
                pfn = consume_ppns[i];

                /* Fail allocation if PFN isn't supported by hypervisor. */
                if (sizeof(pfn) > sizeof(*consume_ppns)
                    && pfn != consume_ppns[i])
                        goto ppn_error;
        }

        ppn_set->num_produce_pages = num_produce_pages;
        ppn_set->num_consume_pages = num_consume_pages;
        ppn_set->produce_ppns = produce_ppns;
        ppn_set->consume_ppns = consume_ppns;
        ppn_set->initialized = true;
        return VMCI_SUCCESS;

 ppn_error:
        kfree(produce_ppns);
        kfree(consume_ppns);
        return VMCI_ERROR_INVALID_ARGS;
}

/*
 * Frees the two lists of PPNs for a queue pair.
 */
static void qp_free_ppn_set(struct ppn_set *ppn_set)
{
        if (ppn_set->initialized) {
                /* Do not call these functions on NULL inputs. */
                kfree(ppn_set->produce_ppns);
                kfree(ppn_set->consume_ppns);
        }
        memset(ppn_set, 0, sizeof(*ppn_set));
}

/*
 * Populates the list of PPNs in the hypercall structure with the PPNs
 * of the produce queue and the consume queue.
 */
static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set)
{
        memcpy(call_buf, ppn_set->produce_ppns,
               ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns));
        memcpy(call_buf +
               ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns),
               ppn_set->consume_ppns,
               ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns));

        return VMCI_SUCCESS;
}

static int qp_memcpy_to_queue(struct vmci_queue *queue,
                              u64 queue_offset,
                              const void *src, size_t src_offset, size_t size)
{
        return __qp_memcpy_to_queue(queue, queue_offset,
                                    (u8 *)src + src_offset, size, false);
}

static int qp_memcpy_from_queue(void *dest,
                                size_t dest_offset,
                                const struct vmci_queue *queue,
                                u64 queue_offset, size_t size)
{
        return __qp_memcpy_from_queue((u8 *)dest + dest_offset,
                                      queue, queue_offset, size, false);
}

/*
 * Copies from a given iovec to a VMCI Queue.
 */
static int qp_memcpy_to_queue_iov(struct vmci_queue *queue,
                                  u64 queue_offset,
                                  const void *src,
                                  size_t src_offset, size_t size)
{

        /*
         * We ignore src_offset because src is really a struct iovec * and will
         * maintain offset internally.
         */
        return __qp_memcpy_to_queue(queue, queue_offset, src, size, true);
}

/*
 * Copies to a given iovec from a VMCI Queue.
 */
static int qp_memcpy_from_queue_iov(void *dest,
                                    size_t dest_offset,
                                    const struct vmci_queue *queue,
                                    u64 queue_offset, size_t size)
{
        /*
         * We ignore dest_offset because dest is really a struct iovec * and
         * will maintain offset internally.
         */
        return __qp_memcpy_from_queue(dest, queue, queue_offset, size, true);
}

/*
 * Allocates kernel VA space of specified size plus space for the queue
 * and kernel interface.  This is different from the guest queue allocator,
 * because we do not allocate our own queue header/data pages here but
 * share those of the guest.
 */
static struct vmci_queue *qp_host_alloc_queue(u64 size)
{
        struct vmci_queue *queue;
        const size_t num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
        const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
        const size_t queue_page_size =
            num_pages * sizeof(*queue->kernel_if->page);

        queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL);
        if (queue) {
                queue->q_header = NULL;
                queue->saved_header = NULL;
                queue->kernel_if =
                    (struct vmci_queue_kern_if *)((u8 *)queue +
                                                  sizeof(*queue));
                queue->kernel_if->host = true;
                queue->kernel_if->mutex = NULL;
                queue->kernel_if->num_pages = num_pages;
                queue->kernel_if->header_page =
                    (struct page **)((u8 *)queue + queue_size);
                queue->kernel_if->page = &queue->kernel_if->header_page[1];
                queue->kernel_if->va = NULL;
                queue->kernel_if->mapped = false;
        }

        return queue;
}

/*
 * Frees kernel memory for a given queue (header plus translation
 * structure).
 */
static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size)
{
        kfree(queue);
}

/*
 * Initialize the mutex for the pair of queues.  This mutex is used to
 * protect the q_header and the buffer from changing out from under any
 * users of either queue.  Of course, it's only any good if the mutexes
 * are actually acquired.  Queue structure must lie on non-paged memory
 * or we cannot guarantee access to the mutex.
 */
static void qp_init_queue_mutex(struct vmci_queue *produce_q,
                                struct vmci_queue *consume_q)
{
        /*
         * Only the host queue has shared state - the guest queues do not
         * need to synchronize access using a queue mutex.
         */

        if (produce_q->kernel_if->host) {
                produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
                consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
                mutex_init(produce_q->kernel_if->mutex);
        }
}

/*
 * Cleans up the mutex for the pair of queues.
 */
static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q,
                                   struct vmci_queue *consume_q)
{
        if (produce_q->kernel_if->host) {
                produce_q->kernel_if->mutex = NULL;
                consume_q->kernel_if->mutex = NULL;
        }
}

/*
 * Acquire the mutex for the queue.  Note that the produce_q and
 * the consume_q share a mutex.  So, only one of the two needs to
 * be passed in to this routine.  Either will work just fine.
 */
static void qp_acquire_queue_mutex(struct vmci_queue *queue)
{
        if (queue->kernel_if->host)
                mutex_lock(queue->kernel_if->mutex);
}

/*
 * Release the mutex for the queue.  Note that the produce_q and
 * the consume_q share a mutex.  So, only one of the two needs to
 * be passed in to this routine.  Either will work just fine.
 */
static void qp_release_queue_mutex(struct vmci_queue *queue)
{
        if (queue->kernel_if->host)
                mutex_unlock(queue->kernel_if->mutex);
}

/*
 * Helper function to release pages in the PageStoreAttachInfo
 * previously obtained using get_user_pages.
 */
static void qp_release_pages(struct page **pages,
                             u64 num_pages, bool dirty)
{
        int i;

        for (i = 0; i < num_pages; i++) {
                if (dirty)
                        set_page_dirty(pages[i]);

                page_cache_release(pages[i]);
                pages[i] = NULL;
        }
}

/*
 * Lock the user pages referenced by the {produce,consume}Buffer
 * struct into memory and populate the {produce,consume}Pages
 * arrays in the attach structure with them.
 */
static int qp_host_get_user_memory(u64 produce_uva,
                                   u64 consume_uva,
                                   struct vmci_queue *produce_q,
                                   struct vmci_queue *consume_q)
{
        int retval;
        int err = VMCI_SUCCESS;

        down_write(&current->mm->mmap_sem);
        retval = get_user_pages(current,
                                current->mm,
                                (uintptr_t) produce_uva,
                                produce_q->kernel_if->num_pages,
                                1, 0, produce_q->kernel_if->header_page, NULL);
        if (retval < produce_q->kernel_if->num_pages) {
                pr_warn("get_user_pages(produce) failed (retval=%d)", retval);
                qp_release_pages(produce_q->kernel_if->header_page, retval,
                                 false);
                err = VMCI_ERROR_NO_MEM;
                goto out;
        }

        retval = get_user_pages(current,
                                current->mm,
                                (uintptr_t) consume_uva,
                                consume_q->kernel_if->num_pages,
                                1, 0, consume_q->kernel_if->header_page, NULL);
        if (retval < consume_q->kernel_if->num_pages) {
                pr_warn("get_user_pages(consume) failed (retval=%d)", retval);
                qp_release_pages(consume_q->kernel_if->header_page, retval,
                                 false);
                qp_release_pages(produce_q->kernel_if->header_page,
                                 produce_q->kernel_if->num_pages, false);
                err = VMCI_ERROR_NO_MEM;
        }

 out:
        up_write(&current->mm->mmap_sem);

        return err;
}

/*
 * Registers the specification of the user pages used for backing a queue
 * pair. Enough information to map in pages is stored in the OS specific
 * part of the struct vmci_queue structure.
 */
static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store,
                                        struct vmci_queue *produce_q,
                                        struct vmci_queue *consume_q)
{
        u64 produce_uva;
        u64 consume_uva;

        /*
         * The new style and the old style mapping only differ in
         * that we either get a single or two UVAs, so we split the
         * single UVA range at the appropriate spot.
         */
        produce_uva = page_store->pages;
        consume_uva = page_store->pages +
            produce_q->kernel_if->num_pages * PAGE_SIZE;
        return qp_host_get_user_memory(produce_uva, consume_uva, produce_q,
                                       consume_q);
}

/*
 * Releases and removes the references to user pages stored in the attach
 * struct.  Pages are released from the page cache and may become
 * swappable again.
 */
static void qp_host_unregister_user_memory(struct vmci_queue *produce_q,
                                           struct vmci_queue *consume_q)
{
        qp_release_pages(produce_q->kernel_if->header_page,
                         produce_q->kernel_if->num_pages, true);
        memset(produce_q->kernel_if->header_page, 0,
               sizeof(*produce_q->kernel_if->header_page) *
               produce_q->kernel_if->num_pages);
        qp_release_pages(consume_q->kernel_if->header_page,
                         consume_q->kernel_if->num_pages, true);
        memset(consume_q->kernel_if->header_page, 0,
               sizeof(*consume_q->kernel_if->header_page) *
               consume_q->kernel_if->num_pages);
}

/*
 * Once qp_host_register_user_memory has been performed on a
 * queue, the queue pair headers can be mapped into the
 * kernel. Once mapped, they must be unmapped with
 * qp_host_unmap_queues prior to calling
 * qp_host_unregister_user_memory.
 * Pages are pinned.
 */
static int qp_host_map_queues(struct vmci_queue *produce_q,
                              struct vmci_queue *consume_q)
{
        int result;

        if (!produce_q->q_header || !consume_q->q_header) {
                struct page *headers[2];

                if (produce_q->q_header != consume_q->q_header)
                        return VMCI_ERROR_QUEUEPAIR_MISMATCH;

                if (produce_q->kernel_if->header_page == NULL ||
                    *produce_q->kernel_if->header_page == NULL)
                        return VMCI_ERROR_UNAVAILABLE;

                headers[0] = *produce_q->kernel_if->header_page;
                headers[1] = *consume_q->kernel_if->header_page;

                produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL);
                if (produce_q->q_header != NULL) {
                        consume_q->q_header =
                            (struct vmci_queue_header *)((u8 *)
                                                         produce_q->q_header +
                                                         PAGE_SIZE);
                        result = VMCI_SUCCESS;
                } else {
                        pr_warn("vmap failed\n");
                        result = VMCI_ERROR_NO_MEM;
                }
        } else {
                result = VMCI_SUCCESS;
        }

        return result;
}

/*
 * Unmaps previously mapped queue pair headers from the kernel.
 * Pages are unpinned.
 */
static int qp_host_unmap_queues(u32 gid,
                                struct vmci_queue *produce_q,
                                struct vmci_queue *consume_q)
{
        if (produce_q->q_header) {
                if (produce_q->q_header < consume_q->q_header)
                        vunmap(produce_q->q_header);
                else
                        vunmap(consume_q->q_header);

                produce_q->q_header = NULL;
                consume_q->q_header = NULL;
        }

        return VMCI_SUCCESS;
}
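
/*
 * Illustration only, not part of the driver: the expected host-side
 * ordering of the helpers above for a broker entry (error handling
 * omitted; page_store is whatever the broker received from the VMX):
 *
 *   qp_host_register_user_memory(page_store, produce_q, consume_q);
 *   qp_host_map_queues(produce_q, consume_q);
 *   ...access the queue pair headers...
 *   qp_host_unmap_queues(gid, produce_q, consume_q);
 *   qp_host_unregister_user_memory(produce_q, consume_q);
 */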

/*
 * Finds the entry in the list corresponding to a given handle. Assumes
 * that the list is locked.
 */
static struct qp_entry *qp_list_find(struct qp_list *qp_list,
                                     struct vmci_handle handle)
{
        struct qp_entry *entry;

        if (vmci_handle_is_invalid(handle))
                return NULL;

        list_for_each_entry(entry, &qp_list->head, list_item) {
                if (vmci_handle_is_equal(entry->handle, handle))
                        return entry;
        }

        return NULL;
}

/*
 * Finds the entry in the list corresponding to a given handle.
 */
static struct qp_guest_endpoint *
qp_guest_handle_to_entry(struct vmci_handle handle)
{
        struct qp_guest_endpoint *entry;
        struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle);

        entry = qp ? container_of(
                qp, struct qp_guest_endpoint, qp) : NULL;
        return entry;
}

/*
 * Finds the entry in the list corresponding to a given handle.
 */
static struct qp_broker_entry *
qp_broker_handle_to_entry(struct vmci_handle handle)
{
        struct qp_broker_entry *entry;
        struct qp_entry *qp = qp_list_find(&qp_broker_list, handle);

        entry = qp ? container_of(
                qp, struct qp_broker_entry, qp) : NULL;
        return entry;
}

/*
 * Dispatches a queue pair event message directly into the local event
 * queue.
 */
static int qp_notify_peer_local(bool attach, struct vmci_handle handle)
{
        u32 context_id = vmci_get_context_id();
        struct vmci_event_qp ev;

        ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
        ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
                                          VMCI_CONTEXT_RESOURCE_ID);
        ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
        ev.msg.event_data.event =
            attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
        ev.payload.peer_id = context_id;
        ev.payload.handle = handle;

        return vmci_event_dispatch(&ev.msg.hdr);
}

/*
 * Allocates and initializes a qp_guest_endpoint structure.
 * Allocates a queue_pair rid (and handle) iff the given entry has
 * an invalid handle.  0 through VMCI_RESERVED_RESOURCE_ID_MAX
 * are reserved handles.  Assumes that the QP list mutex is held
 * by the caller.
 */
static struct qp_guest_endpoint *
qp_guest_endpoint_create(struct vmci_handle handle,
                         u32 peer,
                         u32 flags,
                         u64 produce_size,
                         u64 consume_size,
                         void *produce_q,
                         void *consume_q)
{
        int result;
        struct qp_guest_endpoint *entry;
        /* One page each for the queue headers. */
        const u64 num_ppns = DIV_ROUND_UP(produce_size, PAGE_SIZE) +
            DIV_ROUND_UP(consume_size, PAGE_SIZE) + 2;

        if (vmci_handle_is_invalid(handle)) {
                u32 context_id = vmci_get_context_id();

                handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
        }

        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
        if (entry) {
                entry->qp.peer = peer;
                entry->qp.flags = flags;
                entry->qp.produce_size = produce_size;
                entry->qp.consume_size = consume_size;
                entry->qp.ref_count = 0;
                entry->num_ppns = num_ppns;
                entry->produce_q = produce_q;
                entry->consume_q = consume_q;
                INIT_LIST_HEAD(&entry->qp.list_item);

                /* Add resource obj */
                result = vmci_resource_add(&entry->resource,
                                           VMCI_RESOURCE_TYPE_QPAIR_GUEST,
                                           handle);
                entry->qp.handle = vmci_resource_handle(&entry->resource);
                if ((result != VMCI_SUCCESS) ||
                    qp_list_find(&qp_guest_endpoints, entry->qp.handle)) {
                        pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
                                handle.context, handle.resource, result);
                        kfree(entry);
                        entry = NULL;
                }
        }
        return entry;
}

/*
 * Frees a qp_guest_endpoint structure.
 */
static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry)
{
        qp_free_ppn_set(&entry->ppn_set);
        qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
        qp_free_queue(entry->produce_q, entry->qp.produce_size);
        qp_free_queue(entry->consume_q, entry->qp.consume_size);
        /* Unlink from resource hash table and free callback */
        vmci_resource_remove(&entry->resource);

        kfree(entry);
}

/*
 * Helper to make a queue_pairAlloc hypercall when the driver is
 * supporting a guest device.
 */
static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry)
{
        struct vmci_qp_alloc_msg *alloc_msg;
        size_t msg_size;
        int result;

        if (!entry || entry->num_ppns <= 2)
                return VMCI_ERROR_INVALID_ARGS;

        msg_size = sizeof(*alloc_msg) +
            (size_t) entry->num_ppns * sizeof(u32);
        alloc_msg = kmalloc(msg_size, GFP_KERNEL);
        if (!alloc_msg)
                return VMCI_ERROR_NO_MEM;

        alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
                                              VMCI_QUEUEPAIR_ALLOC);
        alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE;
        alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE;
        alloc_msg->handle = entry->qp.handle;
        alloc_msg->peer = entry->qp.peer;
        alloc_msg->flags = entry->qp.flags;
        alloc_msg->produce_size = entry->qp.produce_size;
        alloc_msg->consume_size = entry->qp.consume_size;
        alloc_msg->num_ppns = entry->num_ppns;

        result = qp_populate_ppn_set((u8 *)alloc_msg + sizeof(*alloc_msg),
                                     &entry->ppn_set);
        if (result == VMCI_SUCCESS)
                result = vmci_send_datagram(&alloc_msg->hdr);

        kfree(alloc_msg);

        return result;
}
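
/*
 * Wire layout of the allocation hypercall built above (illustration
 * only): a struct vmci_qp_alloc_msg header followed by num_ppns u32
 * PPNs -- the produce queue's PPNs first, then the consume queue's,
 * as written by qp_populate_ppn_set().
 */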

/*
 * Helper to make a queue_pairDetach hypercall when the driver is
 * supporting a guest device.
 */
static int qp_detatch_hypercall(struct vmci_handle handle)
{
        struct vmci_qp_detach_msg detach_msg;

        detach_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
                                              VMCI_QUEUEPAIR_DETACH);
        detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
        detach_msg.hdr.payload_size = sizeof(handle);
        detach_msg.handle = handle;

        return vmci_send_datagram(&detach_msg.hdr);
}

/*
 * Adds the given entry to the list. Assumes that the list is locked.
 */
static void qp_list_add_entry(struct qp_list *qp_list, struct qp_entry *entry)
{
        if (entry)
                list_add(&entry->list_item, &qp_list->head);
}

/*
 * Removes the given entry from the list. Assumes that the list is locked.
 */
static void qp_list_remove_entry(struct qp_list *qp_list,
                                 struct qp_entry *entry)
{
        if (entry)
                list_del(&entry->list_item);
}

/*
 * Helper for VMCI queue_pair detach interface. Frees the physical
 * pages for the queue pair.
 */
static int qp_detatch_guest_work(struct vmci_handle handle)
{
        int result;
        struct qp_guest_endpoint *entry;
        u32 ref_count = ~0;     /* To avoid compiler warning below */

        mutex_lock(&qp_guest_endpoints.mutex);

        entry = qp_guest_handle_to_entry(handle);
        if (!entry) {
                mutex_unlock(&qp_guest_endpoints.mutex);
                return VMCI_ERROR_NOT_FOUND;
        }

        if (entry->qp.flags & VMCI_QPFLAG_LOCAL) {
                result = VMCI_SUCCESS;

                if (entry->qp.ref_count > 1) {
                        result = qp_notify_peer_local(false, handle);
                        /*
                         * We can fail to notify a local queuepair
                         * because we can't allocate.  We still want
                         * to release the entry if that happens, so
                         * don't bail out yet.
                         */
                }
        } else {
                result = qp_detatch_hypercall(handle);
                if (result < VMCI_SUCCESS) {
                        /*
                         * We failed to notify a non-local queuepair.
                         * That other queuepair might still be
                         * accessing the shared memory, so don't
                         * release the entry yet.  It will get cleaned
                         * up by VMCIqueue_pair_Exit() if necessary
                         * (assuming we are going away, otherwise why
                         * did this fail?).
                         */

                        mutex_unlock(&qp_guest_endpoints.mutex);
                        return result;
                }
        }

        /*
         * If we get here then we either failed to notify a local queuepair, or
         * we succeeded in all cases.  Release the entry if required.
         */

        entry->qp.ref_count--;
        if (entry->qp.ref_count == 0)
                qp_list_remove_entry(&qp_guest_endpoints, &entry->qp);

        /* If we didn't remove the entry, this could change once we unlock. */
        if (entry)
                ref_count = entry->qp.ref_count;

        mutex_unlock(&qp_guest_endpoints.mutex);

        if (ref_count == 0)
                qp_guest_endpoint_destroy(entry);

        return result;
}
1167
1168 /*
1169  * This functions handles the actual allocation of a VMCI queue
1170  * pair guest endpoint. Allocates physical pages for the queue
1171  * pair. It makes OS dependent calls through generic wrappers.
1172  */
1173 static int qp_alloc_guest_work(struct vmci_handle *handle,
1174                                struct vmci_queue **produce_q,
1175                                u64 produce_size,
1176                                struct vmci_queue **consume_q,
1177                                u64 consume_size,
1178                                u32 peer,
1179                                u32 flags,
1180                                u32 priv_flags)
1181 {
1182         const u64 num_produce_pages =
1183             DIV_ROUND_UP(produce_size, PAGE_SIZE) + 1;
1184         const u64 num_consume_pages =
1185             DIV_ROUND_UP(consume_size, PAGE_SIZE) + 1;
1186         void *my_produce_q = NULL;
1187         void *my_consume_q = NULL;
1188         int result;
1189         struct qp_guest_endpoint *queue_pair_entry = NULL;
1190
1191         if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS)
1192                 return VMCI_ERROR_NO_ACCESS;
1193
1194         mutex_lock(&qp_guest_endpoints.mutex);
1195
1196         queue_pair_entry = qp_guest_handle_to_entry(*handle);
1197         if (queue_pair_entry) {
1198                 if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
1199                         /* Local attach case. */
1200                         if (queue_pair_entry->qp.ref_count > 1) {
1201                                 pr_devel("Error attempting to attach more than once\n");
1202                                 result = VMCI_ERROR_UNAVAILABLE;
1203                                 goto error_keep_entry;
1204                         }
1205
1206                         if (queue_pair_entry->qp.produce_size != consume_size ||
1207                             queue_pair_entry->qp.consume_size !=
1208                             produce_size ||
1209                             queue_pair_entry->qp.flags !=
1210                             (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) {
1211                                 pr_devel("Error mismatched queue pair in local attach\n");
1212                                 result = VMCI_ERROR_QUEUEPAIR_MISMATCH;
1213                                 goto error_keep_entry;
1214                         }
1215
1216                         /*
1217                          * Do a local attach.  We swap the consume and
1218                          * produce queues for the attacher and deliver
1219                          * an attach event.
1220                          */
1221                         result = qp_notify_peer_local(true, *handle);
1222                         if (result < VMCI_SUCCESS)
1223                                 goto error_keep_entry;
1224
1225                         my_produce_q = queue_pair_entry->consume_q;
1226                         my_consume_q = queue_pair_entry->produce_q;
1227                         goto out;
1228                 }
1229
1230                 result = VMCI_ERROR_ALREADY_EXISTS;
1231                 goto error_keep_entry;
1232         }
1233
1234         my_produce_q = qp_alloc_queue(produce_size, flags);
1235         if (!my_produce_q) {
1236                 pr_warn("Error allocating pages for produce queue\n");
1237                 result = VMCI_ERROR_NO_MEM;
1238                 goto error;
1239         }
1240
1241         my_consume_q = qp_alloc_queue(consume_size, flags);
1242         if (!my_consume_q) {
1243                 pr_warn("Error allocating pages for consume queue\n");
1244                 result = VMCI_ERROR_NO_MEM;
1245                 goto error;
1246         }
1247
1248         queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags,
1249                                                     produce_size, consume_size,
1250                                                     my_produce_q, my_consume_q);
1251         if (!queue_pair_entry) {
1252                 pr_warn("Error allocating memory in %s\n", __func__);
1253                 result = VMCI_ERROR_NO_MEM;
1254                 goto error;
1255         }
1256
1257         result = qp_alloc_ppn_set(my_produce_q, num_produce_pages, my_consume_q,
1258                                   num_consume_pages,
1259                                   &queue_pair_entry->ppn_set);
1260         if (result < VMCI_SUCCESS) {
1261                 pr_warn("qp_alloc_ppn_set failed\n");
1262                 goto error;
1263         }
1264
1265         /*
1266          * It's only necessary to notify the host if this queue pair will be
1267          * attached to from another context.
1268          */
1269         if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
1270                 /* Local create case. */
1271                 u32 context_id = vmci_get_context_id();
1272
1273                 /*
1274                  * Enforce similar checks on local queue pairs as we
1275                  * do for regular ones.  The handle's context must
1276                  * match the creator or attacher context id (here they
1277                  * are both the current context id) and the
1278                  * attach-only flag cannot exist during create.  We
1279                  * also ensure specified peer is this context or an
1280                  * invalid one.
1281                  */
1282                 if (queue_pair_entry->qp.handle.context != context_id ||
1283                     (queue_pair_entry->qp.peer != VMCI_INVALID_ID &&
1284                      queue_pair_entry->qp.peer != context_id)) {
1285                         result = VMCI_ERROR_NO_ACCESS;
1286                         goto error;
1287                 }
1288
1289                 if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) {
1290                         result = VMCI_ERROR_NOT_FOUND;
1291                         goto error;
1292                 }
1293         } else {
1294                 result = qp_alloc_hypercall(queue_pair_entry);
1295                 if (result < VMCI_SUCCESS) {
1296                         pr_warn("qp_alloc_hypercall result = %d\n", result);
1297                         goto error;
1298                 }
1299         }
1300
1301         qp_init_queue_mutex((struct vmci_queue *)my_produce_q,
1302                             (struct vmci_queue *)my_consume_q);
1303
1304         qp_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp);
1305
1306  out:
1307         queue_pair_entry->qp.ref_count++;
1308         *handle = queue_pair_entry->qp.handle;
1309         *produce_q = (struct vmci_queue *)my_produce_q;
1310         *consume_q = (struct vmci_queue *)my_consume_q;
1311
1312         /*
1313          * We should initialize the queue pair header pages on a local
1314          * queue pair create.  For non-local queue pairs, the
1315          * hypervisor initializes the header pages in the create step.
1316          */
1317         if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) &&
1318             queue_pair_entry->qp.ref_count == 1) {
1319                 vmci_q_header_init((*produce_q)->q_header, *handle);
1320                 vmci_q_header_init((*consume_q)->q_header, *handle);
1321         }
1322
1323         mutex_unlock(&qp_guest_endpoints.mutex);
1324
1325         return VMCI_SUCCESS;
1326
1327  error:
1328         mutex_unlock(&qp_guest_endpoints.mutex);
1329         if (queue_pair_entry) {
1330                 /* The queues will be freed inside the destroy routine. */
1331                 qp_guest_endpoint_destroy(queue_pair_entry);
1332         } else {
1333                 qp_free_queue(my_produce_q, produce_size);
1334                 qp_free_queue(my_consume_q, consume_size);
1335         }
1336         return result;
1337
1338  error_keep_entry:
1339         /* This path should only be used when an existing entry was found. */
1340         mutex_unlock(&qp_guest_endpoints.mutex);
1341         return result;
1342 }
1343
1344 /*
1345  * The first endpoint issuing a queue pair allocation will create the state
1346  * of the queue pair in the queue pair broker.
1347  *
1348  * If the creator is a guest, it will associate a VMX virtual address range
1349  * with the queue pair as specified by the page_store. For compatibility with
1350  * older VMX'en, that would use a separate step to set the VMX virtual
1351  * address range, the virtual address range can be registered later using
1352  * vmci_qp_broker_set_page_store. In that case, a page_store of NULL should be
1353  * used.
1354  *
1355  * If the creator is the host, a page_store of NULL should be used as well,
1356  * since the host is not able to supply a page store for the queue pair.
1357  *
1358  * For older VMX and host callers, the queue pair will be created in the
1359  * VMCIQPB_CREATED_NO_MEM state, and for current VMX callers, it will be
1360  * created in VMCOQPB_CREATED_MEM state.
1361  */
1362 static int qp_broker_create(struct vmci_handle handle,
1363                             u32 peer,
1364                             u32 flags,
1365                             u32 priv_flags,
1366                             u64 produce_size,
1367                             u64 consume_size,
1368                             struct vmci_qp_page_store *page_store,
1369                             struct vmci_ctx *context,
1370                             vmci_event_release_cb wakeup_cb,
1371                             void *client_data, struct qp_broker_entry **ent)
1372 {
1373         struct qp_broker_entry *entry = NULL;
1374         const u32 context_id = vmci_ctx_get_id(context);
1375         bool is_local = flags & VMCI_QPFLAG_LOCAL;
1376         int result;
1377         u64 guest_produce_size;
1378         u64 guest_consume_size;
1379
1380         /* Do not create if the caller asked not to. */
1381         if (flags & VMCI_QPFLAG_ATTACH_ONLY)
1382                 return VMCI_ERROR_NOT_FOUND;
1383
1384         /*
1385          * Creator's context ID should match handle's context ID or the creator
1386          * must allow the context in handle's context ID as the "peer".
1387          */
1388         if (handle.context != context_id && handle.context != peer)
1389                 return VMCI_ERROR_NO_ACCESS;
1390
1391         if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(peer))
1392                 return VMCI_ERROR_DST_UNREACHABLE;
1393
1394         /*
1395          * Creator's context ID for local queue pairs should match the
1396          * peer, if a peer is specified.
1397          */
1398         if (is_local && peer != VMCI_INVALID_ID && context_id != peer)
1399                 return VMCI_ERROR_NO_ACCESS;
1400
1401         entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
1402         if (!entry)
1403                 return VMCI_ERROR_NO_MEM;
1404
1405         if (context_id == VMCI_HOST_CONTEXT_ID && !is_local) {
1406                 /*
1407                  * The queue pair broker entry stores values from the guest
1408                  * point of view, so a creating host side endpoint should swap
1409                  * produce and consume values -- unless it is a local queue
1410                  * pair, in which case no swapping is necessary, since the local
1411                  * attacher will swap queues.
1412                  */
1413
1414                 guest_produce_size = consume_size;
1415                 guest_consume_size = produce_size;
1416         } else {
1417                 guest_produce_size = produce_size;
1418                 guest_consume_size = consume_size;
1419         }
1420
1421         entry->qp.handle = handle;
1422         entry->qp.peer = peer;
1423         entry->qp.flags = flags;
1424         entry->qp.produce_size = guest_produce_size;
1425         entry->qp.consume_size = guest_consume_size;
1426         entry->qp.ref_count = 1;
1427         entry->create_id = context_id;
1428         entry->attach_id = VMCI_INVALID_ID;
1429         entry->state = VMCIQPB_NEW;
1430         entry->require_trusted_attach =
1431             !!(context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED);
1432         entry->created_by_trusted =
1433             !!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED);
1434         entry->vmci_page_files = false;
1435         entry->wakeup_cb = wakeup_cb;
1436         entry->client_data = client_data;
1437         entry->produce_q = qp_host_alloc_queue(guest_produce_size);
1438         if (entry->produce_q == NULL) {
1439                 result = VMCI_ERROR_NO_MEM;
1440                 goto error;
1441         }
1442         entry->consume_q = qp_host_alloc_queue(guest_consume_size);
1443         if (entry->consume_q == NULL) {
1444                 result = VMCI_ERROR_NO_MEM;
1445                 goto error;
1446         }
1447
1448         qp_init_queue_mutex(entry->produce_q, entry->consume_q);
1449
1450         INIT_LIST_HEAD(&entry->qp.list_item);
1451
1452         if (is_local) {
1453                 u8 *tmp;
1454
1455                 entry->local_mem = kcalloc(QPE_NUM_PAGES(entry->qp),
1456                                            PAGE_SIZE, GFP_KERNEL);
1457                 if (entry->local_mem == NULL) {
1458                         result = VMCI_ERROR_NO_MEM;
1459                         goto error;
1460                 }
1461                 entry->state = VMCIQPB_CREATED_MEM;
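                /*
                 * Layout of local_mem (illustrative, with made-up sizes):
                 * the produce header page comes first, then the produce
                 * data pages, then the consume header page, then the
                 * consume data pages.  With 4 KiB pages and a 12 KiB
                 * produce_size, the consume header below starts at byte
                 * offset (3 + 1) * 4096 = 16384.
                 */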
1462                 entry->produce_q->q_header = entry->local_mem;
1463                 tmp = (u8 *)entry->local_mem + PAGE_SIZE *
1464                     (DIV_ROUND_UP(entry->qp.produce_size, PAGE_SIZE) + 1);
1465                 entry->consume_q->q_header = (struct vmci_queue_header *)tmp;
1466         } else if (page_store) {
1467                 /*
1468                  * The VMX already initialized the queue pair headers, so no
1469                  * need for the kernel side to do that.
1470                  */
1471                 result = qp_host_register_user_memory(page_store,
1472                                                       entry->produce_q,
1473                                                       entry->consume_q);
1474                 if (result < VMCI_SUCCESS)
1475                         goto error;
1476
1477                 entry->state = VMCIQPB_CREATED_MEM;
1478         } else {
1479                 /*
1480                  * A create without a page_store may be either a host
1481                  * side create (in which case we are waiting for the
1482                  * guest side to supply the memory) or an old style
1483                  * queue pair create (in which case we will expect a
1484                  * set page store call as the next step).
1485                  */
1486                 entry->state = VMCIQPB_CREATED_NO_MEM;
1487         }
1488
1489         qp_list_add_entry(&qp_broker_list, &entry->qp);
1490         if (ent != NULL)
1491                 *ent = entry;
1492
1493         /* Add to resource obj */
1494         result = vmci_resource_add(&entry->resource,
1495                                    VMCI_RESOURCE_TYPE_QPAIR_HOST,
1496                                    handle);
1497         if (result != VMCI_SUCCESS) {
1498                 pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
1499                         handle.context, handle.resource, result);
1500                 goto error;
1501         }
1502
1503         entry->qp.handle = vmci_resource_handle(&entry->resource);
1504         if (is_local) {
1505                 vmci_q_header_init(entry->produce_q->q_header,
1506                                    entry->qp.handle);
1507                 vmci_q_header_init(entry->consume_q->q_header,
1508                                    entry->qp.handle);
1509         }
1510
1511         vmci_ctx_qp_create(context, entry->qp.handle);
1512
1513         return VMCI_SUCCESS;
1514
1515  error:
1516         if (entry != NULL) {
1517                 qp_host_free_queue(entry->produce_q, guest_produce_size);
1518                 qp_host_free_queue(entry->consume_q, guest_consume_size);
1519                 kfree(entry);
1520         }
1521
1522         return result;
1523 }
1524
1525 /*
1526  * Enqueues an event datagram to notify the peer VM attached to
1527  * the given queue pair handle about attach/detach event by the
1528  * given VM.  Returns Payload size of datagram enqueued on
1529  * success, error code otherwise.
1530  */
1531 static int qp_notify_peer(bool attach,
1532                           struct vmci_handle handle,
1533                           u32 my_id,
1534                           u32 peer_id)
1535 {
1536         int rv;
1537         struct vmci_event_qp ev;
1538
1539         if (vmci_handle_is_invalid(handle) || my_id == VMCI_INVALID_ID ||
1540             peer_id == VMCI_INVALID_ID)
1541                 return VMCI_ERROR_INVALID_ARGS;
1542
1543         /*
1544          * In vmci_ctx_enqueue_datagram() we enforce the upper limit on
1545          * the number of pending events from the hypervisor to a given VM;
1546          * otherwise a rogue VM could do an arbitrary number of attach
1547          * and detach operations, causing memory pressure in the host
1548          * kernel.
1549          */
1550
1551         ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER);
1552         ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
1553                                           VMCI_CONTEXT_RESOURCE_ID);
1554         ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
1555         ev.msg.event_data.event = attach ?
1556             VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
1557         ev.payload.handle = handle;
1558         ev.payload.peer_id = my_id;
1559
1560         rv = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
1561                                     &ev.msg.hdr, false);
1562         if (rv < VMCI_SUCCESS)
1563                 pr_warn("Failed to enqueue queue_pair %s event datagram for context (ID=0x%x)\n",
1564                         attach ? "ATTACH" : "DETACH", peer_id);
1565
1566         return rv;
1567 }
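
/*
 * Illustrative sketch (not part of this driver; the callback name is
 * hypothetical): a guest driver could observe these notifications by
 * subscribing to the peer-attach event through the public VMCI API.
 *
 *	static void attach_cb(u32 sub_id, const struct vmci_event_data *ed,
 *			      void *client_data)
 *	{
 *		const struct vmci_event_payload_qp *payload =
 *			vmci_event_data_const_payload(ed);
 *
 *		pr_info("peer 0x%x attached to qp 0x%x:0x%x\n",
 *			payload->peer_id, payload->handle.context,
 *			payload->handle.resource);
 *	}
 *
 *	u32 sub_id;
 *	vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH, attach_cb,
 *			     NULL, &sub_id);
 */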
1568
1569 /*
1570  * The second endpoint issuing a queue pair allocation will attach to
1571  * the queue pair registered with the queue pair broker.
1572  *
1573  * If the attacher is a guest, it will associate a VMX virtual address
1574  * range with the queue pair as specified by the page_store. At this
1575  * point, the already attached host endpoint may start using the queue
1576  * pair, and an attach event is sent to it. For compatibility with
1577  * older VMX'en, which used a separate step to set the VMX virtual
1578  * address range, the virtual address range can be registered later
1579  * using vmci_qp_broker_set_page_store. In that case, a page_store of
1580  * NULL should be used, and the attach event will be generated once
1581  * the actual page store has been set.
1582  *
1583  * If the attacher is the host, a page_store of NULL should be used as
1584  * well, since the page store information is already set by the guest.
1585  *
1586  * For new VMX and host callers, the queue pair will be moved to the
1587  * VMCIQPB_ATTACHED_MEM state, and for older VMX callers, it will be
1588  * moved to the VMCIQPB_ATTACHED_NO_MEM state.
1589  */
1590 static int qp_broker_attach(struct qp_broker_entry *entry,
1591                             u32 peer,
1592                             u32 flags,
1593                             u32 priv_flags,
1594                             u64 produce_size,
1595                             u64 consume_size,
1596                             struct vmci_qp_page_store *page_store,
1597                             struct vmci_ctx *context,
1598                             vmci_event_release_cb wakeup_cb,
1599                             void *client_data,
1600                             struct qp_broker_entry **ent)
1601 {
1602         const u32 context_id = vmci_ctx_get_id(context);
1603         bool is_local = flags & VMCI_QPFLAG_LOCAL;
1604         int result;
1605
1606         if (entry->state != VMCIQPB_CREATED_NO_MEM &&
1607             entry->state != VMCIQPB_CREATED_MEM)
1608                 return VMCI_ERROR_UNAVAILABLE;
1609
1610         if (is_local) {
1611                 if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) ||
1612                     context_id != entry->create_id) {
1613                         return VMCI_ERROR_INVALID_ARGS;
1614                 }
1615         } else if (context_id == entry->create_id ||
1616                    context_id == entry->attach_id) {
1617                 return VMCI_ERROR_ALREADY_EXISTS;
1618         }
1619
1620         if (VMCI_CONTEXT_IS_VM(context_id) &&
1621             VMCI_CONTEXT_IS_VM(entry->create_id))
1622                 return VMCI_ERROR_DST_UNREACHABLE;
1623
1624         /*
1625          * If we are attaching from a restricted context then the queuepair
1626          * must have been created by a trusted endpoint.
1627          */
1628         if ((context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
1629             !entry->created_by_trusted)
1630                 return VMCI_ERROR_NO_ACCESS;
1631
1632         /*
1633          * If we are attaching to a queuepair that was created by a restricted
1634          * context then we must be trusted.
1635          */
1636         if (entry->require_trusted_attach &&
1637             (!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED)))
1638                 return VMCI_ERROR_NO_ACCESS;
1639
1640         /*
1641          * If the creator specifies VMCI_INVALID_ID in "peer" field, access
1642          * control check is not performed.
1643          */
1644         if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != context_id)
1645                 return VMCI_ERROR_NO_ACCESS;
1646
1647         if (entry->create_id == VMCI_HOST_CONTEXT_ID) {
1648                 /*
1649                  * Do not attach if the caller doesn't support Host Queue Pairs
1650                  * and a host created this queue pair.
1651                  */
1652
1653                 if (!vmci_ctx_supports_host_qp(context))
1654                         return VMCI_ERROR_INVALID_RESOURCE;
1655
1656         } else if (context_id == VMCI_HOST_CONTEXT_ID) {
1657                 struct vmci_ctx *create_context;
1658                 bool supports_host_qp;
1659
1660                 /*
1661                  * Do not attach a host to a user created queue pair if that
1662                  * user doesn't support host queue pair end points.
1663                  */
1664
1665                 create_context = vmci_ctx_get(entry->create_id);
1666                 supports_host_qp = vmci_ctx_supports_host_qp(create_context);
1667                 vmci_ctx_put(create_context);
1668
1669                 if (!supports_host_qp)
1670                         return VMCI_ERROR_INVALID_RESOURCE;
1671         }
1672
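        /*
         * The attacher's flags must match the creator's, apart from the
         * asymmetry flags: the creator may carry VMCI_QP_ASYMM and the
         * attacher the corresponding VMCI_QP_ASYMM_PEER.
         */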
1673         if ((entry->qp.flags & ~VMCI_QP_ASYMM) != (flags & ~VMCI_QP_ASYMM_PEER))
1674                 return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1675
1676         if (context_id != VMCI_HOST_CONTEXT_ID) {
1677                 /*
1678                  * The queue pair broker entry stores values from the guest
1679                  * point of view, so an attaching guest should match the values
1680                  * stored in the entry.
1681                  */
1682
1683                 if (entry->qp.produce_size != produce_size ||
1684                     entry->qp.consume_size != consume_size) {
1685                         return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1686                 }
1687         } else if (entry->qp.produce_size != consume_size ||
1688                    entry->qp.consume_size != produce_size) {
1689                 return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1690         }
1691
1692         if (context_id != VMCI_HOST_CONTEXT_ID) {
1693                 /*
1694                  * If a guest attached to a queue pair, it will supply
1695                  * the backing memory.  If this is a pre NOVMVM vmx,
1696                  * the backing memory will be supplied by calling
1697                  * vmci_qp_broker_set_page_store() following the
1698                  * return of the vmci_qp_broker_alloc() call. If it is
1699                  * a vmx of version NOVMVM or later, the page store
1700                  * must be supplied as part of the
1701                  * vmci_qp_broker_alloc call.  In all cases, the
1702                  * initially created queue pair must not have any
1703                  * memory associated with it yet.
1704                  */
1705
1706                 if (entry->state != VMCIQPB_CREATED_NO_MEM)
1707                         return VMCI_ERROR_INVALID_ARGS;
1708
1709                 if (page_store != NULL) {
1710                         /*
1711                          * Patch up host state to point to guest
1712                          * supplied memory. The VMX already
1713                          * initialized the queue pair headers, so no
1714                          * need for the kernel side to do that.
1715                          */
1716
1717                         result = qp_host_register_user_memory(page_store,
1718                                                               entry->produce_q,
1719                                                               entry->consume_q);
1720                         if (result < VMCI_SUCCESS)
1721                                 return result;
1722
1723                         /*
1724                          * Preemptively load in the headers if non-blocking to
1725                          * prevent blocking later.
1726                          */
1727                         if (entry->qp.flags & VMCI_QPFLAG_NONBLOCK) {
1728                                 result = qp_host_map_queues(entry->produce_q,
1729                                                             entry->consume_q);
1730                                 if (result < VMCI_SUCCESS) {
1731                                         qp_host_unregister_user_memory(
1732                                                 entry->produce_q,
1733                                                 entry->consume_q);
1734                                         return result;
1735                                 }
1736                         }
1737
1738                         entry->state = VMCIQPB_ATTACHED_MEM;
1739                 } else {
1740                         entry->state = VMCIQPB_ATTACHED_NO_MEM;
1741                 }
1742         } else if (entry->state == VMCIQPB_CREATED_NO_MEM) {
1743                 /*
1744                  * The host side is attempting to attach to a queue
1745                  * pair that doesn't have any memory associated with
1746                  * it. This must be a pre NOVMVM vmx that hasn't set
1747                  * the page store information yet, or a quiesced VM.
1748                  */
1749
1750                 return VMCI_ERROR_UNAVAILABLE;
1751         } else {
1752                 /*
1753                  * For non-blocking queue pairs, we cannot rely on
1754                  * enqueue/dequeue to map in the pages on the
1755                  * host-side, since it may block, so we make an
1756                  * attempt here.
1757                  */
1758
1759                 if (flags & VMCI_QPFLAG_NONBLOCK) {
1760                         result =
1761                             qp_host_map_queues(entry->produce_q,
1762                                                entry->consume_q);
1763                         if (result < VMCI_SUCCESS)
1764                                 return result;
1765
1766                         entry->qp.flags |= flags &
1767                             (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED);
1768                 }
1769
1770                 /* The host side has successfully attached to a queue pair. */
1771                 entry->state = VMCIQPB_ATTACHED_MEM;
1772         }
1773
1774         if (entry->state == VMCIQPB_ATTACHED_MEM) {
1775                 result =
1776                     qp_notify_peer(true, entry->qp.handle, context_id,
1777                                    entry->create_id);
1778                 if (result < VMCI_SUCCESS)
1779                         pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
1780                                 entry->create_id, entry->qp.handle.context,
1781                                 entry->qp.handle.resource);
1782         }
1783
1784         entry->attach_id = context_id;
1785         entry->qp.ref_count++;
1786         if (wakeup_cb) {
1787                 entry->wakeup_cb = wakeup_cb;
1788                 entry->client_data = client_data;
1789         }
1790
1791         /*
1792          * When attaching to local queue pairs, the context already has
1793          * an entry tracking the queue pair, so don't add another one.
1794          */
1795         if (!is_local)
1796                 vmci_ctx_qp_create(context, entry->qp.handle);
1797
1798         if (ent != NULL)
1799                 *ent = entry;
1800
1801         return VMCI_SUCCESS;
1802 }
1803
1804 /*
1805  * Queue pair alloc helper for use when setting up queue pair endpoints
1806  * on the host.
1807  */
1808 static int qp_broker_alloc(struct vmci_handle handle,
1809                            u32 peer,
1810                            u32 flags,
1811                            u32 priv_flags,
1812                            u64 produce_size,
1813                            u64 consume_size,
1814                            struct vmci_qp_page_store *page_store,
1815                            struct vmci_ctx *context,
1816                            vmci_event_release_cb wakeup_cb,
1817                            void *client_data,
1818                            struct qp_broker_entry **ent,
1819                            bool *swap)
1820 {
1821         const u32 context_id = vmci_ctx_get_id(context);
1822         bool create;
1823         struct qp_broker_entry *entry = NULL;
1824         bool is_local = flags & VMCI_QPFLAG_LOCAL;
1825         int result;
1826
1827         if (vmci_handle_is_invalid(handle) ||
1828             (flags & ~VMCI_QP_ALL_FLAGS) || is_local ||
1829             !(produce_size || consume_size) ||
1830             !context || context_id == VMCI_INVALID_ID ||
1831             handle.context == VMCI_INVALID_ID) {
1832                 return VMCI_ERROR_INVALID_ARGS;
1833         }
1834
1835         if (page_store && !VMCI_QP_PAGESTORE_IS_WELLFORMED(page_store))
1836                 return VMCI_ERROR_INVALID_ARGS;
1837
1838         /*
1839          * In the initial argument check, we ensure that non-vmkernel hosts
1840          * are not allowed to create local queue pairs.
1841          */
1842
1843         mutex_lock(&qp_broker_list.mutex);
1844
1845         if (!is_local && vmci_ctx_qp_exists(context, handle)) {
1846                 pr_devel("Context (ID=0x%x) already attached to queue pair (handle=0x%x:0x%x)\n",
1847                          context_id, handle.context, handle.resource);
1848                 mutex_unlock(&qp_broker_list.mutex);
1849                 return VMCI_ERROR_ALREADY_EXISTS;
1850         }
1851
1852         if (handle.resource != VMCI_INVALID_ID)
1853                 entry = qp_broker_handle_to_entry(handle);
1854
1855         if (!entry) {
1856                 create = true;
1857                 result =
1858                     qp_broker_create(handle, peer, flags, priv_flags,
1859                                      produce_size, consume_size, page_store,
1860                                      context, wakeup_cb, client_data, ent);
1861         } else {
1862                 create = false;
1863                 result =
1864                     qp_broker_attach(entry, peer, flags, priv_flags,
1865                                      produce_size, consume_size, page_store,
1866                                      context, wakeup_cb, client_data, ent);
1867         }
1868
1869         mutex_unlock(&qp_broker_list.mutex);
1870
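        /*
         * The broker entry stores the queues from the guest's point of
         * view, so a host endpoint must swap its produce and consume
         * queues, except when the host itself created a local pair, in
         * which case the local attacher performs the swap instead.
         */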
1871         if (swap)
1872                 *swap = (context_id == VMCI_HOST_CONTEXT_ID) &&
1873                     !(create && is_local);
1874
1875         return result;
1876 }
1877
1878 /*
1879  * This function implements the kernel API for allocating a queue
1880  * pair.
1881  */
1882 static int qp_alloc_host_work(struct vmci_handle *handle,
1883                               struct vmci_queue **produce_q,
1884                               u64 produce_size,
1885                               struct vmci_queue **consume_q,
1886                               u64 consume_size,
1887                               u32 peer,
1888                               u32 flags,
1889                               u32 priv_flags,
1890                               vmci_event_release_cb wakeup_cb,
1891                               void *client_data)
1892 {
1893         struct vmci_handle new_handle;
1894         struct vmci_ctx *context;
1895         struct qp_broker_entry *entry;
1896         int result;
1897         bool swap;
1898
1899         if (vmci_handle_is_invalid(*handle)) {
1900                 new_handle = vmci_make_handle(
1901                         VMCI_HOST_CONTEXT_ID, VMCI_INVALID_ID);
1902         } else
1903                 new_handle = *handle;
1904
1905         context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
1906         entry = NULL;
1907         result =
1908             qp_broker_alloc(new_handle, peer, flags, priv_flags,
1909                             produce_size, consume_size, NULL, context,
1910                             wakeup_cb, client_data, &entry, &swap);
1911         if (result == VMCI_SUCCESS) {
1912                 if (swap) {
1913                         /*
1914                          * If this is a local queue pair, the attacher
1915                          * will swap around produce and consume
1916                          * queues.
1917                          */
1918
1919                         *produce_q = entry->consume_q;
1920                         *consume_q = entry->produce_q;
1921                 } else {
1922                         *produce_q = entry->produce_q;
1923                         *consume_q = entry->consume_q;
1924                 }
1925
1926                 *handle = vmci_resource_handle(&entry->resource);
1927         } else {
1928                 *handle = VMCI_INVALID_HANDLE;
1929                 pr_devel("queue pair broker failed to alloc (result=%d)\n",
1930                          result);
1931         }
1932         vmci_ctx_put(context);
1933         return result;
1934 }
1935
1936 /*
1937  * Allocates a VMCI queue_pair. Only checks validity of input
1938  * arguments. The real work is done in the host or guest
1939  * specific function.
1940  */
1941 int vmci_qp_alloc(struct vmci_handle *handle,
1942                   struct vmci_queue **produce_q,
1943                   u64 produce_size,
1944                   struct vmci_queue **consume_q,
1945                   u64 consume_size,
1946                   u32 peer,
1947                   u32 flags,
1948                   u32 priv_flags,
1949                   bool guest_endpoint,
1950                   vmci_event_release_cb wakeup_cb,
1951                   void *client_data)
1952 {
1953         if (!handle || !produce_q || !consume_q ||
1954             (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS))
1955                 return VMCI_ERROR_INVALID_ARGS;
1956
1957         if (guest_endpoint) {
1958                 return qp_alloc_guest_work(handle, produce_q,
1959                                            produce_size, consume_q,
1960                                            consume_size, peer,
1961                                            flags, priv_flags);
1962         } else {
1963                 return qp_alloc_host_work(handle, produce_q,
1964                                           produce_size, consume_q,
1965                                           consume_size, peer, flags,
1966                                           priv_flags, wakeup_cb, client_data);
1967         }
1968 }
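
/*
 * Illustrative host-side usage (a sketch with made-up sizes, not part
 * of this driver): passing VMCI_INVALID_HANDLE lets the broker pick
 * the resource ID.
 *
 *	struct vmci_handle h = VMCI_INVALID_HANDLE;
 *	struct vmci_queue *produce_q, *consume_q;
 *	int rv;
 *
 *	rv = vmci_qp_alloc(&h, &produce_q, 4096, &consume_q, 4096,
 *			   VMCI_INVALID_ID, 0, VMCI_NO_PRIVILEGE_FLAGS,
 *			   false, NULL, NULL);
 *	if (rv == VMCI_SUCCESS)
 *		... use the queues, then qp_detatch(h, false) ...
 */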
1969
1970 /*
1971  * This function implements the host kernel API for detaching from
1972  * a queue pair.
1973  */
1974 static int qp_detatch_host_work(struct vmci_handle handle)
1975 {
1976         int result;
1977         struct vmci_ctx *context;
1978
1979         context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
1980
1981         result = vmci_qp_broker_detach(handle, context);
1982
1983         vmci_ctx_put(context);
1984         return result;
1985 }
1986
1987 /*
1988  * Detaches from a VMCI queue_pair. Only checks validity of input argument.
1989  * Real work is done in the host or guest specific function.
1990  */
1991 static int qp_detatch(struct vmci_handle handle, bool guest_endpoint)
1992 {
1993         if (vmci_handle_is_invalid(handle))
1994                 return VMCI_ERROR_INVALID_ARGS;
1995
1996         if (guest_endpoint)
1997                 return qp_detatch_guest_work(handle);
1998         else
1999                 return qp_detatch_host_work(handle);
2000 }
2001
2002 /*
2003  * Returns the entry from the head of the list. Assumes that the list is
2004  * locked.
2005  */
2006 static struct qp_entry *qp_list_get_head(struct qp_list *qp_list)
2007 {
2008         if (!list_empty(&qp_list->head)) {
2009                 struct qp_entry *entry =
2010                     list_first_entry(&qp_list->head, struct qp_entry,
2011                                      list_item);
2012                 return entry;
2013         }
2014
2015         return NULL;
2016 }
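
/*
 * Where the helper is available, the lookup above is equivalent to
 * list_first_entry_or_null(&qp_list->head, struct qp_entry, list_item).
 */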
2017
2018 void vmci_qp_broker_exit(void)
2019 {
2020         struct qp_entry *entry;
2021         struct qp_broker_entry *be;
2022
2023         mutex_lock(&qp_broker_list.mutex);
2024
2025         while ((entry = qp_list_get_head(&qp_broker_list))) {
2026                 be = (struct qp_broker_entry *)entry;
2027
2028                 qp_list_remove_entry(&qp_broker_list, entry);
2029                 kfree(be);
2030         }
2031
2032         mutex_unlock(&qp_broker_list.mutex);
2033 }
2034
2035 /*
2036  * Requests that a queue pair be allocated with the VMCI queue
2037  * pair broker. Allocates a queue pair entry if one does not
2038  * exist. Attaches to one if it exists, and retrieves the page
2039  * files backing that queue_pair.  The queue pair broker lock is
2040  * acquired internally.
2041  */
2042 int vmci_qp_broker_alloc(struct vmci_handle handle,
2043                          u32 peer,
2044                          u32 flags,
2045                          u32 priv_flags,
2046                          u64 produce_size,
2047                          u64 consume_size,
2048                          struct vmci_qp_page_store *page_store,
2049                          struct vmci_ctx *context)
2050 {
2051         return qp_broker_alloc(handle, peer, flags, priv_flags,
2052                                produce_size, consume_size,
2053                                page_store, context, NULL, NULL, NULL, NULL);
2054 }
2055
2056 /*
2057  * VMX'en with versions lower than VMCI_VERSION_NOVMVM use a separate
2058  * step to add the UVAs of the VMX mapping of the queue pair. This function
2059  * provides backwards compatibility with such VMX'en, and takes care of
2060  * registering the page store for a queue pair previously allocated by the
2061  * VMX during create or attach. This function will move the queue pair state
2062  * either from VMCIQPB_CREATED_NO_MEM to VMCIQPB_CREATED_MEM or from
2063  * VMCIQPB_ATTACHED_NO_MEM to VMCIQPB_ATTACHED_MEM. If moving to the
2064  * attached state with memory, the queue pair is ready to be used by the
2065  * host peer, and an attached event will be generated.
2066  *
2067  * The queue pair broker lock is acquired internally.
2068  *
2069  * This function is only used by the hosted platform, since there is no
2070  * issue with backwards compatibility for vmkernel.
2071  */
2072 int vmci_qp_broker_set_page_store(struct vmci_handle handle,
2073                                   u64 produce_uva,
2074                                   u64 consume_uva,
2075                                   struct vmci_ctx *context)
2076 {
2077         struct qp_broker_entry *entry;
2078         int result;
2079         const u32 context_id = vmci_ctx_get_id(context);
2080
2081         if (vmci_handle_is_invalid(handle) || !context ||
2082             context_id == VMCI_INVALID_ID)
2083                 return VMCI_ERROR_INVALID_ARGS;
2084
2085         /*
2086          * We only support guest to host queue pairs, so the VMX must
2087          * supply UVAs for the mapped page files.
2088          */
2089
2090         if (produce_uva == 0 || consume_uva == 0)
2091                 return VMCI_ERROR_INVALID_ARGS;
2092
2093         mutex_lock(&qp_broker_list.mutex);
2094
2095         if (!vmci_ctx_qp_exists(context, handle)) {
2096                 pr_warn("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2097                         context_id, handle.context, handle.resource);
2098                 result = VMCI_ERROR_NOT_FOUND;
2099                 goto out;
2100         }
2101
2102         entry = qp_broker_handle_to_entry(handle);
2103         if (!entry) {
2104                 result = VMCI_ERROR_NOT_FOUND;
2105                 goto out;
2106         }
2107
2108         /*
2109          * If I'm the owner then I can set the page store.
2110          *
2111          * Or, if a host created the queue_pair and I'm the attached peer
2112          * then I can set the page store.
2113          */
2114         if (entry->create_id != context_id &&
2115             (entry->create_id != VMCI_HOST_CONTEXT_ID ||
2116              entry->attach_id != context_id)) {
2117                 result = VMCI_ERROR_QUEUEPAIR_NOTOWNER;
2118                 goto out;
2119         }
2120
2121         if (entry->state != VMCIQPB_CREATED_NO_MEM &&
2122             entry->state != VMCIQPB_ATTACHED_NO_MEM) {
2123                 result = VMCI_ERROR_UNAVAILABLE;
2124                 goto out;
2125         }
2126
2127         result = qp_host_get_user_memory(produce_uva, consume_uva,
2128                                          entry->produce_q, entry->consume_q);
2129         if (result < VMCI_SUCCESS)
2130                 goto out;
2131
2132         result = qp_host_map_queues(entry->produce_q, entry->consume_q);
2133         if (result < VMCI_SUCCESS) {
2134                 qp_host_unregister_user_memory(entry->produce_q,
2135                                                entry->consume_q);
2136                 goto out;
2137         }
2138
2139         if (entry->state == VMCIQPB_CREATED_NO_MEM)
2140                 entry->state = VMCIQPB_CREATED_MEM;
2141         else
2142                 entry->state = VMCIQPB_ATTACHED_MEM;
2143
2144         entry->vmci_page_files = true;
2145
2146         if (entry->state == VMCIQPB_ATTACHED_MEM) {
2147                 result =
2148                     qp_notify_peer(true, handle, context_id, entry->create_id);
2149                 if (result < VMCI_SUCCESS) {
2150                         pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
2151                                 entry->create_id, entry->qp.handle.context,
2152                                 entry->qp.handle.resource);
2153                 }
2154         }
2155
2156         result = VMCI_SUCCESS;
2157  out:
2158         mutex_unlock(&qp_broker_list.mutex);
2159         return result;
2160 }
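
/*
 * Illustrative old-style flow (a sketch; handle, sizes and UVAs are
 * made up): the VMX first allocates without a page store and then
 * supplies the UVAs in a second step.
 *
 *	vmci_qp_broker_alloc(handle, peer, flags, priv_flags,
 *			     produce_size, consume_size, NULL, context);
 *	... the VMX allocates and maps its queue pair page files ...
 *	vmci_qp_broker_set_page_store(handle, produce_uva, consume_uva,
 *				      context);
 */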
2161
2162 /*
2163  * Resets saved queue headers for the given QP broker
2164  * entry. Should be used when guest memory becomes available
2165  * again, or the guest detaches.
2166  */
2167 static void qp_reset_saved_headers(struct qp_broker_entry *entry)
2168 {
2169         entry->produce_q->saved_header = NULL;
2170         entry->consume_q->saved_header = NULL;
2171 }
2172
2173 /*
2174  * The main entry point for detaching from a queue pair registered with the
2175  * queue pair broker. If more than one endpoint is attached to the queue
2176  * pair, the first endpoint to detach will mainly decrement a reference
2177  * count and generate a notification to its peer. The last endpoint to
2178  * detach will clean up the queue pair state registered with the broker.
2179  *
2180  * When a guest endpoint detaches, it will unmap and unregister the guest
2181  * memory backing the queue pair. If the host is still attached, it will
2182  * no longer be able to access the queue pair content.
2183  *
2184  * If the queue pair is already in a state where there is no memory
2185  * registered for the queue pair (any *_NO_MEM state), it will transition to
2186  * the VMCIQPB_SHUTDOWN_NO_MEM state. This will also happen, if a guest
2187  * endpoint is the first of two endpoints to detach. If the host endpoint is
2188  * the first out of two to detach, the queue pair will move to the
2189  * VMCIQPB_SHUTDOWN_MEM state.
2190  */
2191 int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context)
2192 {
2193         struct qp_broker_entry *entry;
2194         const u32 context_id = vmci_ctx_get_id(context);
2195         u32 peer_id;
2196         bool is_local = false;
2197         int result;
2198
2199         if (vmci_handle_is_invalid(handle) || !context ||
2200             context_id == VMCI_INVALID_ID) {
2201                 return VMCI_ERROR_INVALID_ARGS;
2202         }
2203
2204         mutex_lock(&qp_broker_list.mutex);
2205
2206         if (!vmci_ctx_qp_exists(context, handle)) {
2207                 pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2208                          context_id, handle.context, handle.resource);
2209                 result = VMCI_ERROR_NOT_FOUND;
2210                 goto out;
2211         }
2212
2213         entry = qp_broker_handle_to_entry(handle);
2214         if (!entry) {
2215                 pr_devel("Context (ID=0x%x) reports being attached to queue pair(handle=0x%x:0x%x) that isn't present in broker\n",
2216                          context_id, handle.context, handle.resource);
2217                 result = VMCI_ERROR_NOT_FOUND;
2218                 goto out;
2219         }
2220
2221         if (context_id != entry->create_id && context_id != entry->attach_id) {
2222                 result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2223                 goto out;
2224         }
2225
2226         if (context_id == entry->create_id) {
2227                 peer_id = entry->attach_id;
2228                 entry->create_id = VMCI_INVALID_ID;
2229         } else {
2230                 peer_id = entry->create_id;
2231                 entry->attach_id = VMCI_INVALID_ID;
2232         }
2233         entry->qp.ref_count--;
2234
2235         is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
2236
2237         if (context_id != VMCI_HOST_CONTEXT_ID) {
2238                 bool headers_mapped;
2239
2240                 /*
2241                  * Pre NOVMVM vmx'en may detach from a queue pair
2242                  * before setting the page store, and in that case
2243                  * there is no user memory to detach from. Also, more
2244                  * recent VMX'en may detach from a queue pair in the
2245                  * quiesced state.
2246                  */
2247
2248                 qp_acquire_queue_mutex(entry->produce_q);
2249                 headers_mapped = entry->produce_q->q_header ||
2250                     entry->consume_q->q_header;
2251                 if (QPBROKERSTATE_HAS_MEM(entry)) {
2252                         result =
2253                             qp_host_unmap_queues(INVALID_VMCI_GUEST_MEM_ID,
2254                                                  entry->produce_q,
2255                                                  entry->consume_q);
2256                         if (result < VMCI_SUCCESS)
2257                                 pr_warn("Failed to unmap queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
2258                                         handle.context, handle.resource,
2259                                         result);
2260
2261                         /*
2262                          * Unregister the user memory; this is needed
2263                          * whether or not the VMX supplied separate
2264                          * VMCI page files.
2265                          */
2266                         qp_host_unregister_user_memory(entry->produce_q,
2267                                                        entry->consume_q);
2269
2270                 }
2271
2272                 if (!headers_mapped)
2273                         qp_reset_saved_headers(entry);
2274
2275                 qp_release_queue_mutex(entry->produce_q);
2276
2277                 if (!headers_mapped && entry->wakeup_cb)
2278                         entry->wakeup_cb(entry->client_data);
2279
2280         } else {
2281                 if (entry->wakeup_cb) {
2282                         entry->wakeup_cb = NULL;
2283                         entry->client_data = NULL;
2284                 }
2285         }
2286
2287         if (entry->qp.ref_count == 0) {
2288                 qp_list_remove_entry(&qp_broker_list, &entry->qp);
2289
2290                 if (is_local)
2291                         kfree(entry->local_mem);
2292
2293                 qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
2294                 qp_host_free_queue(entry->produce_q, entry->qp.produce_size);
2295                 qp_host_free_queue(entry->consume_q, entry->qp.consume_size);
2296                 /* Unlink from resource hash table and free callback */
2297                 vmci_resource_remove(&entry->resource);
2298
2299                 kfree(entry);
2300
2301                 vmci_ctx_qp_destroy(context, handle);
2302         } else {
2303                 qp_notify_peer(false, handle, context_id, peer_id);
2304                 if (context_id == VMCI_HOST_CONTEXT_ID &&
2305                     QPBROKERSTATE_HAS_MEM(entry)) {
2306                         entry->state = VMCIQPB_SHUTDOWN_MEM;
2307                 } else {
2308                         entry->state = VMCIQPB_SHUTDOWN_NO_MEM;
2309                 }
2310
2311                 if (!is_local)
2312                         vmci_ctx_qp_destroy(context, handle);
2313
2314         }
2315         result = VMCI_SUCCESS;
2316  out:
2317         mutex_unlock(&qp_broker_list.mutex);
2318         return result;
2319 }
2320
2321 /*
2322  * Establishes the necessary mappings for a queue pair given a
2323  * reference to the queue pair guest memory. This is usually
2324  * called when a guest is unquiesced and the VMX is allowed to
2325  * map guest memory once again.
2326  */
2327 int vmci_qp_broker_map(struct vmci_handle handle,
2328                        struct vmci_ctx *context,
2329                        u64 guest_mem)
2330 {
2331         struct qp_broker_entry *entry;
2332         const u32 context_id = vmci_ctx_get_id(context);
2333         bool is_local = false;
2334         int result;
2335
2336         if (vmci_handle_is_invalid(handle) || !context ||
2337             context_id == VMCI_INVALID_ID)
2338                 return VMCI_ERROR_INVALID_ARGS;
2339
2340         mutex_lock(&qp_broker_list.mutex);
2341
2342         if (!vmci_ctx_qp_exists(context, handle)) {
2343                 pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2344                          context_id, handle.context, handle.resource);
2345                 result = VMCI_ERROR_NOT_FOUND;
2346                 goto out;
2347         }
2348
2349         entry = qp_broker_handle_to_entry(handle);
2350         if (!entry) {
2351                 pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
2352                          context_id, handle.context, handle.resource);
2353                 result = VMCI_ERROR_NOT_FOUND;
2354                 goto out;
2355         }
2356
2357         if (context_id != entry->create_id && context_id != entry->attach_id) {
2358                 result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2359                 goto out;
2360         }
2361
2362         is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
2363         result = VMCI_SUCCESS;
2364
2365         if (context_id != VMCI_HOST_CONTEXT_ID) {
2366                 struct vmci_qp_page_store page_store;
2367
2368                 page_store.pages = guest_mem;
2369                 page_store.len = QPE_NUM_PAGES(entry->qp);
2370
2371                 qp_acquire_queue_mutex(entry->produce_q);
2372                 qp_reset_saved_headers(entry);
2373                 result =
2374                     qp_host_register_user_memory(&page_store,
2375                                                  entry->produce_q,
2376                                                  entry->consume_q);
2377                 qp_release_queue_mutex(entry->produce_q);
2378                 if (result == VMCI_SUCCESS) {
2379                         /* Move state from *_NO_MEM to *_MEM */
2380
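                        /*
                         * This relies on each VMCIQPB_*_NO_MEM state
                         * value immediately preceding its *_MEM
                         * counterpart in the state enum.
                         */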
2381                         entry->state++;
2382
2383                         if (entry->wakeup_cb)
2384                                 entry->wakeup_cb(entry->client_data);
2385                 }
2386         }
2387
2388  out:
2389         mutex_unlock(&qp_broker_list.mutex);
2390         return result;
2391 }
2392
2393 /*
2394  * Saves a snapshot of the queue headers for the given QP broker
2395  * entry. Should be used when guest memory is unmapped.
2396  * Results:
2397  * VMCI_SUCCESS on success, appropriate error code if guest memory
2398  * can't be accessed.
2399  */
2400 static int qp_save_headers(struct qp_broker_entry *entry)
2401 {
2402         int result;
2403
2404         if (entry->produce_q->saved_header != NULL &&
2405             entry->consume_q->saved_header != NULL) {
2406                 /*
2407                  *  If the headers have already been saved, we don't need to do
2408                  *  it again, and we don't want to map in the headers
2409                  *  unnecessarily.
2410                  */
2411
2412                 return VMCI_SUCCESS;
2413         }
2414
2415         if (!entry->produce_q->q_header ||
2416             !entry->consume_q->q_header) {
2417                 result = qp_host_map_queues(entry->produce_q, entry->consume_q);
2418                 if (result < VMCI_SUCCESS)
2419                         return result;
2420         }
2421
2422         memcpy(&entry->saved_produce_q, entry->produce_q->q_header,
2423                sizeof(entry->saved_produce_q));
2424         entry->produce_q->saved_header = &entry->saved_produce_q;
2425         memcpy(&entry->saved_consume_q, entry->consume_q->q_header,
2426                sizeof(entry->saved_consume_q));
2427         entry->consume_q->saved_header = &entry->saved_consume_q;
2428
2429         return VMCI_SUCCESS;
2430 }
2431
2432 /*
2433  * Removes all references to the guest memory of a given queue pair, and
2434  * will move the queue pair from state *_MEM to *_NO_MEM. It is usually
2435  * called when a VM is being quiesced and access to guest memory should
2436  * be avoided.
2437  */
2438 int vmci_qp_broker_unmap(struct vmci_handle handle,
2439                          struct vmci_ctx *context,
2440                          u32 gid)
2441 {
2442         struct qp_broker_entry *entry;
2443         const u32 context_id = vmci_ctx_get_id(context);
2444         bool is_local = false;
2445         int result;
2446
2447         if (vmci_handle_is_invalid(handle) || !context ||
2448             context_id == VMCI_INVALID_ID)
2449                 return VMCI_ERROR_INVALID_ARGS;
2450
2451         mutex_lock(&qp_broker_list.mutex);
2452
2453         if (!vmci_ctx_qp_exists(context, handle)) {
2454                 pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2455                          context_id, handle.context, handle.resource);
2456                 result = VMCI_ERROR_NOT_FOUND;
2457                 goto out;
2458         }
2459
2460         entry = qp_broker_handle_to_entry(handle);
2461         if (!entry) {
2462                 pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
2463                          context_id, handle.context, handle.resource);
2464                 result = VMCI_ERROR_NOT_FOUND;
2465                 goto out;
2466         }
2467
2468         if (context_id != entry->create_id && context_id != entry->attach_id) {
2469                 result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2470                 goto out;
2471         }
2472
2473         is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
2474
2475         if (context_id != VMCI_HOST_CONTEXT_ID) {
2476                 qp_acquire_queue_mutex(entry->produce_q);
2477                 result = qp_save_headers(entry);
2478                 if (result < VMCI_SUCCESS)
2479                         pr_warn("Failed to save queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
2480                                 handle.context, handle.resource, result);
2481
2482                 qp_host_unmap_queues(gid, entry->produce_q, entry->consume_q);
2483
2484                 /*
2485                  * On hosted, when we unmap queue pairs, the VMX will also
2486                  * unmap the guest memory, so we invalidate the previously
2487                  * registered memory. If the queue pair is mapped again at a
2488                  * later point in time, we will need to reregister the user
2489                  * memory with a possibly new user VA.
2490                  */
2491                 qp_host_unregister_user_memory(entry->produce_q,
2492                                                entry->consume_q);
2493
2494                 /*
2495                  * Move state from *_MEM to *_NO_MEM.
2496                  */
2497                 entry->state--;
2498
2499                 qp_release_queue_mutex(entry->produce_q);
2500         }
2501
2502         result = VMCI_SUCCESS;
2503
2504  out:
2505         mutex_unlock(&qp_broker_list.mutex);
2506         return result;
2507 }
2508
2509 /*
2510  * Destroys all guest queue pair endpoints. If active guest queue
2511  * pairs still exist, hypercalls to attempt detach from these
2512  * queue pairs will be made. Any failure to detach is silently
2513  * ignored.
2514  */
2515 void vmci_qp_guest_endpoints_exit(void)
2516 {
2517         struct qp_entry *entry;
2518         struct qp_guest_endpoint *ep;
2519
2520         mutex_lock(&qp_guest_endpoints.mutex);
2521
2522         while ((entry = qp_list_get_head(&qp_guest_endpoints))) {
2523                 ep = (struct qp_guest_endpoint *)entry;
2524
2525                 /* Don't make a hypercall for local queue_pairs. */
2526                 if (!(entry->flags & VMCI_QPFLAG_LOCAL))
2527                         qp_detatch_hypercall(entry->handle);
2528
2529                 /* We cannot fail the exit, so let's reset ref_count. */
2530                 entry->ref_count = 0;
2531                 qp_list_remove_entry(&qp_guest_endpoints, entry);
2532
2533                 qp_guest_endpoint_destroy(ep);
2534         }
2535
2536         mutex_unlock(&qp_guest_endpoints.mutex);
2537 }
2538
2539 /*
2540  * Helper routine that will lock the queue pair before subsequent
2541  * operations.
2542  * Note: Non-blocking on the host side is currently only implemented in ESX.
2543  * Since non-blocking isn't yet implemented on the host personality we
2544  * have no reason to acquire a spinlock, so to avoid an unnecessary
2545  * lock we only acquire the mutex when we can block.
2546  * Note: It is assumed that QPFLAG_PINNED implies QPFLAG_NONBLOCK.  Therefore
2547  * we can use the same locking function for access to both the queue
2548  * and the queue headers, as the logic is the same.  Assert this behavior.
2549  */
2550 static void qp_lock(const struct vmci_qp *qpair)
2551 {
2552         if (vmci_can_block(qpair->flags))
2553                 qp_acquire_queue_mutex(qpair->produce_q);
2554 }
2555
2556 /*
2557  * Helper routine that unlocks the queue pair after calling
2558  * qp_lock.  Respects non-blocking and pinning flags.
2559  */
2560 static void qp_unlock(const struct vmci_qp *qpair)
2561 {
2562         if (vmci_can_block(qpair->flags))
2563                 qp_release_queue_mutex(qpair->produce_q);
2564 }
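
/*
 * Typical usage pattern around the locked queue operations below
 * (illustrative):
 *
 *	qp_lock(qpair);
 *	... qp_enqueue_locked() or qp_dequeue_locked() ...
 *	qp_unlock(qpair);
 */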
2565
2566 /*
2567  * The queue headers may not be mapped at all times. If a queue is
2568  * currently not mapped, an attempt is made to map it.
2569  */
2570 static int qp_map_queue_headers(struct vmci_queue *produce_q,
2571                                 struct vmci_queue *consume_q,
2572                                 bool can_block)
2573 {
2574         int result;
2575
2576         if (!produce_q->q_header || !consume_q->q_header) {
2577                 if (can_block)
2578                         result = qp_host_map_queues(produce_q, consume_q);
2579                 else
2580                         result = VMCI_ERROR_QUEUEPAIR_NOT_READY;
2581
2582                 if (result < VMCI_SUCCESS)
2583                         return (produce_q->saved_header &&
2584                                 consume_q->saved_header) ?
2585                             VMCI_ERROR_QUEUEPAIR_NOT_READY :
2586                             VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2587         }
2588
2589         return VMCI_SUCCESS;
2590 }
2591
2592 /*
2593  * Helper routine that will retrieve the produce and consume
2594  * headers of a given queue pair. If the guest memory of the
2595  * queue pair is currently not available, the saved queue headers
2596  * will be returned, if these are available.
2597  */
2598 static int qp_get_queue_headers(const struct vmci_qp *qpair,
2599                                 struct vmci_queue_header **produce_q_header,
2600                                 struct vmci_queue_header **consume_q_header)
2601 {
2602         int result;
2603
2604         result = qp_map_queue_headers(qpair->produce_q, qpair->consume_q,
2605                                       vmci_can_block(qpair->flags));
2606         if (result == VMCI_SUCCESS) {
2607                 *produce_q_header = qpair->produce_q->q_header;
2608                 *consume_q_header = qpair->consume_q->q_header;
2609         } else if (qpair->produce_q->saved_header &&
2610                    qpair->consume_q->saved_header) {
2611                 *produce_q_header = qpair->produce_q->saved_header;
2612                 *consume_q_header = qpair->consume_q->saved_header;
2613                 result = VMCI_SUCCESS;
2614         }
2615
2616         return result;
2617 }
2618
2619 /*
2620  * Callback from VMCI queue pair broker indicating that a queue
2621  * pair that was previously not ready, now either is ready or
2622  * gone forever.
2623  */
2624 static int qp_wakeup_cb(void *client_data)
2625 {
2626         struct vmci_qp *qpair = (struct vmci_qp *)client_data;
2627
2628         qp_lock(qpair);
2629         while (qpair->blocked > 0) {
2630                 qpair->blocked--;
2631                 qpair->generation++;
2632                 wake_up(&qpair->event);
2633         }
2634         qp_unlock(qpair);
2635
2636         return VMCI_SUCCESS;
2637 }
2638
2639 /*
2640  * Makes the calling thread wait for the queue pair to become
2641  * ready for host side access.  Returns true when the thread is
2642  * woken up after a queue pair state change, false otherwise.
2643  */
2644 static bool qp_wait_for_ready_queue(struct vmci_qp *qpair)
2645 {
2646         unsigned int generation;
2647
2648         if (qpair->flags & VMCI_QPFLAG_NONBLOCK)
2649                 return false;
2650
2651         qpair->blocked++;
2652         generation = qpair->generation;
2653         qp_unlock(qpair);
2654         wait_event(qpair->event, generation != qpair->generation);
2655         qp_lock(qpair);
2656
2657         return true;
2658 }
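
/*
 * Note on the protocol above: the waiter increments qpair->blocked and
 * samples qpair->generation while still holding the queue mutex, and
 * qp_wakeup_cb() bumps the generation under the same mutex; a wakeup
 * racing with the unlock therefore cannot be lost, since wait_event()
 * will see the changed generation and return immediately.
 */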
2659
2660 /*
2661  * Enqueues a given buffer to the produce queue using the provided
2662  * function. As many bytes as possible (space available in the queue)
2663  * are enqueued.  Assumes the queue->mutex has been acquired.  Returns
2664  * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue
2665  * data, VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the
2666  * queue (as defined by the queue size), VMCI_ERROR_INVALID_ARGS, if
2667  * an error occurred when accessing the buffer,
2668  * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't
2669  * available.  Otherwise, the number of bytes written to the queue is
2670  * returned.  Updates the tail pointer of the produce queue.
2671  */
2672 static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
2673                                  struct vmci_queue *consume_q,
2674                                  const u64 produce_q_size,
2675                                  const void *buf,
2676                                  size_t buf_size,
2677                                  vmci_memcpy_to_queue_func memcpy_to_queue,
2678                                  bool can_block)
2679 {
2680         s64 free_space;
2681         u64 tail;
2682         size_t written;
2683         ssize_t result;
2684
2685         result = qp_map_queue_headers(produce_q, consume_q, can_block);
2686         if (unlikely(result != VMCI_SUCCESS))
2687                 return result;
2688
2689         free_space = vmci_q_header_free_space(produce_q->q_header,
2690                                               consume_q->q_header,
2691                                               produce_q_size);
2692         if (free_space == 0)
2693                 return VMCI_ERROR_QUEUEPAIR_NOSPACE;
2694
2695         if (free_space < VMCI_SUCCESS)
2696                 return (ssize_t) free_space;
2697
2698         written = (size_t) (free_space > buf_size ? buf_size : free_space);
2699         tail = vmci_q_header_producer_tail(produce_q->q_header);
2700         if (likely(tail + written < produce_q_size)) {
2701                 result = memcpy_to_queue(produce_q, tail, buf, 0, written);
2702         } else {
2703                 /* Tail pointer wraps around. */
2704
2705                 const size_t tmp = (size_t) (produce_q_size - tail);
2706
2707                 result = memcpy_to_queue(produce_q, tail, buf, 0, tmp);
2708                 if (result >= VMCI_SUCCESS)
2709                         result = memcpy_to_queue(produce_q, 0, buf, tmp,
2710                                                  written - tmp);
2711         }
2712
2713         if (result < VMCI_SUCCESS)
2714                 return result;
2715
2716         vmci_q_header_add_producer_tail(produce_q->q_header, written,
2717                                         produce_q_size);
2718         return written;
2719 }
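
/*
 * Worked example of the wrap-around split above, with made-up
 * numbers: for produce_q_size = 1024, tail = 1000 and written = 100,
 * tail + written >= produce_q_size, so the copy is split into
 * tmp = 1024 - 1000 = 24 bytes at offset 1000 followed by
 * written - tmp = 76 bytes at offset 0; the tail then advances to
 * (1000 + 100) % 1024 = 76.
 */
#if 0	/* illustrative sketch only, not compiled */
	const u64 q_size = 1024, tail = 1000;
	const size_t written = 100;
	const size_t first = (size_t)(q_size - tail);	/* 24 bytes at offset 1000 */
	const size_t second = written - first;		/* 76 bytes at offset 0 */
#endif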
2720
2721 /*
2722  * Dequeues data (if available) from the given consume queue. Writes data
2723  * to the user provided buffer using the provided function.
2724  * Assumes the queue->mutex has been acquired.
2725  * Results:
2726  * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue.
2727  * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue
2728  * (as defined by the queue size).
2729  * VMCI_ERROR_INVALID_ARGS, if an error occurred when accessing the buffer.
2730  * Otherwise the number of bytes dequeued is returned.
2731  * Side effects:
2732  * Updates the head pointer of the consume queue.
2733  */
2734 static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q,
2735                                  struct vmci_queue *consume_q,
2736                                  const u64 consume_q_size,
2737                                  void *buf,
2738                                  size_t buf_size,
2739                                  vmci_memcpy_from_queue_func memcpy_from_queue,
2740                                  bool update_consumer,
2741                                  bool can_block)
2742 {
2743         s64 buf_ready;
2744         u64 head;
2745         size_t read;
2746         ssize_t result;
2747
2748         result = qp_map_queue_headers(produce_q, consume_q, can_block);
2749         if (unlikely(result != VMCI_SUCCESS))
2750                 return result;
2751
2752         buf_ready = vmci_q_header_buf_ready(consume_q->q_header,
2753                                             produce_q->q_header,
2754                                             consume_q_size);
2755         if (buf_ready == 0)
2756                 return VMCI_ERROR_QUEUEPAIR_NODATA;
2757
2758         if (buf_ready < VMCI_SUCCESS)
2759                 return (ssize_t) buf_ready;
2760
2761         read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready);
2762         head = vmci_q_header_consumer_head(produce_q->q_header);
2763         if (likely(head + read < consume_q_size)) {
2764                 result = memcpy_from_queue(buf, 0, consume_q, head, read);
2765         } else {
2766                 /* Head pointer wraps around. */
2767
2768                 const size_t tmp = (size_t) (consume_q_size - head);
2769
2770                 result = memcpy_from_queue(buf, 0, consume_q, head, tmp);
2771                 if (result >= VMCI_SUCCESS)
2772                         result = memcpy_from_queue(buf, tmp, consume_q, 0,
2773                                                    read - tmp);
2774
2775         }
2776
2777         if (result < VMCI_SUCCESS)
2778                 return result;
2779
2780         if (update_consumer)
2781                 vmci_q_header_add_consumer_head(produce_q->q_header,
2782                                                 read, consume_q_size);
2783
2784         return read;
2785 }
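
/*
 * The update_consumer argument selects between a destructive read
 * and a peek: when false, the head pointer is left untouched, so a
 * subsequent call returns the same bytes again.  This is what backs
 * the vmci_qpair_peek*() entry points further down, e.g. (sketch
 * with hypothetical pq/cq/buf/len names):
 */
#if 0	/* illustrative sketch only, not compiled */
	/* peek: head unchanged, bytes remain in the queue */
	qp_dequeue_locked(pq, cq, size, buf, len,
			  qp_memcpy_from_queue, false, true);
	/* dequeue: head advances, bytes are consumed */
	qp_dequeue_locked(pq, cq, size, buf, len,
			  qp_memcpy_from_queue, true, true);
#endif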
2786
2787 /*
2788  * vmci_qpair_alloc() - Allocates a queue pair.
2789  * @qpair:      Pointer for the new vmci_qp struct.
2790  * @handle:     Handle to track the resource.
2791  * @produce_qsize:      Desired size of the producer queue.
2792  * @consume_qsize:      Desired size of the consumer queue.
2793  * @peer:       ContextID of the peer.
2794  * @flags:      VMCI flags.
2795  * @priv_flags: VMCI privilege flags.
2796  *
2797  * This is the client interface for allocating the memory for a
2798  * vmci_qp structure and then attaching to the underlying
2799  * queue.  If an error occurs allocating the memory for the
2800  * vmci_qp structure no attempt is made to attach.  If an
2801  * error occurs attaching, then the structure is freed.
2802  */
2803 int vmci_qpair_alloc(struct vmci_qp **qpair,
2804                      struct vmci_handle *handle,
2805                      u64 produce_qsize,
2806                      u64 consume_qsize,
2807                      u32 peer,
2808                      u32 flags,
2809                      u32 priv_flags)
2810 {
2811         struct vmci_qp *my_qpair;
2812         int retval;
2813         struct vmci_handle src = VMCI_INVALID_HANDLE;
2814         struct vmci_handle dst = vmci_make_handle(peer, VMCI_INVALID_ID);
2815         enum vmci_route route;
2816         vmci_event_release_cb wakeup_cb;
2817         void *client_data;
2818
2819         /*
2820          * Restrict the size of a queuepair.  The device already
2821          * enforces a limit on the total amount of memory that can be
2822          * allocated to queuepairs for a guest.  However, we try to
2823          * allocate this memory before we make the queuepair
2824          * allocation hypercall.  On Linux, we allocate each page
2825          * separately, which means rather than fail, the guest will
2826          * thrash while it tries to allocate, and will become
2827          * increasingly unresponsive to the point where it appears to
2828          * be hung.  So we place a limit on the size of an individual
2829          * queuepair here, and leave the device to enforce the
2830          * restriction on total queuepair memory.  (Note that this
2831          * doesn't prevent all cases; a user with only this much
2832          * physical memory could still get into trouble.)  The error
2833          * used by the device is NO_RESOURCES, so use that here too.
2834          */
2835
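	/*
	 * The first clause below is an unsigned overflow check: if the
	 * u64 sum wrapped around, it is smaller than either addend, so
	 * comparing it against max() of the two operands catches the
	 * wrap before the size limit is applied.
	 */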
2836         if (produce_qsize + consume_qsize < max(produce_qsize, consume_qsize) ||
2837             produce_qsize + consume_qsize > VMCI_MAX_GUEST_QP_MEMORY)
2838                 return VMCI_ERROR_NO_RESOURCES;
2839
2840         retval = vmci_route(&src, &dst, false, &route);
2841         if (retval < VMCI_SUCCESS)
2842                 route = vmci_guest_code_active() ?
2843                     VMCI_ROUTE_AS_GUEST : VMCI_ROUTE_AS_HOST;
2844
2845         /* If NONBLOCK or PINNED is set, we better be the guest personality. */
2846         if ((!vmci_can_block(flags) || vmci_qp_pinned(flags)) &&
2847             VMCI_ROUTE_AS_GUEST != route) {
2848                 pr_devel("Not guest personality w/ NONBLOCK OR PINNED set");
2849                 return VMCI_ERROR_INVALID_ARGS;
2850         }
2851
2852         /*
2853          * Limit the size of pinned QPs and check sanity.
2854          *
2855  * Pinned pages imply non-blocking mode.  Mutexes aren't acquired
2856  * when the NONBLOCK flag is set in qpair code, and they should also
2857  * not be acquired when the PINNED flag is set.  Since pinning pages
2858          * implies we want speed, it makes no sense not to have NONBLOCK
2859          * set if PINNED is set.  Hence enforce this implication.
2860          */
2861         if (vmci_qp_pinned(flags)) {
2862                 if (vmci_can_block(flags)) {
2863                         pr_err("Attempted to enable pinning w/o non-blocking");
2864                         return VMCI_ERROR_INVALID_ARGS;
2865                 }
2866
2867                 if (produce_qsize + consume_qsize > VMCI_MAX_PINNED_QP_MEMORY)
2868                         return VMCI_ERROR_NO_RESOURCES;
2869         }
2870
2871         my_qpair = kzalloc(sizeof(*my_qpair), GFP_KERNEL);
2872         if (!my_qpair)
2873                 return VMCI_ERROR_NO_MEM;
2874
2875         my_qpair->produce_q_size = produce_qsize;
2876         my_qpair->consume_q_size = consume_qsize;
2877         my_qpair->peer = peer;
2878         my_qpair->flags = flags;
2879         my_qpair->priv_flags = priv_flags;
2880
2881         wakeup_cb = NULL;
2882         client_data = NULL;
2883
2884         if (VMCI_ROUTE_AS_HOST == route) {
2885                 my_qpair->guest_endpoint = false;
2886                 if (!(flags & VMCI_QPFLAG_LOCAL)) {
2887                         my_qpair->blocked = 0;
2888                         my_qpair->generation = 0;
2889                         init_waitqueue_head(&my_qpair->event);
2890                         wakeup_cb = qp_wakeup_cb;
2891                         client_data = (void *)my_qpair;
2892                 }
2893         } else {
2894                 my_qpair->guest_endpoint = true;
2895         }
2896
2897         retval = vmci_qp_alloc(handle,
2898                                &my_qpair->produce_q,
2899                                my_qpair->produce_q_size,
2900                                &my_qpair->consume_q,
2901                                my_qpair->consume_q_size,
2902                                my_qpair->peer,
2903                                my_qpair->flags,
2904                                my_qpair->priv_flags,
2905                                my_qpair->guest_endpoint,
2906                                wakeup_cb, client_data);
2907
2908         if (retval < VMCI_SUCCESS) {
2909                 kfree(my_qpair);
2910                 return retval;
2911         }
2912
2913         *qpair = my_qpair;
2914         my_qpair->handle = *handle;
2915
2916         return retval;
2917 }
2918 EXPORT_SYMBOL_GPL(vmci_qpair_alloc);
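
/*
 * A minimal usage sketch for the alloc/detach pair, loosely modelled
 * on how a client transport attaches (the queue sizes and the
 * my_attach name are illustrative, not part of this driver):
 */
#if 0	/* illustrative sketch only, not compiled */
static int my_attach(u32 peer_cid)
{
	struct vmci_qp *qpair;
	struct vmci_handle handle = VMCI_INVALID_HANDLE;
	int rv;

	rv = vmci_qpair_alloc(&qpair, &handle, 65536, 65536,
			      peer_cid, 0 /* flags */, 0 /* priv_flags */);
	if (rv < VMCI_SUCCESS)
		return rv;	/* nothing was allocated on failure */

	/* ... exchange data via vmci_qpair_enqueue()/dequeue() ... */

	return vmci_qpair_detach(&qpair);	/* also frees the struct */
}
#endif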
2919
2920 /*
2921  * vmci_qpair_detach() - Detaches the client from a queue pair.
2922  * @qpair:      Reference to a pointer to the qpair struct.
2923  *
2924  * This is the client interface for detaching from a VMCIQPair.
2925  * Note that this routine will free the memory allocated for the
2926  * vmci_qp structure too.
2927  */
2928 int vmci_qpair_detach(struct vmci_qp **qpair)
2929 {
2930         int result;
2931         struct vmci_qp *old_qpair;
2932
2933         if (!qpair || !(*qpair))
2934                 return VMCI_ERROR_INVALID_ARGS;
2935
2936         old_qpair = *qpair;
2937         result = qp_detatch(old_qpair->handle, old_qpair->guest_endpoint);
2938
2939         /*
2940          * The guest can fail to detach for a number of reasons, and
2941  * if it does so, it will clean up the entry (if there is one).
2942  * The host can fail too, but it won't clean up the entry
2943          * immediately, it will do that later when the context is
2944          * freed.  Either way, we need to release the qpair struct
2945          * here; there isn't much the caller can do, and we don't want
2946          * to leak.
2947          */
2948
2949         memset(old_qpair, 0, sizeof(*old_qpair));
2950         old_qpair->handle = VMCI_INVALID_HANDLE;
2951         old_qpair->peer = VMCI_INVALID_ID;
2952         kfree(old_qpair);
2953         *qpair = NULL;
2954
2955         return result;
2956 }
2957 EXPORT_SYMBOL_GPL(vmci_qpair_detach);
2958
2959 /*
2960  * vmci_qpair_get_produce_indexes() - Retrieves the indexes of the producer.
2961  * @qpair:      Pointer to the queue pair struct.
2962  * @producer_tail:      Reference used for storing producer tail index.
2963  * @consumer_head:      Reference used for storing the consumer head index.
2964  *
2965  * This is the client interface for getting the current indexes of the
2966  * QPair from the point of view of the caller as the producer.
2967  */
2968 int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair,
2969                                    u64 *producer_tail,
2970                                    u64 *consumer_head)
2971 {
2972         struct vmci_queue_header *produce_q_header;
2973         struct vmci_queue_header *consume_q_header;
2974         int result;
2975
2976         if (!qpair)
2977                 return VMCI_ERROR_INVALID_ARGS;
2978
2979         qp_lock(qpair);
2980         result =
2981             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
2982         if (result == VMCI_SUCCESS)
2983                 vmci_q_header_get_pointers(produce_q_header, consume_q_header,
2984                                            producer_tail, consumer_head);
2985         qp_unlock(qpair);
2986
2987         if (result == VMCI_SUCCESS &&
2988             ((producer_tail && *producer_tail >= qpair->produce_q_size) ||
2989              (consumer_head && *consumer_head >= qpair->produce_q_size)))
2990                 return VMCI_ERROR_INVALID_SIZE;
2991
2992         return result;
2993 }
2994 EXPORT_SYMBOL_GPL(vmci_qpair_get_produce_indexes);
2995
2996 /*
2997  * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the consumer.
2998  * @qpair:      Pointer to the queue pair struct.
2999  * @consumer_tail:      Reference used for storing consumer tail index.
3000  * @producer_head:      Reference used for storing the producer head index.
3001  *
3002  * This is the client interface for getting the current indexes of the
3003  * QPair from the point of view of the caller as the consumer.
3004  */
3005 int vmci_qpair_get_consume_indexes(const struct vmci_qp *qpair,
3006                                    u64 *consumer_tail,
3007                                    u64 *producer_head)
3008 {
3009         struct vmci_queue_header *produce_q_header;
3010         struct vmci_queue_header *consume_q_header;
3011         int result;
3012
3013         if (!qpair)
3014                 return VMCI_ERROR_INVALID_ARGS;
3015
3016         qp_lock(qpair);
3017         result =
3018             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3019         if (result == VMCI_SUCCESS)
3020                 vmci_q_header_get_pointers(consume_q_header, produce_q_header,
3021                                            consumer_tail, producer_head);
3022         qp_unlock(qpair);
3023
3024         if (result == VMCI_SUCCESS &&
3025             ((consumer_tail && *consumer_tail >= qpair->consume_q_size) ||
3026              (producer_head && *producer_head >= qpair->consume_q_size)))
3027                 return VMCI_ERROR_INVALID_SIZE;
3028
3029         return result;
3030 }
3031 EXPORT_SYMBOL_GPL(vmci_qpair_get_consume_indexes);
3032
3033 /*
3034  * vmci_qpair_produce_free_space() - Retrieves free space in producer queue.
3035  * @qpair:      Pointer to the queue pair struct.
3036  *
3037  * This is the client interface for getting the amount of free
3038  * space in the QPair from the point of view of the caller as
3039  * the producer, which is the common case.  Returns < 0 on error,
3040  * else the number of bytes into which data can be enqueued.
3041  */
3042 s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair)
3043 {
3044         struct vmci_queue_header *produce_q_header;
3045         struct vmci_queue_header *consume_q_header;
3046         s64 result;
3047
3048         if (!qpair)
3049                 return VMCI_ERROR_INVALID_ARGS;
3050
3051         qp_lock(qpair);
3052         result =
3053             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3054         if (result == VMCI_SUCCESS)
3055                 result = vmci_q_header_free_space(produce_q_header,
3056                                                   consume_q_header,
3057                                                   qpair->produce_q_size);
3058         else
3059                 result = 0;
3060
3061         qp_unlock(qpair);
3062
3063         return result;
3064 }
3065 EXPORT_SYMBOL_GPL(vmci_qpair_produce_free_space);
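
/*
 * The underlying computation (vmci_q_header_free_space() in
 * vmw_vmci_defs.h) derives free space from the two header indexes
 * and deliberately leaves one byte unused, so that head == tail
 * always means "empty" rather than "full".  In sketch form:
 */
#if 0	/* illustrative sketch only, not compiled */
	if (tail >= head)	/* data does not wrap */
		free_space = q_size - (tail - head) - 1;
	else			/* tail has wrapped past the end */
		free_space = head - tail - 1;
#endif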
3066
3067 /*
3068  * vmci_qpair_consume_free_space() - Retrieves free space in consumer queue.
3069  * @qpair:      Pointer to the queue pair struct.
3070  *
3071  * This is the client interface for getting the amount of free
3072  * space in the QPair from the point of view of the caller as
3073  * the consumer, which is not the common case.  Returns < 0 on error,
3074  * else the number of bytes into which data can be enqueued.
3075  */
3076 s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair)
3077 {
3078         struct vmci_queue_header *produce_q_header;
3079         struct vmci_queue_header *consume_q_header;
3080         s64 result;
3081
3082         if (!qpair)
3083                 return VMCI_ERROR_INVALID_ARGS;
3084
3085         qp_lock(qpair);
3086         result =
3087             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3088         if (result == VMCI_SUCCESS)
3089                 result = vmci_q_header_free_space(consume_q_header,
3090                                                   produce_q_header,
3091                                                   qpair->consume_q_size);
3092         else
3093                 result = 0;
3094
3095         qp_unlock(qpair);
3096
3097         return result;
3098 }
3099 EXPORT_SYMBOL_GPL(vmci_qpair_consume_free_space);
3100
3101 /*
3102  * vmci_qpair_produce_buf_ready() - Gets bytes ready to read from
3103  * producer queue.
3104  * @qpair:      Pointer to the queue pair struct.
3105  *
3106  * This is the client interface for getting the amount of
3107  * enqueued data in the QPair from the point of view of the
3108  * caller as the producer, which is not the common case.  Returns < 0
3109  * on error, else the number of bytes that may be read.
3110  */
3111 s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair)
3112 {
3113         struct vmci_queue_header *produce_q_header;
3114         struct vmci_queue_header *consume_q_header;
3115         s64 result;
3116
3117         if (!qpair)
3118                 return VMCI_ERROR_INVALID_ARGS;
3119
3120         qp_lock(qpair);
3121         result =
3122             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3123         if (result == VMCI_SUCCESS)
3124                 result = vmci_q_header_buf_ready(produce_q_header,
3125                                                  consume_q_header,
3126                                                  qpair->produce_q_size);
3127         else
3128                 result = 0;
3129
3130         qp_unlock(qpair);
3131
3132         return result;
3133 }
3134 EXPORT_SYMBOL_GPL(vmci_qpair_produce_buf_ready);
3135
3136 /*
3137  * vmci_qpair_consume_buf_ready() - Gets bytes ready to read from
3138  * consumer queue.
3139  * @qpair:      Pointer to the queue pair struct.
3140  *
3141  * This is the client interface for getting the amount of
3142  * enqueued data in the QPair from the point of view of the
3143  * caller as the consumer, which is the normal case.  Returns < 0
3144  * on error, else the number of bytes that may be read.
3145  */
3146 s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair)
3147 {
3148         struct vmci_queue_header *produce_q_header;
3149         struct vmci_queue_header *consume_q_header;
3150         s64 result;
3151
3152         if (!qpair)
3153                 return VMCI_ERROR_INVALID_ARGS;
3154
3155         qp_lock(qpair);
3156         result =
3157             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3158         if (result == VMCI_SUCCESS)
3159                 result = vmci_q_header_buf_ready(consume_q_header,
3160                                                  produce_q_header,
3161                                                  qpair->consume_q_size);
3162         else
3163                 result = 0;
3164
3165         qp_unlock(qpair);
3166
3167         return result;
3168 }
3169 EXPORT_SYMBOL_GPL(vmci_qpair_consume_buf_ready);
3170
3171 /*
3172  * vmci_qpair_enqueue() - Throw data on the queue.
3173  * @qpair:      Pointer to the queue pair struct.
3174  * @buf:        Pointer to buffer containing data.
3175  * @buf_size:   Length of buffer.
3176  * @buf_type:   Buffer type (Unused).
3177  *
3178  * This is the client interface for enqueueing data into the queue.
3179  * Returns number of bytes enqueued or < 0 on error.
3180  */
3181 ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
3182                            const void *buf,
3183                            size_t buf_size,
3184                            int buf_type)
3185 {
3186         ssize_t result;
3187
3188         if (!qpair || !buf)
3189                 return VMCI_ERROR_INVALID_ARGS;
3190
3191         qp_lock(qpair);
3192
3193         do {
3194                 result = qp_enqueue_locked(qpair->produce_q,
3195                                            qpair->consume_q,
3196                                            qpair->produce_q_size,
3197                                            buf, buf_size,
3198                                            qp_memcpy_to_queue,
3199                                            vmci_can_block(qpair->flags));
3200
3201                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3202                     !qp_wait_for_ready_queue(qpair))
3203                         result = VMCI_ERROR_WOULD_BLOCK;
3204
3205         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3206
3207         qp_unlock(qpair);
3208
3209         return result;
3210 }
3211 EXPORT_SYMBOL_GPL(vmci_qpair_enqueue);
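
/*
 * Enqueue usage sketch: a non-fatal "queue full" result is reported
 * as VMCI_ERROR_QUEUEPAIR_NOSPACE, which a caller will typically
 * treat as "try again later" (the my_send name and the retry policy
 * are illustrative, not part of this driver):
 */
#if 0	/* illustrative sketch only, not compiled */
static ssize_t my_send(struct vmci_qp *qpair, const void *msg, size_t len)
{
	ssize_t written = vmci_qpair_enqueue(qpair, msg, len, 0);

	if (written == VMCI_ERROR_QUEUEPAIR_NOSPACE)
		return 0;	/* peer has not drained the queue yet */

	return written;		/* bytes enqueued, or another error < 0 */
}
#endif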
3212
3213 /*
3214  * vmci_qpair_dequeue() - Get data from the queue.
3215  * @qpair:      Pointer to the queue pair struct.
3216  * @buf:        Pointer to buffer for the data.
3217  * @buf_size:   Length of buffer.
3218  * @buf_type:   Buffer type (Unused).
3219  *
3220  * This is the client interface for dequeueing data from the queue.
3221  * Returns number of bytes dequeued or < 0 on error.
3222  */
3223 ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
3224                            void *buf,
3225                            size_t buf_size,
3226                            int buf_type)
3227 {
3228         ssize_t result;
3229
3230         if (!qpair || !buf)
3231                 return VMCI_ERROR_INVALID_ARGS;
3232
3233         qp_lock(qpair);
3234
3235         do {
3236                 result = qp_dequeue_locked(qpair->produce_q,
3237                                            qpair->consume_q,
3238                                            qpair->consume_q_size,
3239                                            buf, buf_size,
3240                                            qp_memcpy_from_queue, true,
3241                                            vmci_can_block(qpair->flags));
3242
3243                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3244                     !qp_wait_for_ready_queue(qpair))
3245                         result = VMCI_ERROR_WOULD_BLOCK;
3246
3247         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3248
3249         qp_unlock(qpair);
3250
3251         return result;
3252 }
3253 EXPORT_SYMBOL_GPL(vmci_qpair_dequeue);
3254
3255 /*
3256  * vmci_qpair_peek() - Peek at the data in the queue.
3257  * @qpair:      Pointer to the queue pair struct.
3258  * @buf:        Pointer to buffer for the data.
3259  * @buf_size:   Length of buffer.
3260  * @buf_type:   Buffer type (Unused on Linux).
3261  *
3262  * This is the client interface for peeking into a queue.  (I.e.,
3263  * copy data from the queue without updating the head pointer.)
3264  * Returns number of bytes dequeued or < 0 on error.
3265  */
3266 ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
3267                         void *buf,
3268                         size_t buf_size,
3269                         int buf_type)
3270 {
3271         ssize_t result;
3272
3273         if (!qpair || !buf)
3274                 return VMCI_ERROR_INVALID_ARGS;
3275
3276         qp_lock(qpair);
3277
3278         do {
3279                 result = qp_dequeue_locked(qpair->produce_q,
3280                                            qpair->consume_q,
3281                                            qpair->consume_q_size,
3282                                            buf, buf_size,
3283                                            qp_memcpy_from_queue, false,
3284                                            vmci_can_block(qpair->flags));
3285
3286                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3287                     !qp_wait_for_ready_queue(qpair))
3288                         result = VMCI_ERROR_WOULD_BLOCK;
3289
3290         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3291
3292         qp_unlock(qpair);
3293
3294         return result;
3295 }
3296 EXPORT_SYMBOL_GPL(vmci_qpair_peek);
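
/*
 * Peek usage sketch: read a fixed-size length header without
 * consuming it, and only dequeue once the whole message is available
 * (the my_hdr/my_recv names are illustrative, not a VMCI format):
 */
#if 0	/* illustrative sketch only, not compiled */
struct my_hdr { u32 len; };

static ssize_t my_recv(struct vmci_qp *qpair, void *buf, size_t buf_size)
{
	struct my_hdr hdr;
	ssize_t rv;
	s64 ready;

	rv = vmci_qpair_peek(qpair, &hdr, sizeof(hdr), 0);
	if (rv < (ssize_t)sizeof(hdr))
		return rv < 0 ? rv : 0;	/* error, or header not complete */

	ready = vmci_qpair_consume_buf_ready(qpair);
	if (ready < 0)
		return ready;
	if ((u64)ready < sizeof(hdr) + hdr.len)
		return 0;		/* wait for the full message */

	return vmci_qpair_dequeue(qpair, buf, buf_size, 0);
}
#endif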
3297
3298 /*
3299  * vmci_qpair_enquev() - Throw data on the queue using iov.
3300  * @qpair:      Pointer to the queue pair struct.
3301  * @iov:        Pointer to buffer containing data.
3302  * @iov_size:   Length of buffer.
3303  * @buf_type:   Buffer type (Unused).
3304  *
3305  * This is the client interface for enqueueing data into the queue.
3306  * This function uses IO vectors to handle the work. Returns number
3307  * of bytes enqueued or < 0 on error.
3308  */
3309 ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
3310                           void *iov,
3311                           size_t iov_size,
3312                           int buf_type)
3313 {
3314         ssize_t result;
3315
3316         if (!qpair || !iov)
3317                 return VMCI_ERROR_INVALID_ARGS;
3318
3319         qp_lock(qpair);
3320
3321         do {
3322                 result = qp_enqueue_locked(qpair->produce_q,
3323                                            qpair->consume_q,
3324                                            qpair->produce_q_size,
3325                                            iov, iov_size,
3326                                            qp_memcpy_to_queue_iov,
3327                                            vmci_can_block(qpair->flags));
3328
3329                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3330                     !qp_wait_for_ready_queue(qpair))
3331                         result = VMCI_ERROR_WOULD_BLOCK;
3332
3333         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3334
3335         qp_unlock(qpair);
3336
3337         return result;
3338 }
3339 EXPORT_SYMBOL_GPL(vmci_qpair_enquev);
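
/*
 * In this version of the driver the iov argument is interpreted as an
 * array of struct iovec (see qp_memcpy_to_queue_iov()), with iov_size
 * giving the total byte count, as in a socket sendmsg() path.  Sketch
 * gathering a header and payload in one call (names illustrative):
 */
#if 0	/* illustrative sketch only, not compiled */
static ssize_t my_sendv(struct vmci_qp *qpair,
			void *hdr, size_t hdr_len,
			void *payload, size_t payload_len)
{
	struct iovec iov[2] = {
		{ .iov_base = hdr,	.iov_len = hdr_len },
		{ .iov_base = payload,	.iov_len = payload_len },
	};

	return vmci_qpair_enquev(qpair, iov, hdr_len + payload_len, 0);
}
#endif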
3340
3341 /*
3342  * vmci_qpair_dequev() - Get data from the queue using iov.
3343  * @qpair:      Pointer to the queue pair struct.
3344  * @iov:        Pointer to buffer for the data.
3345  * @iov_size:   Length of buffer.
3346  * @buf_type:   Buffer type (Unused).
3347  *
3348  * This is the client interface for dequeueing data from the queue.
3349  * This function uses IO vectors to handle the work. Returns number
3350  * of bytes dequeued or < 0 on error.
3351  */
3352 ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
3353                           void *iov,
3354                           size_t iov_size,
3355                           int buf_type)
3356 {
3357         ssize_t result;
3358
3359         if (!qpair || !iov)
3360                 return VMCI_ERROR_INVALID_ARGS;
3361
3362         qp_lock(qpair);
3363
3364         do {
3365                 result = qp_dequeue_locked(qpair->produce_q,
3366                                            qpair->consume_q,
3367                                            qpair->consume_q_size,
3368                                            iov, iov_size,
3369                                            qp_memcpy_from_queue_iov,
3370                                            true, vmci_can_block(qpair->flags));
3371
3372                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3373                     !qp_wait_for_ready_queue(qpair))
3374                         result = VMCI_ERROR_WOULD_BLOCK;
3375
3376         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3377
3378         qp_unlock(qpair);
3379
3380         return result;
3381 }
3382 EXPORT_SYMBOL_GPL(vmci_qpair_dequev);
3383
3384 /*
3385  * vmci_qpair_peekv() - Peek at the data in the queue using iov.
3386  * @qpair:      Pointer to the queue pair struct.
3387  * @iov:        Pointer to buffer for the data.
3388  * @iov_size:   Length of buffer.
3389  * @buf_type:   Buffer type (Unused on Linux).
3390  *
3391  * This is the client interface for peeking into a queue.  (I.e.,
3392  * copy data from the queue without updating the head pointer.)
3393  * This function uses IO vectors to handle the work. Returns number
3394  * of bytes peeked or < 0 on error.
3395  */
3396 ssize_t vmci_qpair_peekv(struct vmci_qp *qpair,
3397                          void *iov,
3398                          size_t iov_size,
3399                          int buf_type)
3400 {
3401         ssize_t result;
3402
3403         if (!qpair || !iov)
3404                 return VMCI_ERROR_INVALID_ARGS;
3405
3406         qp_lock(qpair);
3407
3408         do {
3409                 result = qp_dequeue_locked(qpair->produce_q,
3410                                            qpair->consume_q,
3411                                            qpair->consume_q_size,
3412                                            iov, iov_size,
3413                                            qp_memcpy_from_queue_iov,
3414                                            false, vmci_can_block(qpair->flags));
3415
3416                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3417                     !qp_wait_for_ready_queue(qpair))
3418                         result = VMCI_ERROR_WOULD_BLOCK;
3419
3420         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3421
3422         qp_unlock(qpair);
3423         return result;
3424 }
3425 EXPORT_SYMBOL_GPL(vmci_qpair_peekv);