drivers/hv/vmbus_drv.c

/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/completion.h>
#include <linux/hyperv.h>
#include <linux/kernel_stat.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
#include <asm/hyperv.h>
#include <asm/hypervisor.h>
#include <asm/mshyperv.h>
#include <linux/notifier.h>
#include <linux/ptrace.h>
#include <linux/kdebug.h>
#include "hyperv_vmbus.h"

static struct acpi_device  *hv_acpi_dev;

static struct tasklet_struct msg_dpc;
static struct completion probe_event;
static int irq;

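/*
 * Report a guest crash to the host: stash a snapshot of the faulting
 * registers in the five Hyper-V crash parameter MSRs, then set the notify
 * bit in HV_X64_MSR_CRASH_CTL so the host knows crash data is available
 * to collect.
 */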
static void hyperv_report_panic(struct pt_regs *regs)
{
        static bool panic_reported;

        /*
         * We prefer to report panic on 'die' chain as we have proper
         * registers to report, but if we miss it (e.g. on BUG()) we need
         * to report it on 'panic'.
         */
        if (panic_reported)
                return;
        panic_reported = true;

        wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip);
        wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax);
        wrmsrl(HV_X64_MSR_CRASH_P2, regs->bx);
        wrmsrl(HV_X64_MSR_CRASH_P3, regs->cx);
        wrmsrl(HV_X64_MSR_CRASH_P4, regs->dx);

        /*
         * Let Hyper-V know there is crash data available
         */
        wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY);
}

static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
                              void *args)
{
        struct pt_regs *regs;

        regs = current_pt_regs();

        hyperv_report_panic(regs);
        return NOTIFY_DONE;
}

static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
                            void *args)
{
        struct die_args *die = (struct die_args *)args;
        struct pt_regs *regs = die->regs;

        hyperv_report_panic(regs);
        return NOTIFY_DONE;
}

static struct notifier_block hyperv_die_block = {
        .notifier_call = hyperv_die_event,
};
static struct notifier_block hyperv_panic_block = {
        .notifier_call = hyperv_panic_event,
};

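/*
 * Singly linked list (chained through ->sibling) of the MMIO ranges the
 * VMBus ACPI device advertises; populated by vmbus_walk_resources() below.
 */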
struct resource *hyperv_mmio;
EXPORT_SYMBOL_GPL(hyperv_mmio);

static int vmbus_exists(void)
{
        if (hv_acpi_dev == NULL)
                return -ENODEV;

        return 0;
}

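/* Each byte of the device GUID is rendered as two hex characters. */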
#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)
static void print_alias_name(struct hv_device *hv_dev, char *alias_name)
{
        int i;
        for (i = 0; i < VMBUS_ALIAS_LEN; i += 2)
                sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]);
}

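/*
 * Monitor IDs are tracked as one bit per channel, packed into banks of 32
 * in the shared monitor page; the helpers below map a channel's monitor ID
 * to its bank (group) and bit offset within that bank.
 */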
static u8 channel_monitor_group(struct vmbus_channel *channel)
{
        return (u8)channel->offermsg.monitorid / 32;
}

static u8 channel_monitor_offset(struct vmbus_channel *channel)
{
        return (u8)channel->offermsg.monitorid % 32;
}

static u32 channel_pending(struct vmbus_channel *channel,
                           struct hv_monitor_page *monitor_page)
{
        u8 monitor_group = channel_monitor_group(channel);
        return monitor_page->trigger_group[monitor_group].pending;
}

static u32 channel_latency(struct vmbus_channel *channel,
                           struct hv_monitor_page *monitor_page)
{
        u8 monitor_group = channel_monitor_group(channel);
        u8 monitor_offset = channel_monitor_offset(channel);
        return monitor_page->latency[monitor_group][monitor_offset];
}

static u32 channel_conn_id(struct vmbus_channel *channel,
                           struct hv_monitor_page *monitor_page)
{
        u8 monitor_group = channel_monitor_group(channel);
        u8 monitor_offset = channel_monitor_offset(channel);
        return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id;
}

static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
                       char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);

static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
                          char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);

static ssize_t monitor_id_show(struct device *dev,
                               struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);

static ssize_t class_id_show(struct device *dev,
                             struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "{%pUl}\n",
                       hv_dev->channel->offermsg.offer.if_type.b);
}
static DEVICE_ATTR_RO(class_id);

static ssize_t device_id_show(struct device *dev,
                              struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "{%pUl}\n",
                       hv_dev->channel->offermsg.offer.if_instance.b);
}
static DEVICE_ATTR_RO(device_id);

static ssize_t modalias_show(struct device *dev,
                             struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        char alias_name[VMBUS_ALIAS_LEN + 1];

        print_alias_name(hv_dev, alias_name);
        return sprintf(buf, "vmbus:%s\n", alias_name);
}
static DEVICE_ATTR_RO(modalias);

static ssize_t server_monitor_pending_show(struct device *dev,
                                           struct device_attribute *dev_attr,
                                           char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n",
                       channel_pending(hv_dev->channel,
                                       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_pending);

static ssize_t client_monitor_pending_show(struct device *dev,
                                           struct device_attribute *dev_attr,
                                           char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n",
                       channel_pending(hv_dev->channel,
                                       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);

static ssize_t server_monitor_latency_show(struct device *dev,
                                           struct device_attribute *dev_attr,
                                           char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n",
                       channel_latency(hv_dev->channel,
                                       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);

static ssize_t client_monitor_latency_show(struct device *dev,
                                           struct device_attribute *dev_attr,
                                           char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n",
                       channel_latency(hv_dev->channel,
                                       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);

static ssize_t server_monitor_conn_id_show(struct device *dev,
                                           struct device_attribute *dev_attr,
                                           char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n",
                       channel_conn_id(hv_dev->channel,
                                       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);

static ssize_t client_monitor_conn_id_show(struct device *dev,
                                           struct device_attribute *dev_attr,
                                           char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n",
                       channel_conn_id(hv_dev->channel,
                                       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);

static ssize_t out_intr_mask_show(struct device *dev,
                                  struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info outbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
        return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);

static ssize_t out_read_index_show(struct device *dev,
                                   struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info outbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
        return sprintf(buf, "%d\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);

static ssize_t out_write_index_show(struct device *dev,
                                    struct device_attribute *dev_attr,
                                    char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info outbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
        return sprintf(buf, "%d\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);

static ssize_t out_read_bytes_avail_show(struct device *dev,
                                         struct device_attribute *dev_attr,
                                         char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info outbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
        return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);

static ssize_t out_write_bytes_avail_show(struct device *dev,
                                          struct device_attribute *dev_attr,
                                          char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info outbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
        return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);

static ssize_t in_intr_mask_show(struct device *dev,
                                 struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info inbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
        return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);

static ssize_t in_read_index_show(struct device *dev,
                                  struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info inbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
        return sprintf(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);

static ssize_t in_write_index_show(struct device *dev,
                                   struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info inbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
        return sprintf(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);

static ssize_t in_read_bytes_avail_show(struct device *dev,
                                        struct device_attribute *dev_attr,
                                        char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info inbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
        return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);

static ssize_t in_write_bytes_avail_show(struct device *dev,
                                         struct device_attribute *dev_attr,
                                         char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info inbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
        return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);

/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
static struct attribute *vmbus_attrs[] = {
        &dev_attr_id.attr,
        &dev_attr_state.attr,
        &dev_attr_monitor_id.attr,
        &dev_attr_class_id.attr,
        &dev_attr_device_id.attr,
        &dev_attr_modalias.attr,
        &dev_attr_server_monitor_pending.attr,
        &dev_attr_client_monitor_pending.attr,
        &dev_attr_server_monitor_latency.attr,
        &dev_attr_client_monitor_latency.attr,
        &dev_attr_server_monitor_conn_id.attr,
        &dev_attr_client_monitor_conn_id.attr,
        &dev_attr_out_intr_mask.attr,
        &dev_attr_out_read_index.attr,
        &dev_attr_out_write_index.attr,
        &dev_attr_out_read_bytes_avail.attr,
        &dev_attr_out_write_bytes_avail.attr,
        &dev_attr_in_intr_mask.attr,
        &dev_attr_in_read_index.attr,
        &dev_attr_in_write_index.attr,
        &dev_attr_in_read_bytes_avail.attr,
        &dev_attr_in_write_bytes_avail.attr,
        NULL,
};
ATTRIBUTE_GROUPS(vmbus);

/*
 * vmbus_uevent - add uevent for our device
 *
 * This routine is invoked when a device is added or removed on the vmbus to
 * generate a uevent to udev in userspace. udev will then look at its rules
 * and the uevent generated here to load the appropriate driver.
 *
 * The alias string will be of the form vmbus:guid where guid is the string
 * representation of the device guid (each byte of the guid will be
 * represented with two hex characters).
 */
static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
{
        struct hv_device *dev = device_to_hv_device(device);
        int ret;
        char alias_name[VMBUS_ALIAS_LEN + 1];

        print_alias_name(dev, alias_name);
        ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name);
        return ret;
}

static const uuid_le null_guid;

static inline bool is_null_guid(const __u8 *guid)
{
        if (memcmp(guid, &null_guid, sizeof(uuid_le)))
                return false;
        return true;
}

/*
 * Return a matching hv_vmbus_device_id pointer.
 * If there is no match, return NULL.
 */
static const struct hv_vmbus_device_id *hv_vmbus_get_id(
                                        const struct hv_vmbus_device_id *id,
                                        const __u8 *guid)
{
        for (; !is_null_guid(id->guid); id++)
                if (!memcmp(&id->guid, guid, sizeof(uuid_le)))
                        return id;

        return NULL;
}

/*
 * vmbus_match - Attempt to match the specified device to the specified driver
 */
static int vmbus_match(struct device *device, struct device_driver *driver)
{
        struct hv_driver *drv = drv_to_hv_drv(driver);
        struct hv_device *hv_dev = device_to_hv_device(device);

        if (hv_vmbus_get_id(drv->id_table, hv_dev->dev_type.b))
                return 1;

        return 0;
}

/*
 * vmbus_probe - Add the new vmbus's child device
 */
static int vmbus_probe(struct device *child_device)
{
        int ret = 0;
        struct hv_driver *drv = drv_to_hv_drv(child_device->driver);
        struct hv_device *dev = device_to_hv_device(child_device);
        const struct hv_vmbus_device_id *dev_id;

        dev_id = hv_vmbus_get_id(drv->id_table, dev->dev_type.b);
        if (drv->probe) {
                ret = drv->probe(dev, dev_id);
                if (ret != 0)
                        pr_err("probe failed for device %s (%d)\n",
                               dev_name(child_device), ret);
        } else {
                pr_err("probe not set for driver %s\n",
                       dev_name(child_device));
                ret = -ENODEV;
        }
        return ret;
}

/*
 * vmbus_remove - Remove a vmbus device
 */
static int vmbus_remove(struct device *child_device)
{
        struct hv_driver *drv;
        struct hv_device *dev = device_to_hv_device(child_device);
        u32 relid = dev->channel->offermsg.child_relid;

        if (child_device->driver) {
                drv = drv_to_hv_drv(child_device->driver);
                if (drv->remove)
                        drv->remove(dev);
                else {
                        hv_process_channel_removal(dev->channel, relid);
                        pr_err("remove not set for driver %s\n",
                                dev_name(child_device));
                }
        } else {
                /*
                 * We don't have a driver for this device; deal with the
                 * rescind message by removing the channel.
                 */
                hv_process_channel_removal(dev->channel, relid);
        }

        return 0;
}

/*
 * vmbus_shutdown - Shutdown a vmbus device
 */
static void vmbus_shutdown(struct device *child_device)
{
        struct hv_driver *drv;
        struct hv_device *dev = device_to_hv_device(child_device);

        /* The device may not be attached yet */
        if (!child_device->driver)
                return;

        drv = drv_to_hv_drv(child_device->driver);

        if (drv->shutdown)
                drv->shutdown(dev);
}

/*
 * vmbus_device_release - Final callback release of the vmbus child device
 */
static void vmbus_device_release(struct device *device)
{
        struct hv_device *hv_dev = device_to_hv_device(device);

        kfree(hv_dev);
}

/* The one and only one */
static struct bus_type hv_bus = {
        .name =         "vmbus",
        .match =        vmbus_match,
        .shutdown =     vmbus_shutdown,
        .remove =       vmbus_remove,
        .probe =        vmbus_probe,
        .uevent =       vmbus_uevent,
        .dev_groups =   vmbus_groups,
};

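/*
 * Deferred-work wrapper for blocking channel messages: the work item owns
 * a private copy of the message because the SynIC slot it arrived in is
 * recycled as soon as the DPC marks it HVMSG_NONE.
 */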
struct onmessage_work_context {
        struct work_struct work;
        struct hv_message msg;
};

static void vmbus_onmessage_work(struct work_struct *work)
{
        struct onmessage_work_context *ctx;

        /* Do not process messages if we're in DISCONNECTED state */
        if (vmbus_connection.conn_state == DISCONNECTED)
                return;

        ctx = container_of(work, struct onmessage_work_context,
                           work);
        vmbus_onmessage(&ctx->msg);
        kfree(ctx);
}

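/*
 * HVMSG_TIMER_EXPIRED messages are handled directly in the ISR: forward
 * the expiration to the per-cpu clockevent handler, then retire the
 * message slot and EOM so the hypervisor can deliver the next message.
 */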
static void hv_process_timer_expiration(struct hv_message *msg, int cpu)
{
        struct clock_event_device *dev = hv_context.clk_evt[cpu];

        if (dev->event_handler)
                dev->event_handler(dev);

        msg->header.message_type = HVMSG_NONE;

        /*
         * Make sure the write to MessageType (ie set to
         * HVMSG_NONE) happens before we read the
         * MessagePending and EOMing. Otherwise, the EOMing
         * will not deliver any more messages since there is
         * no empty slot
         */
        mb();

        if (msg->header.message_flags.msg_pending) {
                /*
                 * This will cause message queue rescan to
                 * possibly deliver another msg from the
                 * hypervisor
                 */
                wrmsrl(HV_X64_MSR_EOM, 0);
        }
}

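/*
 * Tasklet that drains this cpu's SynIC message slot: non-blocking handlers
 * run inline, while VMHT_BLOCKING handlers are copied into an
 * onmessage_work_context and deferred to the vmbus work queue.
 */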
static void vmbus_on_msg_dpc(unsigned long data)
{
        int cpu = smp_processor_id();
        void *page_addr = hv_context.synic_message_page[cpu];
        struct hv_message *msg = (struct hv_message *)page_addr +
                                  VMBUS_MESSAGE_SINT;
        struct vmbus_channel_message_header *hdr;
        struct vmbus_channel_message_table_entry *entry;
        struct onmessage_work_context *ctx;

        while (1) {
                if (msg->header.message_type == HVMSG_NONE)
                        /* no msg */
                        break;

                hdr = (struct vmbus_channel_message_header *)msg->u.payload;

                if (hdr->msgtype >= CHANNELMSG_COUNT) {
                        WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype);
                        goto msg_handled;
                }

                entry = &channel_message_table[hdr->msgtype];
                if (entry->handler_type == VMHT_BLOCKING) {
                        ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
                        if (ctx == NULL)
                                continue;

                        INIT_WORK(&ctx->work, vmbus_onmessage_work);
                        memcpy(&ctx->msg, msg, sizeof(*msg));

                        queue_work(vmbus_connection.work_queue, &ctx->work);
                } else
                        entry->message_handler(hdr);

msg_handled:
                msg->header.message_type = HVMSG_NONE;

                /*
                 * Make sure the write to MessageType (ie set to
                 * HVMSG_NONE) happens before we read the
                 * MessagePending and EOMing. Otherwise, the EOMing
                 * will not deliver any more messages since there is
                 * no empty slot
                 */
                mb();

                if (msg->header.message_flags.msg_pending) {
                        /*
                         * This will cause message queue rescan to
                         * possibly deliver another msg from the
                         * hypervisor
                         */
                        wrmsrl(HV_X64_MSR_EOM, 0);
                }
        }
}

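/*
 * Top-level VMBus interrupt handler: check the per-cpu SynIC event page
 * first (channel signalling), then the message page (control messages),
 * scheduling the matching tasklet for whatever work is pending.
 */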
static void vmbus_isr(void)
{
        int cpu = smp_processor_id();
        void *page_addr;
        struct hv_message *msg;
        union hv_synic_event_flags *event;
        bool handled = false;

        page_addr = hv_context.synic_event_page[cpu];
        if (page_addr == NULL)
                return;

        event = (union hv_synic_event_flags *)page_addr +
                                         VMBUS_MESSAGE_SINT;
        /*
         * Check for events before checking for messages. This is the order
         * in which events and messages are checked in Windows guests on
         * Hyper-V, and the Windows team suggested we do the same.
         */

        if ((vmbus_proto_version == VERSION_WS2008) ||
                (vmbus_proto_version == VERSION_WIN7)) {

                /* Since we are a child, we only need to check bit 0 */
                if (sync_test_and_clear_bit(0,
                        (unsigned long *) &event->flags32[0])) {
                        handled = true;
                }
        } else {
                /*
                 * Our host is win8 or above. The signaling mechanism
                 * has changed and we can directly look at the event page.
                 * If bit n is set then we have an interrupt on the channel
                 * whose id is n.
                 */
                handled = true;
        }

        if (handled)
                tasklet_schedule(hv_context.event_dpc[cpu]);

        page_addr = hv_context.synic_message_page[cpu];
        msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;

        /* Check if there are actual msgs to be processed */
        if (msg->header.message_type != HVMSG_NONE) {
                if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
                        hv_process_timer_expiration(msg, cpu);
                else
                        tasklet_schedule(&msg_dpc);
        }
}

#ifdef CONFIG_HOTPLUG_CPU
static int hyperv_cpu_disable(void)
{
        return -ENOSYS;
}

static void hv_cpu_hotplug_quirk(bool vmbus_loaded)
{
        static void *previous_cpu_disable;

        /*
         * Offlining a CPU when running on newer hypervisors (WS2012R2, Win8,
         * ...) is not supported at this moment as channel interrupts are
         * distributed across all of them.
         */

        if ((vmbus_proto_version == VERSION_WS2008) ||
            (vmbus_proto_version == VERSION_WIN7))
                return;

        if (vmbus_loaded) {
                previous_cpu_disable = smp_ops.cpu_disable;
                smp_ops.cpu_disable = hyperv_cpu_disable;
                pr_notice("CPU offlining is not supported by hypervisor\n");
        } else if (previous_cpu_disable)
                smp_ops.cpu_disable = previous_cpu_disable;
}
#else
static void hv_cpu_hotplug_quirk(bool vmbus_loaded)
{
}
#endif

/*
 * vmbus_bus_init - Main vmbus driver initialization routine.
 *
 * Here, we
 *      - initialize the vmbus driver context
 *      - invoke the vmbus hv main init routine
 *      - get the irq resource
 *      - retrieve the channel offers
 */
static int vmbus_bus_init(int irq)
{
        int ret;

        /* Hypervisor initialization...setup hypercall page..etc */
        ret = hv_init();
        if (ret != 0) {
                pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
                return ret;
        }

        tasklet_init(&msg_dpc, vmbus_on_msg_dpc, 0);

        ret = bus_register(&hv_bus);
        if (ret)
                goto err_cleanup;

        hv_setup_vmbus_irq(vmbus_isr);

        ret = hv_synic_alloc();
        if (ret)
                goto err_alloc;
        /*
         * Initialize the per-cpu interrupt state and
         * connect to the host.
         */
        on_each_cpu(hv_synic_init, NULL, 1);
        ret = vmbus_connect();
        if (ret)
                goto err_alloc;

        hv_cpu_hotplug_quirk(true);

        /*
         * Only register if the crash MSRs are available
         */
        if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
                register_die_notifier(&hyperv_die_block);
                atomic_notifier_chain_register(&panic_notifier_list,
                                               &hyperv_panic_block);
        }

        vmbus_request_offers();

        return 0;

err_alloc:
        hv_synic_free();
        hv_remove_vmbus_irq();

        bus_unregister(&hv_bus);

err_cleanup:
        hv_cleanup();

        return ret;
}

/**
 * __vmbus_driver_register() - Register a vmbus driver
 * @hv_driver: Pointer to driver structure you want to register
 * @owner: owner module of the drv
 * @mod_name: module name string
 *
 * Registers the given driver with Linux through the 'driver_register()' call
 * and sets up the Hyper-V vmbus handling for this driver.
 * It will return the state of the 'driver_register()' call.
 *
 */
int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner,
                            const char *mod_name)
{
        int ret;

        pr_info("registering driver %s\n", hv_driver->name);

        ret = vmbus_exists();
        if (ret < 0)
                return ret;

        hv_driver->driver.name = hv_driver->name;
        hv_driver->driver.owner = owner;
        hv_driver->driver.mod_name = mod_name;
        hv_driver->driver.bus = &hv_bus;

        ret = driver_register(&hv_driver->driver);

        return ret;
}
EXPORT_SYMBOL_GPL(__vmbus_driver_register);

/**
 * vmbus_driver_unregister() - Unregister a vmbus driver
 * @hv_driver: Pointer to driver structure you want to un-register
 *
 * Un-register the given driver that was previously registered with a call to
 * vmbus_driver_register()
 */
void vmbus_driver_unregister(struct hv_driver *hv_driver)
{
        pr_info("unregistering driver %s\n", hv_driver->name);

        if (!vmbus_exists())
                driver_unregister(&hv_driver->driver);
}
EXPORT_SYMBOL_GPL(vmbus_driver_unregister);
940
941 /*
942  * vmbus_device_create - Creates and registers a new child device
943  * on the vmbus.
944  */
945 struct hv_device *vmbus_device_create(const uuid_le *type,
946                                       const uuid_le *instance,
947                                       struct vmbus_channel *channel)
948 {
949         struct hv_device *child_device_obj;
950
951         child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL);
952         if (!child_device_obj) {
953                 pr_err("Unable to allocate device object for child device\n");
954                 return NULL;
955         }
956
957         child_device_obj->channel = channel;
958         memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
959         memcpy(&child_device_obj->dev_instance, instance,
960                sizeof(uuid_le));
961
962
963         return child_device_obj;
964 }
965
966 /*
967  * vmbus_device_register - Register the child device
968  */
969 int vmbus_device_register(struct hv_device *child_device_obj)
970 {
971         int ret = 0;
972
973         dev_set_name(&child_device_obj->device, "vmbus_%d",
974                      child_device_obj->channel->id);
975
976         child_device_obj->device.bus = &hv_bus;
977         child_device_obj->device.parent = &hv_acpi_dev->dev;
978         child_device_obj->device.release = vmbus_device_release;
979
980         /*
981          * Register with the LDM. This will kick off the driver/device
982          * binding...which will eventually call vmbus_match() and vmbus_probe()
983          */
984         ret = device_register(&child_device_obj->device);
985
986         if (ret)
987                 pr_err("Unable to register child device\n");
988         else
989                 pr_debug("child device %s registered\n",
990                         dev_name(&child_device_obj->device));
991
992         return ret;
993 }
994
995 /*
996  * vmbus_device_unregister - Remove the specified child device
997  * from the vmbus.
998  */
999 void vmbus_device_unregister(struct hv_device *device_obj)
1000 {
1001         pr_debug("child device %s unregistered\n",
1002                 dev_name(&device_obj->device));
1003
1004         /*
1005          * Kick off the process of unregistering the device.
1006          * This will call vmbus_remove() and eventually vmbus_device_release()
1007          */
1008         device_unregister(&device_obj->device);
1009 }
1010
/*
 * VMBus is an ACPI-enumerated device. Get the information we
 * need from the DSDT.
 */
#define VTPM_BASE_ADDRESS 0xfed40000
static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
{
        resource_size_t start = 0;
        resource_size_t end = 0;
        struct resource *new_res;
        struct resource **old_res = &hyperv_mmio;
        struct resource **prev_res = NULL;

        switch (res->type) {
        case ACPI_RESOURCE_TYPE_IRQ:
                irq = res->data.irq.interrupts[0];
                return AE_OK;

        /*
         * "Address" descriptors are for bus windows. Ignore
         * "memory" descriptors, which are for registers on
         * devices.
         */
        case ACPI_RESOURCE_TYPE_ADDRESS32:
                start = res->data.address32.address.minimum;
                end = res->data.address32.address.maximum;
                break;

        case ACPI_RESOURCE_TYPE_ADDRESS64:
                start = res->data.address64.address.minimum;
                end = res->data.address64.address.maximum;
                break;

        default:
                /* Unused resource type */
                return AE_OK;

        }
        /*
         * Ignore ranges that are below 1MB, as they're not
         * necessary or useful here.
         */
        if (end < 0x100000)
                return AE_OK;

        new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC);
        if (!new_res)
                return AE_NO_MEMORY;

        /* If this range overlaps the virtual TPM, truncate it. */
        if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS)
                end = VTPM_BASE_ADDRESS;

        new_res->name = "hyperv mmio";
        new_res->flags = IORESOURCE_MEM;
        new_res->start = start;
        new_res->end = end;

        do {
                if (!*old_res) {
                        *old_res = new_res;
                        break;
                }

                if ((*old_res)->end < new_res->start) {
                        new_res->sibling = *old_res;
                        if (prev_res)
                                (*prev_res)->sibling = new_res;
                        *old_res = new_res;
                        break;
                }

                prev_res = old_res;
                old_res = &(*old_res)->sibling;

        } while (1);

        return AE_OK;
}

static int vmbus_acpi_remove(struct acpi_device *device)
{
        struct resource *cur_res;
        struct resource *next_res;

        if (hyperv_mmio) {
                for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) {
                        next_res = cur_res->sibling;
                        kfree(cur_res);
                }
        }

        return 0;
}

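/*
 * ACPI probe: walk _CRS on the VMBUS device itself (for the interrupt)
 * and then on its ancestors (where Gen1/Gen2 firmware exposes the MMIO
 * windows), and signal hv_acpi_init() via probe_event when done.
 */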
static int vmbus_acpi_add(struct acpi_device *device)
{
        acpi_status result;
        int ret_val = -ENODEV;
        struct acpi_device *ancestor;

        hv_acpi_dev = device;

        result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
                                        vmbus_walk_resources, NULL);

        if (ACPI_FAILURE(result))
                goto acpi_walk_err;
        /*
         * Some ancestor of the vmbus acpi device (Gen1 or Gen2
         * firmware) is the VMOD that has the mmio ranges. Get that.
         */
        for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) {
                result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS,
                                             vmbus_walk_resources, NULL);

                if (ACPI_FAILURE(result))
                        continue;
                if (hyperv_mmio)
                        break;
        }
        ret_val = 0;

acpi_walk_err:
        complete(&probe_event);
        if (ret_val)
                vmbus_acpi_remove(device);
        return ret_val;
}

static const struct acpi_device_id vmbus_acpi_device_ids[] = {
        {"VMBUS", 0},
        {"VMBus", 0},
        {"", 0},
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);

static struct acpi_driver vmbus_acpi_driver = {
        .name = "vmbus",
        .ids = vmbus_acpi_device_ids,
        .ops = {
                .add = vmbus_acpi_add,
                .remove = vmbus_acpi_remove,
        },
};

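/*
 * On kexec, quiesce the host connection: tear down the clockevents, ask
 * the host to unload VMBus, and clean up the SynIC state on every online
 * cpu before the new kernel takes over.
 */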
static void hv_kexec_handler(void)
{
        int cpu;

        hv_synic_clockevents_cleanup();
        vmbus_initiate_unload();
        for_each_online_cpu(cpu)
                smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
        hv_cleanup();
}

static void hv_crash_handler(struct pt_regs *regs)
{
        vmbus_initiate_unload();
        /*
         * In crash handler we can't schedule synic cleanup for all CPUs,
         * doing the cleanup for current CPU only. This should be sufficient
         * for kdump.
         */
        hv_synic_cleanup(NULL);
        hv_cleanup();
}

static int __init hv_acpi_init(void)
{
        int ret, t;

        if (x86_hyper != &x86_hyper_ms_hyperv)
                return -ENODEV;

        init_completion(&probe_event);

        /*
         * Get irq resources first.
         */
        ret = acpi_bus_register_driver(&vmbus_acpi_driver);

        if (ret)
                return ret;

        t = wait_for_completion_timeout(&probe_event, 5*HZ);
        if (t == 0) {
                ret = -ETIMEDOUT;
                goto cleanup;
        }

        if (irq <= 0) {
                ret = -ENODEV;
                goto cleanup;
        }

        ret = vmbus_bus_init(irq);
        if (ret)
                goto cleanup;

        hv_setup_kexec_handler(hv_kexec_handler);
        hv_setup_crash_handler(hv_crash_handler);

        return 0;

cleanup:
        acpi_bus_unregister_driver(&vmbus_acpi_driver);
        hv_acpi_dev = NULL;
        return ret;
}

static void __exit vmbus_exit(void)
{
        int cpu;

        hv_remove_kexec_handler();
        hv_remove_crash_handler();
        vmbus_connection.conn_state = DISCONNECTED;
        hv_synic_clockevents_cleanup();
        vmbus_disconnect();
        hv_remove_vmbus_irq();
        tasklet_kill(&msg_dpc);
        vmbus_free_channels();
        if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
                unregister_die_notifier(&hyperv_die_block);
                atomic_notifier_chain_unregister(&panic_notifier_list,
                                                 &hyperv_panic_block);
        }
        bus_unregister(&hv_bus);
        hv_cleanup();
        for_each_online_cpu(cpu) {
                tasklet_kill(hv_context.event_dpc[cpu]);
                smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
        }
        hv_synic_free();
        acpi_bus_unregister_driver(&vmbus_acpi_driver);
        hv_cpu_hotplug_quirk(false);
}

MODULE_LICENSE("GPL");

subsys_initcall(hv_acpi_init);
module_exit(vmbus_exit);