/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
	0xffe6fffbfcfdfc40UL,
	0x005e800000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
static struct gmap_notifier gmap_notifier;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
}

int kvm_arch_init(void *opaque)
{
	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
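/*
 * Report which optional KVM capabilities this s390 port supports:
 * 0 for unknown extensions, a non-zero (per-capability) value for
 * supported ones.
 */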
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	default:
		r = 0;
	}
	return r;
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	down_read(&gmap->mm->mmap_sem);
	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (gmap_test_and_clear_dirty(address, gmap))
			mark_page_dirty(kvm, cur_gfn);
	}
	up_read(&gmap->mm->mmap_sem);
}
/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
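/*
 * Enable a userspace-requested, VM-wide capability (KVM_ENABLE_CAP on
 * the VM fd), e.g. the in-kernel irqchip or guest vector registers.
 */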
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac->mask, 129);
			set_kvm_facility(kvm->arch.model.fac->list, 129);
			r = 0;
		} else
			r = -EINVAL;
		break;
	case KVM_CAP_S390_USER_STSI:
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		/* enable CMMA only for z10 and later (EDAT_1) */
		ret = -EINVAL;
		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
			break;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (new_limit > kvm->arch.gmap->asce_end)
			return -E2BIG;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			/* gmap_alloc will round the limit up */
			struct gmap *new = gmap_alloc(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_free(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
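/*
 * Toggle AES/DEA key wrapping for the whole VM. New wrapping key masks
 * are generated on enable; every existing vcpu is then kicked out of
 * SIE so its crypto control block can be re-initialized.
 */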
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
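/*
 * The guest TOD clock is kept as an offset (kvm->arch.epoch) from the
 * host TOD clock. The KVM_S390_VM_TOD attributes below read and write
 * that offset via the high and low halves of the guest clock value.
 */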
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *cur_vcpu;
	unsigned int vcpu_idx;
	u64 host_tod, gtod;
	int r;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	r = store_tod_clock(&host_tod);
	if (r)
		return r;

	mutex_lock(&kvm->lock);
	kvm->arch.epoch = gtod - host_tod;
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 host_tod, gtod;
	int r;

	r = store_tod_clock(&host_tod);
	if (r)
		return r;

	gtod = host_tod + kvm->arch.epoch;
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (atomic_read(&kvm->online_vcpus)) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
		       sizeof(struct cpuid));
		kvm->arch.model.ibc = proc->ibc;
		memcpy(kvm->arch.model.fac->list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
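/*
 * Copy a range of guest storage keys to a userspace buffer
 * (KVM_S390_GET_SKEYS). Returns KVM_S390_GET_SKEYS_NONE when the guest
 * is not using storage keys at all.
 */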
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	unsigned long curkey;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		curkey = get_guest_storage_key(current->mm, hva);
		if (IS_ERR_VALUE(curkey)) {
			r = curkey;
			goto out;
		}
		keys[i] = curkey;
	}

	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
			 sizeof(uint8_t) * args->count);
	if (r)
		r = -EFAULT;
out:
	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			goto out;
		}

		r = set_guest_storage_key(current->mm, hva,
					  (unsigned long)keys[i], 0);
		if (r)
			goto out;
	}
out:
	kvfree(keys);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			kvm_set_irq_routing(kvm, &routing, 0, 0);
			r = 0;
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
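/*
 * Query the Adjunct Processor (crypto card) configuration via the
 * PQAP(QCI) instruction. The 128-byte config block is filled in by
 * the hardware; the condition code is returned to the caller.
 */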
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(2) && test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
{
	get_cpu_id(cpu_id);
	cpu_id->version = 0xff;
}

static int kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return 0;

	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
					 GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.crypto.crycb)
		return -ENOMEM;

	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

	return 0;
}
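/*
 * Create the VM-wide state: the SCA (system control area), the s390
 * debug feature, the facility mask and list, the crypto control block
 * and, for non-ucontrol guests, the guest address space (gmap).
 */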
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset = (sca_offset + 16) & 0x7f0;
	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	/*
	 * The architectural maximum amount of facilities is 16 kbit. To store
	 * this amount, 2 kbyte of memory is required. Thus we need a full
	 * page to hold the guest facility list (arch.model.fac->list) and the
	 * facility mask (arch.model.fac->mask). Its address size has to be
	 * 31 bits and word aligned.
	 */
	kvm->arch.model.fac =
		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.model.fac)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac->mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	if (kvm_s390_crypto_init(kvm) < 0)
		goto out_err;

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "%s", "vm created");

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
	} else {
		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);

	return 0;
out_err:
	kfree(kvm->arch.crypto.crycb);
	free_page((unsigned long)kvm->arch.model.fac);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)(kvm->arch.sca));
	return rc;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		clear_bit(63 - vcpu->vcpu_id,
			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
		    (__u64) vcpu->arch.sie_block)
			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	smp_mb();

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)kvm->arch.model.fac);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);
	kfree(kvm->arch.crypto.crycb);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 129))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
	if (test_kvm_facility(vcpu->kvm, 129))
		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
	else
		save_fp_regs(vcpu->arch.host_fpregs.fprs);
	save_access_regs(vcpu->arch.host_acrs);
	if (test_kvm_facility(vcpu->kvm, 129)) {
		restore_fp_ctl(&vcpu->run->s.regs.fpc);
		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
	} else {
		restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
	}
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);
	if (test_kvm_facility(vcpu->kvm, 129)) {
		save_fp_ctl(&vcpu->run->s.regs.fpc);
		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
	} else {
		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
	}
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
	if (test_kvm_facility(vcpu->kvm, 129))
		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
	else
		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
	restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm     = 0UL;
	vcpu->arch.sie_block->ckc       = 0UL;
	vcpu->arch.sie_block->todpr     = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm))
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}
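/*
 * Allocate the collection buffer (CBRL) page used by CMMA and flip the
 * interpretation controls in ecb2: 0x80 enables CMMA handling in SIE,
 * while clearing 0x08 (PFMF interpretation, going by later kernels'
 * naming of these bits) keeps PFMF intercepted by the host.
 */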
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.cpu_id = model->cpu_id;
	vcpu->arch.sie_block->ibc = model->ibc;
	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);
	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_set_mask(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_set_mask(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	vcpu->arch.sie_block->ecb   = 6;
	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2  = 8;
	vcpu->arch.sie_block->eca   = 0xC1002000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (id >= KVM_MAX_VCPUS)
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
	vcpu->arch.host_vregs = &sie_page->vregs;

	vcpu->arch.sie_block->icpua = id;
	if (!kvm_is_ucontrol(kvm)) {
		if (!kvm->arch.sca) {
			WARN_ON_ONCE(1);
			goto out_free_cpu;
		}
		if (!kvm->arch.sca->cpu[id].sda)
			kvm->arch.sca->cpu[id].sda =
				(__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh =
			(__u32)(((__u64)kvm->arch.sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
	}

	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
	if (!vcpu->requests)
		return 0;
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_set_mask(CPUSTAT_IBS,
					&vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_clear_mask(CPUSTAT_IBS,
					  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
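/*
 * Prepare a vcpu for a SIE round trip: process completed pfaults,
 * deliver pending interrupts and requests, and arm guest debugging.
 * Returns 0 when the vcpu may enter SIE, a negative value otherwise.
 */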
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	u8 opcode;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
	if (rc)
		return kvm_s390_inject_prog_cond(vcpu, rc);
	psw->addr = __rewind_psw(*psw, -insn_length(opcode));

	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	int rc = -1;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	if (exit_reason >= 0) {
		rc = 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		rc = -EREMOTE;

	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu)) {
			rc = 0;
		} else {
			gpa_t gpa = current->thread.gmap_addr;
			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
		}
	}

	if (rc == -1)
		rc = vcpu_post_run_fault_in_sie(vcpu);

	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);

	if (rc == 0) {
		if (kvm_is_ucontrol(vcpu->kvm))
			/* Don't exit for host interrupts. */
			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
		else
			rc = kvm_handle_sie_intercept(vcpu);
	}

	return rc;
}
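/*
 * The main vcpu loop: alternate between preparing the vcpu, running
 * the guest via sie64a() and post-processing the SIE exit, until a
 * signal, a guestdbg exit or an error requires a return to userspace.
 */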
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		__kvm_guest_enter();
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__kvm_guest_exit();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
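/*
 * sync_regs()/store_regs() shuttle the register state that userspace
 * shares via kvm_run (PSW, prefix, CRs, timers, pfault state) into and
 * out of the SIE control block around each KVM_RUN invocation.
 */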
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EOPNOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed
		 * kvm_run has been prepared by the handler */
		rc = 0;
	}

	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	unsigned int px;
	u64 clkcomp;
	int rc;

	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = SAVE_AREA_BASE;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
	}
	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
			     vcpu->arch.guest_fpregs.fprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
			      &vcpu->arch.sie_block->gpsw, 16);
	px = kvm_s390_get_prefix(vcpu);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
			      &px, 4);
	rc |= write_guest_abs(vcpu,
			      gpa + offsetof(struct save_area, fp_ctrl_reg),
			      &vcpu->arch.guest_fpregs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
			      &vcpu->arch.sie_block->todpr, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
			      &vcpu->arch.sie_block->cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Lets update our copies before we save
	 * it into the save area
	 */
	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * it into the save area.
	 */
	save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}
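/*
 * IBS is a SIE speed-up that is only valid while at most one vcpu is
 * started. The helpers below toggle it through synchronous requests
 * whenever vcpus enter or leave the STOPPED state.
 */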
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
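/*
 * Read or write guest logical memory on behalf of userspace
 * (KVM_S390_MEM_OP). With KVM_S390_MEMOP_F_CHECK_ONLY only the access
 * permissions are verified; otherwise the data is copied through a
 * kernel bounce buffer.
 */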
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
			ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
		mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static int __init kvm_s390_init(void)
{
	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c,
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");