Merge tag 'kvm-s390-20140422' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms39...
author     Marcelo Tosatti <mtosatti@redhat.com>
           Tue, 22 Apr 2014 13:51:06 +0000 (10:51 -0300)
committer  Marcelo Tosatti <mtosatti@redhat.com>
           Tue, 22 Apr 2014 13:51:06 +0000 (10:51 -0300)

Lazy storage key handling
-------------------------
Linux does not use the ACC and F bits of the storage key. Newer Linux
versions also do not use the storage keys for dirty and reference
tracking. For such guests we can optimize fault handling as well as
page-in and page-out by simply not caring about the guest-visible
storage key. We trap the guest storage key instructions to enable those
keys only on demand.
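
A minimal sketch of the on-demand path, with a hypothetical handler name
and simplified retry handling (the real wiring lives in priv.c and uses
the ICTL_ISKE/SSKE/RRBE intercept bits added below):

  /* Hypothetical first-intercept handler for ISKE/SSKE/RRBE. */
  static int handle_skey_on_demand(struct kvm_vcpu *vcpu)
  {
          s390_enable_skey();     /* back guest keys with real ones */
          /* no need to intercept the storage key instructions any more */
          vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
          return 0;               /* re-enter SIE and retry the instruction */
  }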

Migration bitmap
----------------

Until now, s390 never provided a proper dirty bitmap; let's provide one.
We also change the user dirty tracking to a fault-based mechanism. This
makes the host completely independent of the storage keys. Long term
this will allow us to back guest memory with large pages.
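
With the new PGSTE_UC_BIT tracking, a dirty-log sync boils down to a
per-page test-and-clear walk. A rough sketch; gmap_test_and_clear_dirty()
and mark_page_dirty() are from this series, while the wrapper function and
its loop bounds are purely illustrative:

  /* Illustrative only: harvest the software dirty bit per guest page. */
  static void sync_dirty_log(struct kvm *kvm, struct gmap *gmap,
                             unsigned long start, unsigned long end)
  {
          unsigned long addr;

          for (addr = start; addr < end; addr += PAGE_SIZE)
                  if (gmap_test_and_clear_dirty(addr, gmap))
                          mark_page_dirty(kvm, addr >> PAGE_SHIFT);
  }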

per-VM device attributes
------------------------
To avoid introducing new ioctls, let's also provide the attribute
semantics on the VM "device".
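
For example, userspace can probe an attribute on the VM file descriptor
exactly as it would on a device fd (vm_fd is assumed to come from
KVM_CREATE_VM):

  struct kvm_device_attr attr = {
          .group = KVM_S390_VM_MEM_CTRL,
          .attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
  };

  if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
          /* per-VM attributes are available */;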

Userspace controlled CMMA
-------------------------
The CMMA assist is changed from "always on" to "on if requested" via
per-VM device attributes. In addition, a callback to reset all usage
states is provided.
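
A hedged userspace sketch of driving this via the new attributes (vm_fd
assumed, error handling elided):

  struct kvm_device_attr attr = {
          .group = KVM_S390_VM_MEM_CTRL,
          .attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
  };

  ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr); /* -EBUSY once vcpus exist */
  attr.attr = KVM_S390_VM_MEM_CLR_CMMA;
  ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr); /* reset all usage states */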

Proper guest DAT handling for intercepts
----------------------------------------
While instructions handled by SIE take care of all addressing aspects,
KVM/s390 currently does not care about guest address translation of
intercepts. This worked out fine, because
- the s390 Linux kernel has a 1:1 mapping between kernel virtual and
  real addresses for all pages up to the memory size
- intercepts happen only in a small number of cases
- all of these intercepts happen to be in the kernel text for current
  distros

Of course we need to do better for other intercepts, kernel modules etc.
We provide the infrastructure and rework all in-kernel intercepts to work
on logical addresses (honoring paging etc.) instead of real ones. The
code has been running internally for several months now, so it is time
to go public.
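
The resulting in-kernel pattern, roughly (mirroring the reworked diag.c
hunk below; read_guest() walks the guest DAT tables and returns either a
host error or a guest access exception code):

  rc = read_guest(vcpu, guest_addr, &parm, sizeof(parm));
  if (rc)         /* < 0: host fault, > 0: program interruption code */
          return kvm_s390_inject_prog_cond(vcpu, rc);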

GDB support
-----------
We provide breakpoints, single stepping and watchpoints.
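
A minimal sketch of arming one hardware breakpoint from userspace,
assuming a vcpu_fd and a breakpoint address bp_addr (struct layouts per
the uapi changes below):

  struct kvm_hw_breakpoint bp = {
          .addr = bp_addr,
          .type = KVM_HW_BP,
  };
  struct kvm_guest_debug dbg = {
          .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
          .arch = {
                  .nr_hw_bp = 1,
                  .hw_bp = &bp,
          },
  };

  ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);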

Fixes/Cleanups
--------------
- Improve program check delivery
- Factor out the handling of transactional memory on program checks
- Use the existing define __LC_PGM_TDB
- Several cleanups in the lowcore structure
- Documentation

NOTES
-----
- All patches touching base s390 are either ACKed or written by the s390
  maintainers
- One base KVM patch "KVM: add kvm_is_error_gpa() helper"
- One patch introduces the notion of VM device attributes

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Conflicts:
include/uapi/linux/kvm.h

33 files changed:
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/devices/vm.txt [new file with mode: 0644]
Documentation/virtual/kvm/s390-diag.txt
arch/s390/include/asm/ctl_reg.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/lowcore.h
arch/s390/include/asm/mmu.h
arch/s390/include/asm/mmu_context.h
arch/s390/include/asm/pgalloc.h
arch/s390/include/asm/pgtable.h
arch/s390/include/asm/ptrace.h
arch/s390/include/asm/sclp.h
arch/s390/include/uapi/asm/kvm.h
arch/s390/kernel/asm-offsets.c
arch/s390/kernel/entry.S
arch/s390/kernel/entry64.S
arch/s390/kvm/Makefile
arch/s390/kvm/diag.c
arch/s390/kvm/gaccess.c [new file with mode: 0644]
arch/s390/kvm/gaccess.h
arch/s390/kvm/guestdbg.c [new file with mode: 0644]
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/priv.c
arch/s390/kvm/sigp.c
arch/s390/kvm/trace.h
arch/s390/mm/pgtable.c
drivers/s390/char/sclp_early.c
include/linux/kvm_host.h
include/uapi/linux/kvm.h
virt/kvm/kvm_main.c

index a9380ba54c8e984a997edb65fae8be9fef02f761..2014ff12b492ed242dc01b7c4615d3458c593598 100644 (file)
@@ -2314,8 +2314,8 @@ struct kvm_create_device {
 
 4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
 
-Capability: KVM_CAP_DEVICE_CTRL
-Type: device ioctl
+Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
+Type: device ioctl, vm ioctl
 Parameters: struct kvm_device_attr
 Returns: 0 on success, -1 on error
 Errors:
@@ -2340,8 +2340,8 @@ struct kvm_device_attr {
 
 4.81 KVM_HAS_DEVICE_ATTR
 
-Capability: KVM_CAP_DEVICE_CTRL
-Type: device ioctl
+Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
+Type: device ioctl, vm ioctl
 Parameters: struct kvm_device_attr
 Returns: 0 on success, -1 on error
 Errors:
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
new file mode 100644 (file)
index 0000000..0d16f96
--- /dev/null
@@ -0,0 +1,26 @@
+Generic vm interface
+====================================
+
+The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
+KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
+struct kvm_device_attr as other devices, but targets VM-wide settings
+and controls.
+
+The groups and attributes per virtual machine, if any, are architecture
+specific.
+
+1. GROUP: KVM_S390_VM_MEM_CTRL
+Architectures: s390
+
+1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
+Parameters: none
+Returns: -EBUSY if a vcpu is already defined, otherwise 0
+
+Enables CMMA for the virtual machine
+
+1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
+Parameters: none
+Returns: 0
+
+Clear the CMMA status for all guest pages, so any pages the guest marked
+as unused are again used and may not be reclaimed by the host.
index f1de4fbade155b3ccccdf7d0328cdf0adacf2a7f..48c4921794edf0b98c9dfb59e0361b235d3124e2 100644 (file)
@@ -78,3 +78,5 @@ DIAGNOSE function code 'X'501 - KVM breakpoint
 
 If the function code specifies 0x501, breakpoint functions may be performed.
 This function code is handled by userspace.
+
+This diagnose function code has no subfunctions and uses no parameters.
index 4e63f1a13600a4e126d470138e074d2e4e98682d..31ab9f346d7e37f1c7ce8048db783ba4a15d128c 100644 (file)
@@ -57,6 +57,20 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
 void smp_ctl_set_bit(int cr, int bit);
 void smp_ctl_clear_bit(int cr, int bit);
 
+union ctlreg0 {
+       unsigned long val;
+       struct {
+#ifdef CONFIG_64BIT
+               unsigned long      : 32;
+#endif
+               unsigned long      : 3;
+               unsigned long lap  : 1; /* Low-address-protection control */
+               unsigned long      : 4;
+               unsigned long edat : 1; /* Enhanced-DAT-enablement control */
+               unsigned long      : 23;
+       };
+};
+
 #ifdef CONFIG_SMP
 # define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
 # define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
index 154b60089be996de483f07844f9229c728918892..0d45f6fe734f0a085b227efe5bcc97928e6df8bd 100644 (file)
@@ -39,9 +39,17 @@ struct sca_entry {
        __u64   reserved2[2];
 } __attribute__((packed));
 
+union ipte_control {
+       unsigned long val;
+       struct {
+               unsigned long k  : 1;
+               unsigned long kh : 31;
+               unsigned long kg : 32;
+       };
+};
 
 struct sca_block {
-       __u64   ipte_control;
+       union ipte_control ipte_control;
        __u64   reserved[5];
        __u64   mcn;
        __u64   reserved2;
@@ -85,12 +93,26 @@ struct kvm_s390_sie_block {
        __u8    reserved40[4];          /* 0x0040 */
 #define LCTL_CR0       0x8000
 #define LCTL_CR6       0x0200
+#define LCTL_CR9       0x0040
+#define LCTL_CR10      0x0020
+#define LCTL_CR11      0x0010
 #define LCTL_CR14      0x0002
        __u16   lctl;                   /* 0x0044 */
        __s16   icpua;                  /* 0x0046 */
-#define ICTL_LPSW 0x00400000
+#define ICTL_PINT      0x20000000
+#define ICTL_LPSW      0x00400000
+#define ICTL_STCTL     0x00040000
+#define ICTL_ISKE      0x00004000
+#define ICTL_SSKE      0x00002000
+#define ICTL_RRBE      0x00001000
        __u32   ictl;                   /* 0x0048 */
        __u32   eca;                    /* 0x004c */
+#define ICPT_INST      0x04
+#define ICPT_PROGI     0x08
+#define ICPT_INSTPROGI 0x0C
+#define ICPT_OPEREXC   0x2C
+#define ICPT_PARTEXEC  0x38
+#define ICPT_IOINST    0x40
        __u8    icptcode;               /* 0x0050 */
        __u8    reserved51;             /* 0x0051 */
        __u16   ihcpu;                  /* 0x0052 */
@@ -109,9 +131,21 @@ struct kvm_s390_sie_block {
        psw_t   gpsw;                   /* 0x0090 */
        __u64   gg14;                   /* 0x00a0 */
        __u64   gg15;                   /* 0x00a8 */
-       __u8    reservedb0[30];         /* 0x00b0 */
-       __u16   iprcc;                  /* 0x00ce */
-       __u8    reservedd0[48];         /* 0x00d0 */
+       __u8    reservedb0[28];         /* 0x00b0 */
+       __u16   pgmilc;                 /* 0x00cc */
+       __u16   iprcc;                  /* 0x00ce */
+       __u32   dxc;                    /* 0x00d0 */
+       __u16   mcn;                    /* 0x00d4 */
+       __u8    perc;                   /* 0x00d6 */
+       __u8    peratmid;               /* 0x00d7 */
+       __u64   peraddr;                /* 0x00d8 */
+       __u8    eai;                    /* 0x00e0 */
+       __u8    peraid;                 /* 0x00e1 */
+       __u8    oai;                    /* 0x00e2 */
+       __u8    armid;                  /* 0x00e3 */
+       __u8    reservede4[4];          /* 0x00e4 */
+       __u64   tecmc;                  /* 0x00e8 */
+       __u8    reservedf0[16];         /* 0x00f0 */
        __u64   gcr[16];                /* 0x0100 */
        __u64   gbea;                   /* 0x0180 */
        __u8    reserved188[24];        /* 0x0188 */
@@ -146,6 +180,8 @@ struct kvm_vcpu_stat {
        u32 exit_instruction;
        u32 instruction_lctl;
        u32 instruction_lctlg;
+       u32 instruction_stctl;
+       u32 instruction_stctg;
        u32 exit_program_interruption;
        u32 exit_instr_and_program;
        u32 deliver_external_call;
@@ -164,6 +200,7 @@ struct kvm_vcpu_stat {
        u32 instruction_stpx;
        u32 instruction_stap;
        u32 instruction_storage_key;
+       u32 instruction_ipte_interlock;
        u32 instruction_stsch;
        u32 instruction_chsc;
        u32 instruction_stsi;
@@ -183,13 +220,58 @@ struct kvm_vcpu_stat {
        u32 diagnose_9c;
 };
 
-#define PGM_OPERATION            0x01
-#define PGM_PRIVILEGED_OP       0x02
-#define PGM_EXECUTE              0x03
-#define PGM_PROTECTION           0x04
-#define PGM_ADDRESSING           0x05
-#define PGM_SPECIFICATION        0x06
-#define PGM_DATA                 0x07
+#define PGM_OPERATION                  0x01
+#define PGM_PRIVILEGED_OP              0x02
+#define PGM_EXECUTE                    0x03
+#define PGM_PROTECTION                 0x04
+#define PGM_ADDRESSING                 0x05
+#define PGM_SPECIFICATION              0x06
+#define PGM_DATA                       0x07
+#define PGM_FIXED_POINT_OVERFLOW       0x08
+#define PGM_FIXED_POINT_DIVIDE         0x09
+#define PGM_DECIMAL_OVERFLOW           0x0a
+#define PGM_DECIMAL_DIVIDE             0x0b
+#define PGM_HFP_EXPONENT_OVERFLOW      0x0c
+#define PGM_HFP_EXPONENT_UNDERFLOW     0x0d
+#define PGM_HFP_SIGNIFICANCE           0x0e
+#define PGM_HFP_DIVIDE                 0x0f
+#define PGM_SEGMENT_TRANSLATION                0x10
+#define PGM_PAGE_TRANSLATION           0x11
+#define PGM_TRANSLATION_SPEC           0x12
+#define PGM_SPECIAL_OPERATION          0x13
+#define PGM_OPERAND                    0x15
+#define PGM_TRACE_TABEL                        0x16
+#define PGM_SPACE_SWITCH               0x1c
+#define PGM_HFP_SQUARE_ROOT            0x1d
+#define PGM_PC_TRANSLATION_SPEC                0x1f
+#define PGM_AFX_TRANSLATION            0x20
+#define PGM_ASX_TRANSLATION            0x21
+#define PGM_LX_TRANSLATION             0x22
+#define PGM_EX_TRANSLATION             0x23
+#define PGM_PRIMARY_AUTHORITY          0x24
+#define PGM_SECONDARY_AUTHORITY                0x25
+#define PGM_LFX_TRANSLATION            0x26
+#define PGM_LSX_TRANSLATION            0x27
+#define PGM_ALET_SPECIFICATION         0x28
+#define PGM_ALEN_TRANSLATION           0x29
+#define PGM_ALE_SEQUENCE               0x2a
+#define PGM_ASTE_VALIDITY              0x2b
+#define PGM_ASTE_SEQUENCE              0x2c
+#define PGM_EXTENDED_AUTHORITY         0x2d
+#define PGM_LSTE_SEQUENCE              0x2e
+#define PGM_ASTE_INSTANCE              0x2f
+#define PGM_STACK_FULL                 0x30
+#define PGM_STACK_EMPTY                        0x31
+#define PGM_STACK_SPECIFICATION                0x32
+#define PGM_STACK_TYPE                 0x33
+#define PGM_STACK_OPERATION            0x34
+#define PGM_ASCE_TYPE                  0x38
+#define PGM_REGION_FIRST_TRANS         0x39
+#define PGM_REGION_SECOND_TRANS                0x3a
+#define PGM_REGION_THIRD_TRANS         0x3b
+#define PGM_MONITOR                    0x40
+#define PGM_PER                                0x80
+#define PGM_CRYPTO_OPERATION           0x119
 
 struct kvm_s390_interrupt_info {
        struct list_head list;
@@ -229,6 +311,45 @@ struct kvm_s390_float_interrupt {
        unsigned int irq_count;
 };
 
+struct kvm_hw_wp_info_arch {
+       unsigned long addr;
+       unsigned long phys_addr;
+       int len;
+       char *old_data;
+};
+
+struct kvm_hw_bp_info_arch {
+       unsigned long addr;
+       int len;
+};
+
+/*
+ * Only the upper 16 bits of kvm_guest_debug->control are arch specific.
+ * Further KVM_GUESTDBG flags which can be used from userspace can be found in
+ * arch/s390/include/uapi/asm/kvm.h
+ */
+#define KVM_GUESTDBG_EXIT_PENDING 0x10000000
+
+#define guestdbg_enabled(vcpu) \
+               (vcpu->guest_debug & KVM_GUESTDBG_ENABLE)
+#define guestdbg_sstep_enabled(vcpu) \
+               (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+#define guestdbg_hw_bp_enabled(vcpu) \
+               (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \
+               (vcpu->guest_debug & KVM_GUESTDBG_EXIT_PENDING))
+
+struct kvm_guestdbg_info_arch {
+       unsigned long cr0;
+       unsigned long cr9;
+       unsigned long cr10;
+       unsigned long cr11;
+       struct kvm_hw_bp_info_arch *hw_bp_info;
+       struct kvm_hw_wp_info_arch *hw_wp_info;
+       int nr_hw_bp;
+       int nr_hw_wp;
+       unsigned long last_bp;
+};
 
 struct kvm_vcpu_arch {
        struct kvm_s390_sie_block *sie_block;
@@ -238,11 +359,13 @@ struct kvm_vcpu_arch {
        struct kvm_s390_local_interrupt local_int;
        struct hrtimer    ckc_timer;
        struct tasklet_struct tasklet;
+       struct kvm_s390_pgm_info pgm;
        union  {
                struct cpuid    cpu_id;
                u64             stidp_data;
        };
        struct gmap *gmap;
+       struct kvm_guestdbg_info_arch guestdbg;
 #define KVM_S390_PFAULT_TOKEN_INVALID  (-1UL)
        unsigned long pfault_token;
        unsigned long pfault_select;
@@ -285,7 +408,9 @@ struct kvm_arch{
        struct gmap *gmap;
        int css_support;
        int use_irqchip;
+       int use_cmma;
        struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
+       wait_queue_head_t ipte_wq;
 };
 
 #define KVM_HVA_ERR_BAD                (-1UL)
index bbf8141408cdadf2881011a683937c3636e17375..e88cb8c54130d8ba6675be321193d93ca0e23765 100644 (file)
@@ -56,13 +56,14 @@ struct _lowcore {
        __u16   pgm_code;                       /* 0x008e */
        __u32   trans_exc_code;                 /* 0x0090 */
        __u16   mon_class_num;                  /* 0x0094 */
-       __u16   per_perc_atmid;                 /* 0x0096 */
+       __u8    per_code;                       /* 0x0096 */
+       __u8    per_atmid;                      /* 0x0097 */
        __u32   per_address;                    /* 0x0098 */
        __u32   monitor_code;                   /* 0x009c */
        __u8    exc_access_id;                  /* 0x00a0 */
        __u8    per_access_id;                  /* 0x00a1 */
        __u8    op_access_id;                   /* 0x00a2 */
-       __u8    ar_access_id;                   /* 0x00a3 */
+       __u8    ar_mode_id;                     /* 0x00a3 */
        __u8    pad_0x00a4[0x00b8-0x00a4];      /* 0x00a4 */
        __u16   subchannel_id;                  /* 0x00b8 */
        __u16   subchannel_nr;                  /* 0x00ba */
@@ -196,12 +197,13 @@ struct _lowcore {
        __u16   pgm_code;                       /* 0x008e */
        __u32   data_exc_code;                  /* 0x0090 */
        __u16   mon_class_num;                  /* 0x0094 */
-       __u16   per_perc_atmid;                 /* 0x0096 */
+       __u8    per_code;                       /* 0x0096 */
+       __u8    per_atmid;                      /* 0x0097 */
        __u64   per_address;                    /* 0x0098 */
        __u8    exc_access_id;                  /* 0x00a0 */
        __u8    per_access_id;                  /* 0x00a1 */
        __u8    op_access_id;                   /* 0x00a2 */
-       __u8    ar_access_id;                   /* 0x00a3 */
+       __u8    ar_mode_id;                     /* 0x00a3 */
        __u8    pad_0x00a4[0x00a8-0x00a4];      /* 0x00a4 */
        __u64   trans_exc_code;                 /* 0x00a8 */
        __u64   monitor_code;                   /* 0x00b0 */
index f77695a82f647dbad92539be85dbac0babc60e2d..a5e656260a70183dd4f3768c3be082fed5988603 100644 (file)
@@ -16,6 +16,8 @@ typedef struct {
        unsigned long vdso_base;
        /* The mmu context has extended page tables. */
        unsigned int has_pgste:1;
+       /* The mmu context uses storage keys. */
+       unsigned int use_skey:1;
 } mm_context_t;
 
 #define INIT_MM_CONTEXT(name)                                                \
index 71be346d0e3c8074d7be6542815610567ff66606..d42fb1b728d89c1768ad3ccefd9fb2fd82c20306 100644 (file)
@@ -23,6 +23,7 @@ static inline int init_new_context(struct task_struct *tsk,
        mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #endif
        mm->context.has_pgste = 0;
+       mm->context.use_skey = 0;
        mm->context.asce_limit = STACK_TOP_MAX;
        crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
        return 0;
index 884017cbfa9fade412372f7f781e503b3f39513b..9e18a61d3df39c0c96f81032ae5f67ad427fa32c 100644 (file)
@@ -22,7 +22,8 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
 void page_table_free(struct mm_struct *, unsigned long *);
 void page_table_free_rcu(struct mmu_gather *, unsigned long *);
 
-void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long);
+void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
+                           bool init_skey);
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
                          unsigned long key, bool nq);
 
index 12f75313e086d4695ee768bde41beac4d3418de9..fcba5e03839f560d0348a4df0d7493ea5151e90b 100644 (file)
@@ -309,7 +309,8 @@ extern unsigned long MODULES_END;
 #define PGSTE_HC_BIT   0x00200000UL
 #define PGSTE_GR_BIT   0x00040000UL
 #define PGSTE_GC_BIT   0x00020000UL
-#define PGSTE_IN_BIT   0x00008000UL    /* IPTE notify bit */
+#define PGSTE_UC_BIT   0x00008000UL    /* user dirty (migration) */
+#define PGSTE_IN_BIT   0x00004000UL    /* IPTE notify bit */
 
 #else /* CONFIG_64BIT */
 
@@ -391,7 +392,8 @@ extern unsigned long MODULES_END;
 #define PGSTE_HC_BIT   0x0020000000000000UL
 #define PGSTE_GR_BIT   0x0004000000000000UL
 #define PGSTE_GC_BIT   0x0002000000000000UL
-#define PGSTE_IN_BIT   0x0000800000000000UL    /* IPTE notify bit */
+#define PGSTE_UC_BIT   0x0000800000000000UL    /* user dirty (migration) */
+#define PGSTE_IN_BIT   0x0000400000000000UL    /* IPTE notify bit */
 
 #endif /* CONFIG_64BIT */
 
@@ -466,6 +468,16 @@ static inline int mm_has_pgste(struct mm_struct *mm)
 #endif
        return 0;
 }
+
+static inline int mm_use_skey(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+       if (mm->context.use_skey)
+               return 1;
+#endif
+       return 0;
+}
+
 /*
  * pgd/pmd/pte query functions
  */
@@ -699,26 +711,17 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste)
 #endif
 }
 
-static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
+static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
+                                      struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
        unsigned long address, bits, skey;
 
-       if (pte_val(*ptep) & _PAGE_INVALID)
+       if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
                return pgste;
        address = pte_val(*ptep) & PAGE_MASK;
        skey = (unsigned long) page_get_storage_key(address);
        bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
-       if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
-               /* Transfer dirty + referenced bit to host bits in pgste */
-               pgste_val(pgste) |= bits << 52;
-               page_set_storage_key(address, skey ^ bits, 0);
-       } else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
-                  (bits & _PAGE_REFERENCED)) {
-               /* Transfer referenced bit to host bit in pgste */
-               pgste_val(pgste) |= PGSTE_HR_BIT;
-               page_reset_referenced(address);
-       }
        /* Transfer page changed & referenced bit to guest bits in pgste */
        pgste_val(pgste) |= bits << 48;         /* GR bit & GC bit */
        /* Copy page access key and fetch protection bit to pgste */
@@ -729,25 +732,14 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
 
 }
 
-static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
-       if (pte_val(*ptep) & _PAGE_INVALID)
-               return pgste;
-       /* Get referenced bit from storage key */
-       if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
-               pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
-#endif
-       return pgste;
-}
-
-static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
+                                struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
        unsigned long address;
        unsigned long nkey;
 
-       if (pte_val(entry) & _PAGE_INVALID)
+       if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
                return;
        VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
        address = pte_val(entry) & PAGE_MASK;
@@ -757,23 +749,30 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
         * key C/R to 0.
         */
        nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+       nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
        page_set_storage_key(address, nkey, 0);
 #endif
 }
 
-static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
+static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
 {
-       if (!MACHINE_HAS_ESOP &&
-           (pte_val(entry) & _PAGE_PRESENT) &&
-           (pte_val(entry) & _PAGE_WRITE)) {
-               /*
-                * Without enhanced suppression-on-protection force
-                * the dirty bit on for all writable ptes.
-                */
-               pte_val(entry) |= _PAGE_DIRTY;
-               pte_val(entry) &= ~_PAGE_PROTECT;
+       if ((pte_val(entry) & _PAGE_PRESENT) &&
+           (pte_val(entry) & _PAGE_WRITE) &&
+           !(pte_val(entry) & _PAGE_INVALID)) {
+               if (!MACHINE_HAS_ESOP) {
+                       /*
+                        * Without enhanced suppression-on-protection force
+                        * the dirty bit on for all writable ptes.
+                        */
+                       pte_val(entry) |= _PAGE_DIRTY;
+                       pte_val(entry) &= ~_PAGE_PROTECT;
+               }
+               if (!(pte_val(entry) & _PAGE_PROTECT))
+                       /* This pte allows write access, set user-dirty */
+                       pgste_val(pgste) |= PGSTE_UC_BIT;
        }
        *ptep = entry;
+       return pgste;
 }
 
 /**
@@ -839,6 +838,8 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *);
 unsigned long gmap_fault(unsigned long address, struct gmap *);
 void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
 void __gmap_zap(unsigned long address, struct gmap *);
+bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
+
 
 void gmap_register_ipte_notifier(struct gmap_notifier *);
 void gmap_unregister_ipte_notifier(struct gmap_notifier *);
@@ -870,8 +871,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
        if (mm_has_pgste(mm)) {
                pgste = pgste_get_lock(ptep);
                pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
-               pgste_set_key(ptep, pgste, entry);
-               pgste_set_pte(ptep, entry);
+               pgste_set_key(ptep, pgste, entry, mm);
+               pgste = pgste_set_pte(ptep, pgste, entry);
                pgste_set_unlock(ptep, pgste);
        } else {
                if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1)
@@ -1017,45 +1018,6 @@ static inline pte_t pte_mkhuge(pte_t pte)
 }
 #endif
 
-/*
- * Get (and clear) the user dirty bit for a pte.
- */
-static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
-                                                pte_t *ptep)
-{
-       pgste_t pgste;
-       int dirty = 0;
-
-       if (mm_has_pgste(mm)) {
-               pgste = pgste_get_lock(ptep);
-               pgste = pgste_update_all(ptep, pgste);
-               dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
-               pgste_val(pgste) &= ~PGSTE_HC_BIT;
-               pgste_set_unlock(ptep, pgste);
-               return dirty;
-       }
-       return dirty;
-}
-
-/*
- * Get (and clear) the user referenced bit for a pte.
- */
-static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
-                                                pte_t *ptep)
-{
-       pgste_t pgste;
-       int young = 0;
-
-       if (mm_has_pgste(mm)) {
-               pgste = pgste_get_lock(ptep);
-               pgste = pgste_update_young(ptep, pgste);
-               young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
-               pgste_val(pgste) &= ~PGSTE_HR_BIT;
-               pgste_set_unlock(ptep, pgste);
-       }
-       return young;
-}
-
 static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
 {
        unsigned long pto = (unsigned long) ptep;
@@ -1118,6 +1080,36 @@ static inline void ptep_flush_lazy(struct mm_struct *mm,
        atomic_sub(0x10000, &mm->context.attach_count);
 }
 
+/*
+ * Get (and clear) the user dirty bit for a pte.
+ */
+static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
+                                                unsigned long addr,
+                                                pte_t *ptep)
+{
+       pgste_t pgste;
+       pte_t pte;
+       int dirty;
+
+       if (!mm_has_pgste(mm))
+               return 0;
+       pgste = pgste_get_lock(ptep);
+       dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+       pgste_val(pgste) &= ~PGSTE_UC_BIT;
+       pte = *ptep;
+       if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
+               pgste = pgste_ipte_notify(mm, ptep, pgste);
+               __ptep_ipte(addr, ptep);
+               if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+                       pte_val(pte) |= _PAGE_PROTECT;
+               else
+                       pte_val(pte) |= _PAGE_INVALID;
+               *ptep = pte;
+       }
+       pgste_set_unlock(ptep, pgste);
+       return dirty;
+}
+
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long addr, pte_t *ptep)
@@ -1137,7 +1129,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
        pte = pte_mkold(pte);
 
        if (mm_has_pgste(vma->vm_mm)) {
-               pgste_set_pte(ptep, pte);
+               pgste = pgste_set_pte(ptep, pgste, pte);
                pgste_set_unlock(ptep, pgste);
        } else
                *ptep = pte;
@@ -1182,7 +1174,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
        pte_val(*ptep) = _PAGE_INVALID;
 
        if (mm_has_pgste(mm)) {
-               pgste = pgste_update_all(&pte, pgste);
+               pgste = pgste_update_all(&pte, pgste, mm);
                pgste_set_unlock(ptep, pgste);
        }
        return pte;
@@ -1205,7 +1197,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
        ptep_flush_lazy(mm, address, ptep);
 
        if (mm_has_pgste(mm)) {
-               pgste = pgste_update_all(&pte, pgste);
+               pgste = pgste_update_all(&pte, pgste, mm);
                pgste_set(ptep, pgste);
        }
        return pte;
@@ -1219,8 +1211,8 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 
        if (mm_has_pgste(mm)) {
                pgste = pgste_get(ptep);
-               pgste_set_key(ptep, pgste, pte);
-               pgste_set_pte(ptep, pte);
+               pgste_set_key(ptep, pgste, pte, mm);
+               pgste = pgste_set_pte(ptep, pgste, pte);
                pgste_set_unlock(ptep, pgste);
        } else
                *ptep = pte;
@@ -1246,7 +1238,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
                if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
                    _PGSTE_GPS_USAGE_UNUSED)
                        pte_val(pte) |= _PAGE_UNUSED;
-               pgste = pgste_update_all(&pte, pgste);
+               pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
                pgste_set_unlock(ptep, pgste);
        }
        return pte;
@@ -1278,7 +1270,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
        pte_val(*ptep) = _PAGE_INVALID;
 
        if (!full && mm_has_pgste(mm)) {
-               pgste = pgste_update_all(&pte, pgste);
+               pgste = pgste_update_all(&pte, pgste, mm);
                pgste_set_unlock(ptep, pgste);
        }
        return pte;
@@ -1301,7 +1293,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
                pte = pte_wrprotect(pte);
 
                if (mm_has_pgste(mm)) {
-                       pgste_set_pte(ptep, pte);
+                       pgste = pgste_set_pte(ptep, pgste, pte);
                        pgste_set_unlock(ptep, pgste);
                } else
                        *ptep = pte;
@@ -1326,7 +1318,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
        ptep_flush_direct(vma->vm_mm, address, ptep);
 
        if (mm_has_pgste(vma->vm_mm)) {
-               pgste_set_pte(ptep, entry);
+               pgste = pgste_set_pte(ptep, pgste, entry);
                pgste_set_unlock(ptep, pgste);
        } else
                *ptep = entry;
@@ -1734,6 +1726,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern int vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
+extern void s390_enable_skey(void);
 
 /*
  * No page table caches to initialise
index f4783c0b7b43cfd4e58c2c4bfed19013f465fb1b..6e7708f3d866fb1dcb68e97dc5ddb40271b89652 100644 (file)
                         PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
                         PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
 
+struct psw_bits {
+       unsigned long long      : 1;
+       unsigned long long r    : 1; /* PER-Mask */
+       unsigned long long      : 3;
+       unsigned long long t    : 1; /* DAT Mode */
+       unsigned long long i    : 1; /* Input/Output Mask */
+       unsigned long long e    : 1; /* External Mask */
+       unsigned long long key  : 4; /* PSW Key */
+       unsigned long long      : 1;
+       unsigned long long m    : 1; /* Machine-Check Mask */
+       unsigned long long w    : 1; /* Wait State */
+       unsigned long long p    : 1; /* Problem State */
+       unsigned long long as   : 2; /* Address Space Control */
+       unsigned long long cc   : 2; /* Condition Code */
+       unsigned long long pm   : 4; /* Program Mask */
+       unsigned long long ri   : 1; /* Runtime Instrumentation */
+       unsigned long long      : 6;
+       unsigned long long eaba : 2; /* Addressing Mode */
+#ifdef CONFIG_64BIT
+       unsigned long long      : 31;
+       unsigned long long ia   : 64;/* Instruction Address */
+#else
+       unsigned long long ia   : 31;/* Instruction Address */
+#endif
+};
+
+enum {
+       PSW_AMODE_24BIT = 0,
+       PSW_AMODE_31BIT = 1,
+       PSW_AMODE_64BIT = 3
+};
+
+enum {
+       PSW_AS_PRIMARY   = 0,
+       PSW_AS_ACCREG    = 1,
+       PSW_AS_SECONDARY = 2,
+       PSW_AS_HOME      = 3
+};
+
+#define psw_bits(__psw) (*({                   \
+       typecheck(psw_t, __psw);                \
+       &(*(struct psw_bits *)(&(__psw)));      \
+}))
+
 /*
  * The pt_regs struct defines the way the registers are stored on
  * the stack during a system call.
index 2f5e9932b4defddda4587c6593492712f2fb85c1..943d4345111655385e867dd3e42981eddd73c787 100644 (file)
@@ -28,7 +28,11 @@ struct sclp_ipl_info {
 
 struct sclp_cpu_entry {
        u8 address;
-       u8 reserved0[13];
+       u8 reserved0[2];
+       u8 : 3;
+       u8 siif : 1;
+       u8 : 4;
+       u8 reserved2[10];
        u8 type;
        u8 reserved1;
 } __attribute__((packed));
@@ -61,5 +65,6 @@ int sclp_pci_deconfigure(u32 fid);
 int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
 unsigned long sclp_get_hsa_size(void);
 void sclp_early_detect(void);
+int sclp_has_siif(void);
 
 #endif /* _ASM_S390_SCLP_H */
index c003c6a73b1e3e883b814aff03704a43c3c0d9a4..0fc26430a1e5e62cdedc0ce982e4e7981dcd910c 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/types.h>
 
 #define __KVM_S390
+#define __KVM_HAVE_GUEST_DEBUG
 
 /* Device control API: s390-specific devices */
 #define KVM_DEV_FLIC_GET_ALL_IRQS      1
@@ -54,6 +55,13 @@ struct kvm_s390_io_adapter_req {
        __u64 addr;
 };
 
+/* kvm attr_group  on vm fd */
+#define KVM_S390_VM_MEM_CTRL           0
+
+/* kvm attributes for mem_ctrl */
+#define KVM_S390_VM_MEM_ENABLE_CMMA    0
+#define KVM_S390_VM_MEM_CLR_CMMA       1
+
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
        /* general purpose regs for s390 */
@@ -72,11 +80,31 @@ struct kvm_fpu {
        __u64 fprs[16];
 };
 
+#define KVM_GUESTDBG_USE_HW_BP         0x00010000
+
+#define KVM_HW_BP                      1
+#define KVM_HW_WP_WRITE                        2
+#define KVM_SINGLESTEP                 4
+
 struct kvm_debug_exit_arch {
+       __u64 addr;
+       __u8 type;
+       __u8 pad[7]; /* Should be set to 0 */
+};
+
+struct kvm_hw_breakpoint {
+       __u64 addr;
+       __u64 phys_addr;
+       __u64 len;
+       __u8 type;
+       __u8 pad[7]; /* Should be set to 0 */
 };
 
 /* for KVM_SET_GUEST_DEBUG */
 struct kvm_guest_debug_arch {
+       __u32 nr_hw_bp;
+       __u32 pad; /* Should be set to 0 */
+       struct kvm_hw_breakpoint __user *hw_bp;
 };
 
 #define KVM_SYNC_PREFIX (1UL << 0)
index cc10cdd4d6a24ccbb9d02d12154da7eae25cb076..859a7ed36c4be73e0b5afc64870b72e712c34abe 100644 (file)
@@ -89,16 +89,22 @@ int main(void)
        DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
        DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
        DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
-       DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid));
+       DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num));
+       DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
+       DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
        DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
-       DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id));
-       DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_access_id));
+       DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id));
+       DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
+       DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id));
+       DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
+       DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code));
        DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
        DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
        DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
        DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
        DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
        DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
+       DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code));
        DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
        DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
        DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
@@ -156,6 +162,8 @@ int main(void)
 #ifdef CONFIG_32BIT
        DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr));
 #else /* CONFIG_32BIT */
+       DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
+       DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
        DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
        DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
        DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
index 1662038516c0db29d59a4a87dce89f374428cd51..e66f046b9c434fdc0b2fe1946fc5f78564c89809 100644 (file)
@@ -391,8 +391,8 @@ ENTRY(pgm_check_handler)
        jz      pgm_kprobe
        oi      __TI_flags+3(%r12),_TIF_PER_TRAP
        mvc     __THREAD_per_address(4,%r1),__LC_PER_ADDRESS
-       mvc     __THREAD_per_cause(2,%r1),__LC_PER_CAUSE
-       mvc     __THREAD_per_paid(1,%r1),__LC_PER_PAID
+       mvc     __THREAD_per_cause(2,%r1),__LC_PER_CODE
+       mvc     __THREAD_per_paid(1,%r1),__LC_PER_ACCESS_ID
 0:     REENABLE_IRQS
        xc      __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
        l       %r1,BASED(.Ljump_table)
index 5963e43618bb0df3ca790ffa1ce65a9fefd86b0b..3c34753de6ade9ab09d59012b771a849fb89a683 100644 (file)
@@ -423,8 +423,8 @@ ENTRY(pgm_check_handler)
        jz      pgm_kprobe
        oi      __TI_flags+7(%r12),_TIF_PER_TRAP
        mvc     __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
-       mvc     __THREAD_per_cause(2,%r14),__LC_PER_CAUSE
-       mvc     __THREAD_per_paid(1,%r14),__LC_PER_PAID
+       mvc     __THREAD_per_cause(2,%r14),__LC_PER_CODE
+       mvc     __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
 0:     REENABLE_IRQS
        xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
        larl    %r1,pgm_check_table
index d3adb37e93a4c99a8d1debd8bf06069ad1d38a24..b3b553469650888fd31df5d975fe4b93b2399f70 100644 (file)
@@ -11,5 +11,7 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqch
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
-kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o diag.o
+kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
+kvm-objs += diag.o gaccess.o guestdbg.o
+
 obj-$(CONFIG_KVM) += kvm.o
index 08dfc839a6cfeeb3655f64d850ce1ed6e60d49cc..5521ace8b60d1901c3dea9b80abcf7b1c2dc131d 100644 (file)
@@ -64,12 +64,12 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
        int rc;
        u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
        u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
-       unsigned long hva_token = KVM_HVA_ERR_BAD;
 
        if (vcpu->run->s.regs.gprs[rx] & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
-               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
        if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
@@ -89,8 +89,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
                    parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
                        return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-               hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
-               if (kvm_is_error_hva(hva_token))
+               if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
                        return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
                vcpu->arch.pfault_token = parm.token_addr;
@@ -167,17 +166,11 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 
        VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
        switch (subcode) {
-       case 0:
-       case 1:
-               page_table_reset_pgste(current->mm, 0, TASK_SIZE);
-               return -EOPNOTSUPP;
        case 3:
                vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
-               page_table_reset_pgste(current->mm, 0, TASK_SIZE);
                break;
        case 4:
                vcpu->run->s390_reset_flags = 0;
-               page_table_reset_pgste(current->mm, 0, TASK_SIZE);
                break;
        default:
                return -EOPNOTSUPP;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
new file mode 100644 (file)
index 0000000..691fdb7
--- /dev/null
@@ -0,0 +1,645 @@
+/*
+ * guest access functions
+ *
+ * Copyright IBM Corp. 2014
+ *
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/err.h>
+#include <asm/pgtable.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+union asce {
+       unsigned long val;
+       struct {
+               unsigned long origin : 52; /* Region- or Segment-Table Origin */
+               unsigned long    : 2;
+               unsigned long g  : 1; /* Subspace Group Control */
+               unsigned long p  : 1; /* Private Space Control */
+               unsigned long s  : 1; /* Storage-Alteration-Event Control */
+               unsigned long x  : 1; /* Space-Switch-Event Control */
+               unsigned long r  : 1; /* Real-Space Control */
+               unsigned long    : 1;
+               unsigned long dt : 2; /* Designation-Type Control */
+               unsigned long tl : 2; /* Region- or Segment-Table Length */
+       };
+};
+
+enum {
+       ASCE_TYPE_SEGMENT = 0,
+       ASCE_TYPE_REGION3 = 1,
+       ASCE_TYPE_REGION2 = 2,
+       ASCE_TYPE_REGION1 = 3
+};
+
+union region1_table_entry {
+       unsigned long val;
+       struct {
+               unsigned long rto: 52;/* Region-Table Origin */
+               unsigned long    : 2;
+               unsigned long p  : 1; /* DAT-Protection Bit */
+               unsigned long    : 1;
+               unsigned long tf : 2; /* Region-Second-Table Offset */
+               unsigned long i  : 1; /* Region-Invalid Bit */
+               unsigned long    : 1;
+               unsigned long tt : 2; /* Table-Type Bits */
+               unsigned long tl : 2; /* Region-Second-Table Length */
+       };
+};
+
+union region2_table_entry {
+       unsigned long val;
+       struct {
+               unsigned long rto: 52;/* Region-Table Origin */
+               unsigned long    : 2;
+               unsigned long p  : 1; /* DAT-Protection Bit */
+               unsigned long    : 1;
+               unsigned long tf : 2; /* Region-Third-Table Offset */
+               unsigned long i  : 1; /* Region-Invalid Bit */
+               unsigned long    : 1;
+               unsigned long tt : 2; /* Table-Type Bits */
+               unsigned long tl : 2; /* Region-Third-Table Length */
+       };
+};
+
+struct region3_table_entry_fc0 {
+       unsigned long sto: 52;/* Segment-Table Origin */
+       unsigned long    : 1;
+       unsigned long fc : 1; /* Format-Control */
+       unsigned long p  : 1; /* DAT-Protection Bit */
+       unsigned long    : 1;
+       unsigned long tf : 2; /* Segment-Table Offset */
+       unsigned long i  : 1; /* Region-Invalid Bit */
+       unsigned long cr : 1; /* Common-Region Bit */
+       unsigned long tt : 2; /* Table-Type Bits */
+       unsigned long tl : 2; /* Segment-Table Length */
+};
+
+struct region3_table_entry_fc1 {
+       unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+       unsigned long    : 14;
+       unsigned long av : 1; /* ACCF-Validity Control */
+       unsigned long acc: 4; /* Access-Control Bits */
+       unsigned long f  : 1; /* Fetch-Protection Bit */
+       unsigned long fc : 1; /* Format-Control */
+       unsigned long p  : 1; /* DAT-Protection Bit */
+       unsigned long co : 1; /* Change-Recording Override */
+       unsigned long    : 2;
+       unsigned long i  : 1; /* Region-Invalid Bit */
+       unsigned long cr : 1; /* Common-Region Bit */
+       unsigned long tt : 2; /* Table-Type Bits */
+       unsigned long    : 2;
+};
+
+union region3_table_entry {
+       unsigned long val;
+       struct region3_table_entry_fc0 fc0;
+       struct region3_table_entry_fc1 fc1;
+       struct {
+               unsigned long    : 53;
+               unsigned long fc : 1; /* Format-Control */
+               unsigned long    : 4;
+               unsigned long i  : 1; /* Region-Invalid Bit */
+               unsigned long cr : 1; /* Common-Region Bit */
+               unsigned long tt : 2; /* Table-Type Bits */
+               unsigned long    : 2;
+       };
+};
+
+struct segment_entry_fc0 {
+       unsigned long pto: 53;/* Page-Table Origin */
+       unsigned long fc : 1; /* Format-Control */
+       unsigned long p  : 1; /* DAT-Protection Bit */
+       unsigned long    : 3;
+       unsigned long i  : 1; /* Segment-Invalid Bit */
+       unsigned long cs : 1; /* Common-Segment Bit */
+       unsigned long tt : 2; /* Table-Type Bits */
+       unsigned long    : 2;
+};
+
+struct segment_entry_fc1 {
+       unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+       unsigned long    : 3;
+       unsigned long av : 1; /* ACCF-Validity Control */
+       unsigned long acc: 4; /* Access-Control Bits */
+       unsigned long f  : 1; /* Fetch-Protection Bit */
+       unsigned long fc : 1; /* Format-Control */
+       unsigned long p  : 1; /* DAT-Protection Bit */
+       unsigned long co : 1; /* Change-Recording Override */
+       unsigned long    : 2;
+       unsigned long i  : 1; /* Segment-Invalid Bit */
+       unsigned long cs : 1; /* Common-Segment Bit */
+       unsigned long tt : 2; /* Table-Type Bits */
+       unsigned long    : 2;
+};
+
+union segment_table_entry {
+       unsigned long val;
+       struct segment_entry_fc0 fc0;
+       struct segment_entry_fc1 fc1;
+       struct {
+               unsigned long    : 53;
+               unsigned long fc : 1; /* Format-Control */
+               unsigned long    : 4;
+               unsigned long i  : 1; /* Segment-Invalid Bit */
+               unsigned long cs : 1; /* Common-Segment Bit */
+               unsigned long tt : 2; /* Table-Type Bits */
+               unsigned long    : 2;
+       };
+};
+
+enum {
+       TABLE_TYPE_SEGMENT = 0,
+       TABLE_TYPE_REGION3 = 1,
+       TABLE_TYPE_REGION2 = 2,
+       TABLE_TYPE_REGION1 = 3
+};
+
+union page_table_entry {
+       unsigned long val;
+       struct {
+               unsigned long pfra : 52; /* Page-Frame Real Address */
+               unsigned long z  : 1; /* Zero Bit */
+               unsigned long i  : 1; /* Page-Invalid Bit */
+               unsigned long p  : 1; /* DAT-Protection Bit */
+               unsigned long co : 1; /* Change-Recording Override */
+               unsigned long    : 8;
+       };
+};
+
+/*
+ * vaddress union in order to easily decode a virtual address into its
+ * region first index, region second index etc. parts.
+ */
+union vaddress {
+       unsigned long addr;
+       struct {
+               unsigned long rfx : 11;
+               unsigned long rsx : 11;
+               unsigned long rtx : 11;
+               unsigned long sx  : 11;
+               unsigned long px  : 8;
+               unsigned long bx  : 12;
+       };
+       struct {
+               unsigned long rfx01 : 2;
+               unsigned long       : 9;
+               unsigned long rsx01 : 2;
+               unsigned long       : 9;
+               unsigned long rtx01 : 2;
+               unsigned long       : 9;
+               unsigned long sx01  : 2;
+               unsigned long       : 29;
+       };
+};
+
+/*
+ * raddress union which will contain the result (real or absolute address)
+ * after a page table walk. The rfaa, sfaa and pfra members are used to
+ * simply assign them the value of a region, segment or page table entry.
+ */
+union raddress {
+       unsigned long addr;
+       unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+       unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+       unsigned long pfra : 52; /* Page-Frame Real Address */
+};
+
+static int ipte_lock_count;
+static DEFINE_MUTEX(ipte_mutex);
+
+int ipte_lock_held(struct kvm_vcpu *vcpu)
+{
+       union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control;
+
+       if (vcpu->arch.sie_block->eca & 1)
+               return ic->kh != 0;
+       return ipte_lock_count != 0;
+}
+
+static void ipte_lock_simple(struct kvm_vcpu *vcpu)
+{
+       union ipte_control old, new, *ic;
+
+       mutex_lock(&ipte_mutex);
+       ipte_lock_count++;
+       if (ipte_lock_count > 1)
+               goto out;
+       ic = &vcpu->kvm->arch.sca->ipte_control;
+       do {
+               old = ACCESS_ONCE(*ic);
+               while (old.k) {
+                       cond_resched();
+                       old = ACCESS_ONCE(*ic);
+               }
+               new = old;
+               new.k = 1;
+       } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+out:
+       mutex_unlock(&ipte_mutex);
+}
+
+static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
+{
+       union ipte_control old, new, *ic;
+
+       mutex_lock(&ipte_mutex);
+       ipte_lock_count--;
+       if (ipte_lock_count)
+               goto out;
+       ic = &vcpu->kvm->arch.sca->ipte_control;
+       do {
+               new = old = ACCESS_ONCE(*ic);
+               new.k = 0;
+       } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+       if (!ipte_lock_count)
+               wake_up(&vcpu->kvm->arch.ipte_wq);
+out:
+       mutex_unlock(&ipte_mutex);
+}
+
+static void ipte_lock_siif(struct kvm_vcpu *vcpu)
+{
+       union ipte_control old, new, *ic;
+
+       ic = &vcpu->kvm->arch.sca->ipte_control;
+       do {
+               old = ACCESS_ONCE(*ic);
+               while (old.kg) {
+                       cond_resched();
+                       old = ACCESS_ONCE(*ic);
+               }
+               new = old;
+               new.k = 1;
+               new.kh++;
+       } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+}
+
+static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
+{
+       union ipte_control old, new, *ic;
+
+       ic = &vcpu->kvm->arch.sca->ipte_control;
+       do {
+               new = old = ACCESS_ONCE(*ic);
+               new.kh--;
+               if (!new.kh)
+                       new.k = 0;
+       } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+       if (!new.kh)
+               wake_up(&vcpu->kvm->arch.ipte_wq);
+}
+
+static void ipte_lock(struct kvm_vcpu *vcpu)
+{
+       if (vcpu->arch.sie_block->eca & 1)
+               ipte_lock_siif(vcpu);
+       else
+               ipte_lock_simple(vcpu);
+}
+
+static void ipte_unlock(struct kvm_vcpu *vcpu)
+{
+       if (vcpu->arch.sie_block->eca & 1)
+               ipte_unlock_siif(vcpu);
+       else
+               ipte_unlock_simple(vcpu);
+}
+
+static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
+{
+       switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
+       case PSW_AS_PRIMARY:
+               return vcpu->arch.sie_block->gcr[1];
+       case PSW_AS_SECONDARY:
+               return vcpu->arch.sie_block->gcr[7];
+       case PSW_AS_HOME:
+               return vcpu->arch.sie_block->gcr[13];
+       }
+       return 0;
+}
+
+static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
+{
+       return kvm_read_guest(kvm, gpa, val, sizeof(*val));
+}
+
+/**
+ * guest_translate - translate a guest virtual into a guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: guest virtual address
+ * @gpa: points to where guest physical (absolute) address should be stored
+ * @write: indicates if access is a write access
+ *
+ * Translate a guest virtual address into a guest absolute address by means
+ * of dynamic address translation as specified by the architecture.
+ * If the resulting absolute address is not available in the configuration
+ * an addressing exception is indicated and @gpa will not be changed.
+ *
+ * Returns: - zero on success; @gpa contains the resulting absolute address
+ *         - a negative value if guest access failed due to e.g. broken
+ *           guest mapping
+ *         - a positive value if an access exception happened. In this case
+ *           the returned value is the program interruption code as defined
+ *           by the architecture
+ */
+static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
+                                    unsigned long *gpa, int write)
+{
+       union vaddress vaddr = {.addr = gva};
+       union raddress raddr = {.addr = gva};
+       union page_table_entry pte;
+       int dat_protection = 0;
+       union ctlreg0 ctlreg0;
+       unsigned long ptr;
+       int edat1, edat2;
+       union asce asce;
+
+       ctlreg0.val = vcpu->arch.sie_block->gcr[0];
+       edat1 = ctlreg0.edat && test_vfacility(8);
+       edat2 = edat1 && test_vfacility(78);
+       asce.val = get_vcpu_asce(vcpu);
+       if (asce.r)
+               goto real_address;
+       ptr = asce.origin * 4096;
+       switch (asce.dt) {
+       case ASCE_TYPE_REGION1:
+               if (vaddr.rfx01 > asce.tl)
+                       return PGM_REGION_FIRST_TRANS;
+               ptr += vaddr.rfx * 8;
+               break;
+       case ASCE_TYPE_REGION2:
+               if (vaddr.rfx)
+                       return PGM_ASCE_TYPE;
+               if (vaddr.rsx01 > asce.tl)
+                       return PGM_REGION_SECOND_TRANS;
+               ptr += vaddr.rsx * 8;
+               break;
+       case ASCE_TYPE_REGION3:
+               if (vaddr.rfx || vaddr.rsx)
+                       return PGM_ASCE_TYPE;
+               if (vaddr.rtx01 > asce.tl)
+                       return PGM_REGION_THIRD_TRANS;
+               ptr += vaddr.rtx * 8;
+               break;
+       case ASCE_TYPE_SEGMENT:
+               if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
+                       return PGM_ASCE_TYPE;
+               if (vaddr.sx01 > asce.tl)
+                       return PGM_SEGMENT_TRANSLATION;
+               ptr += vaddr.sx * 8;
+               break;
+       }
+       switch (asce.dt) {
+       case ASCE_TYPE_REGION1: {
+               union region1_table_entry rfte;
+
+               if (kvm_is_error_gpa(vcpu->kvm, ptr))
+                       return PGM_ADDRESSING;
+               if (deref_table(vcpu->kvm, ptr, &rfte.val))
+                       return -EFAULT;
+               if (rfte.i)
+                       return PGM_REGION_FIRST_TRANS;
+               if (rfte.tt != TABLE_TYPE_REGION1)
+                       return PGM_TRANSLATION_SPEC;
+               if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
+                       return PGM_REGION_SECOND_TRANS;
+               if (edat1)
+                       dat_protection |= rfte.p;
+               ptr = rfte.rto * 4096 + vaddr.rsx * 8;
+       }
+               /* fallthrough */
+       case ASCE_TYPE_REGION2: {
+               union region2_table_entry rste;
+
+               if (kvm_is_error_gpa(vcpu->kvm, ptr))
+                       return PGM_ADDRESSING;
+               if (deref_table(vcpu->kvm, ptr, &rste.val))
+                       return -EFAULT;
+               if (rste.i)
+                       return PGM_REGION_SECOND_TRANS;
+               if (rste.tt != TABLE_TYPE_REGION2)
+                       return PGM_TRANSLATION_SPEC;
+               if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
+                       return PGM_REGION_THIRD_TRANS;
+               if (edat1)
+                       dat_protection |= rste.p;
+               ptr = rste.rto * 4096 + vaddr.rtx * 8;
+       }
+               /* fallthrough */
+       case ASCE_TYPE_REGION3: {
+               union region3_table_entry rtte;
+
+               if (kvm_is_error_gpa(vcpu->kvm, ptr))
+                       return PGM_ADDRESSING;
+               if (deref_table(vcpu->kvm, ptr, &rtte.val))
+                       return -EFAULT;
+               if (rtte.i)
+                       return PGM_REGION_THIRD_TRANS;
+               if (rtte.tt != TABLE_TYPE_REGION3)
+                       return PGM_TRANSLATION_SPEC;
+               if (rtte.cr && asce.p && edat2)
+                       return PGM_TRANSLATION_SPEC;
+               if (rtte.fc && edat2) {
+                       dat_protection |= rtte.fc1.p;
+                       raddr.rfaa = rtte.fc1.rfaa;
+                       goto absolute_address;
+               }
+               if (vaddr.sx01 < rtte.fc0.tf)
+                       return PGM_SEGMENT_TRANSLATION;
+               if (vaddr.sx01 > rtte.fc0.tl)
+                       return PGM_SEGMENT_TRANSLATION;
+               if (edat1)
+                       dat_protection |= rtte.fc0.p;
+               ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8;
+       }
+               /* fallthrough */
+       case ASCE_TYPE_SEGMENT: {
+               union segment_table_entry ste;
+
+               if (kvm_is_error_gpa(vcpu->kvm, ptr))
+                       return PGM_ADDRESSING;
+               if (deref_table(vcpu->kvm, ptr, &ste.val))
+                       return -EFAULT;
+               if (ste.i)
+                       return PGM_SEGMENT_TRANSLATION;
+               if (ste.tt != TABLE_TYPE_SEGMENT)
+                       return PGM_TRANSLATION_SPEC;
+               if (ste.cs && asce.p)
+                       return PGM_TRANSLATION_SPEC;
+               if (ste.fc && edat1) {
+                       dat_protection |= ste.fc1.p;
+                       raddr.sfaa = ste.fc1.sfaa;
+                       goto absolute_address;
+               }
+               dat_protection |= ste.fc0.p;
+               ptr = ste.fc0.pto * 2048 + vaddr.px * 8;
+       }
+       }
+       if (kvm_is_error_gpa(vcpu->kvm, ptr))
+               return PGM_ADDRESSING;
+       if (deref_table(vcpu->kvm, ptr, &pte.val))
+               return -EFAULT;
+       if (pte.i)
+               return PGM_PAGE_TRANSLATION;
+       if (pte.z)
+               return PGM_TRANSLATION_SPEC;
+       if (pte.co && !edat1)
+               return PGM_TRANSLATION_SPEC;
+       dat_protection |= pte.p;
+       raddr.pfra = pte.pfra;
+real_address:
+       raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
+absolute_address:
+       if (write && dat_protection)
+               return PGM_PROTECTION;
+       if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+               return PGM_ADDRESSING;
+       *gpa = raddr.addr;
+       return 0;
+}
+
+static inline int is_low_address(unsigned long ga)
+{
+       /* Check for address ranges 0..511 and 4096..4607 */
+       return (ga & ~0x11fful) == 0;
+}
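
[The single mask test covers both ranges because ~0x11fful clears exactly the
bits that may be set inside them: bits 0-8 (0..511) and bit 12 plus bits 0-8
(4096..4607). A stand-alone sanity check of the same expression, for
illustration only and not part of the patch:

	#include <assert.h>

	static int is_low(unsigned long ga)
	{
		return (ga & ~0x11fful) == 0;	/* same test as is_low_address() */
	}

	int main(void)
	{
		assert(is_low(0) && is_low(511));	/* first low range */
		assert(!is_low(512) && !is_low(4095));	/* gap in between */
		assert(is_low(4096) && is_low(4607));	/* second low range */
		assert(!is_low(4608));			/* above both ranges */
		return 0;
	}
]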
+
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu)
+{
+       union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
+       psw_t *psw = &vcpu->arch.sie_block->gpsw;
+       union asce asce;
+
+       if (!ctlreg0.lap)
+               return 0;
+       asce.val = get_vcpu_asce(vcpu);
+       if (psw_bits(*psw).t && asce.p)
+               return 0;
+       return 1;
+}
+
+struct trans_exc_code_bits {
+       unsigned long addr : 52; /* Translation-exception Address */
+       unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
+       unsigned long      : 7;
+       unsigned long b61  : 1;
+       unsigned long as   : 2;  /* ASCE Identifier */
+};
+
+enum {
+       FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
+       FSI_STORE   = 1, /* Exception was due to store operation */
+       FSI_FETCH   = 2  /* Exception was due to fetch operation */
+};
+
+static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
+                           unsigned long *pages, unsigned long nr_pages,
+                           int write)
+{
+       struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+       psw_t *psw = &vcpu->arch.sie_block->gpsw;
+       struct trans_exc_code_bits *tec_bits;
+       int lap_enabled, rc;
+
+       memset(pgm, 0, sizeof(*pgm));
+       tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+       tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
+       tec_bits->as = psw_bits(*psw).as;
+       lap_enabled = low_address_protection_enabled(vcpu);
+       while (nr_pages) {
+               ga = kvm_s390_logical_to_effective(vcpu, ga);
+               tec_bits->addr = ga >> PAGE_SHIFT;
+               if (write && lap_enabled && is_low_address(ga)) {
+                       pgm->code = PGM_PROTECTION;
+                       return pgm->code;
+               }
+               ga &= PAGE_MASK;
+               if (psw_bits(*psw).t) {
+                       rc = guest_translate(vcpu, ga, pages, write);
+                       if (rc < 0)
+                               return rc;
+                       if (rc == PGM_PROTECTION)
+                               tec_bits->b61 = 1;
+                       if (rc)
+                               pgm->code = rc;
+               } else {
+                       *pages = kvm_s390_real_to_abs(vcpu, ga);
+                       if (kvm_is_error_gpa(vcpu->kvm, *pages))
+                               pgm->code = PGM_ADDRESSING;
+               }
+               if (pgm->code)
+                       return pgm->code;
+               ga += PAGE_SIZE;
+               pages++;
+               nr_pages--;
+       }
+       return 0;
+}
+
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+                unsigned long len, int write)
+{
+       psw_t *psw = &vcpu->arch.sie_block->gpsw;
+       unsigned long _len, nr_pages, gpa, idx;
+       unsigned long pages_array[2];
+       unsigned long *pages;
+       int need_ipte_lock;
+       union asce asce;
+       int rc;
+
+       if (!len)
+               return 0;
+       /* Access register mode is not supported yet. */
+       if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
+               return -EOPNOTSUPP;
+       nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
+       pages = pages_array;
+       if (nr_pages > ARRAY_SIZE(pages_array))
+               pages = vmalloc(nr_pages * sizeof(unsigned long));
+       if (!pages)
+               return -ENOMEM;
+       asce.val = get_vcpu_asce(vcpu);
+       need_ipte_lock = psw_bits(*psw).t && !asce.r;
+       if (need_ipte_lock)
+               ipte_lock(vcpu);
+       rc = guest_page_range(vcpu, ga, pages, nr_pages, write);
+       for (idx = 0; idx < nr_pages && !rc; idx++) {
+               gpa = *(pages + idx) + (ga & ~PAGE_MASK);
+               _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+               if (write)
+                       rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
+               else
+                       rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
+               len -= _len;
+               ga += _len;
+               data += _len;
+       }
+       if (need_ipte_lock)
+               ipte_unlock(vcpu);
+       if (nr_pages > ARRAY_SIZE(pages_array))
+               vfree(pages);
+       return rc;
+}
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+                     void *data, unsigned long len, int write)
+{
+       unsigned long _len, gpa;
+       int rc = 0;
+
+       while (len && !rc) {
+               gpa = kvm_s390_real_to_abs(vcpu, gra);
+               _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+               if (write)
+                       rc = write_guest_abs(vcpu, gpa, data, _len);
+               else
+                       rc = read_guest_abs(vcpu, gpa, data, _len);
+               len -= _len;
+               gra += _len;
+               data += _len;
+       }
+       return rc;
+}
index 374a439ccc6080a004c7593f6227bc0c799ff7a6..1079c8fc6d0d903f840a5d19b55e08f7534ec6fd 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * access guest memory
  *
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2014
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
 
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
 #include "kvm-s390.h"
 
-/* Convert real to absolute address by applying the prefix of the CPU */
+/**
+ * kvm_s390_real_to_abs - convert guest real address to guest absolute address
+ * @vcpu: guest virtual cpu
+ * @gra: guest real address
+ *
+ * Returns the guest absolute address that corresponds to the passed guest real
+ * address @gra of a virtual guest cpu by applying its prefix.
+ */
 static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
-                                                unsigned long gaddr)
+                                                unsigned long gra)
 {
-       unsigned long prefix  = vcpu->arch.sie_block->prefix;
-       if (gaddr < 2 * PAGE_SIZE)
-               gaddr += prefix;
-       else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE)
-               gaddr -= prefix;
-       return gaddr;
+       unsigned long prefix = vcpu->arch.sie_block->prefix;
+
+       if (gra < 2 * PAGE_SIZE)
+               gra += prefix;
+       else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE)
+               gra -= prefix;
+       return gra;
 }
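
[To see the effect of the prefix swap, here is a minimal user-space sketch of
the same logic; the prefix value 0x20000 is made up for the example:

	#include <assert.h>

	#define EX_PAGE_SIZE 4096UL

	/* same logic as kvm_s390_real_to_abs(), with the prefix passed in */
	static unsigned long real_to_abs(unsigned long gra, unsigned long prefix)
	{
		if (gra < 2 * EX_PAGE_SIZE)
			gra += prefix;
		else if (gra >= prefix && gra < prefix + 2 * EX_PAGE_SIZE)
			gra -= prefix;
		return gra;
	}

	int main(void)
	{
		/* guest real pages 0 and 1 swap with the two pages at the prefix */
		assert(real_to_abs(0x00000, 0x20000) == 0x20000);
		assert(real_to_abs(0x20000, 0x20000) == 0x00000);
		/* all other addresses map 1:1 */
		assert(real_to_abs(0x50000, 0x20000) == 0x50000);
		return 0;
	}
]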
 
-static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
-                                         void __user *gptr,
-                                         int prefixing)
+/**
+ * kvm_s390_logical_to_effective - convert guest logical to effective address
+ * @vcpu: guest virtual cpu
+ * @ga: guest logical address
+ *
+ * Convert a guest vcpu logical address to a guest vcpu effective address by
+ * applying the rules of the vcpu's addressing mode defined by PSW bits 31
+ * and 32 (extended/basic addressing mode).
+ *
+ * Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing
+ * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode)
+ * of @ga will be zeroed and the remaining bits will be returned.
+ */
+static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
+                                                         unsigned long ga)
 {
-       unsigned long gaddr = (unsigned long) gptr;
-       unsigned long uaddr;
-
-       if (prefixing)
-               gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
-       uaddr = gmap_fault(gaddr, vcpu->arch.gmap);
-       if (IS_ERR_VALUE(uaddr))
-               uaddr = -EFAULT;
-       return (void __user *)uaddr;
+       psw_t *psw = &vcpu->arch.sie_block->gpsw;
+
+       if (psw_bits(*psw).eaba == PSW_AMODE_64BIT)
+               return ga;
+       if (psw_bits(*psw).eaba == PSW_AMODE_31BIT)
+               return ga & ((1UL << 31) - 1);
+       return ga & ((1UL << 24) - 1);
 }
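
[Worked example with a made-up address, showing what each mode keeps:

	/* ga = 0x123456789UL                                    */
	/* 64-bit mode: returned unchanged      -> 0x123456789   */
	/* 31-bit mode: ga & ((1UL << 31) - 1)  -> 0x23456789    */
	/* 24-bit mode: ga & ((1UL << 24) - 1)  -> 0x456789      */
]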
 
-#define get_guest(vcpu, x, gptr)                               \
-({                                                             \
-       __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
-       int __mask = sizeof(__typeof__(*(gptr))) - 1;           \
-       int __ret;                                              \
-                                                               \
-       if (IS_ERR((void __force *)__uptr)) {                   \
-               __ret = PTR_ERR((void __force *)__uptr);        \
-       } else {                                                \
-               BUG_ON((unsigned long)__uptr & __mask);         \
-               __ret = get_user(x, __uptr);                    \
-       }                                                       \
-       __ret;                                                  \
-})
+/*
+ * put_guest_lc, read_guest_lc and write_guest_lc are guest access functions
+ * which shall only be used to access the lowcore of a vcpu.
+ * These functions should be used for e.g. interrupt handlers where no
+ * guest memory access protection facilities, like key or low address
+ * protection, are applicable.
+ * At a later point guest vcpu lowcore access should happen via pinned
+ * prefix pages, so that these pages can be accessed directly via the
+ * kernel mapping. All of these *_lc functions can be removed then.
+ */
 
-#define put_guest(vcpu, x, gptr)                               \
+/**
+ * put_guest_lc - write a simple variable to a guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @x: value to copy to guest
+ * @gra: vcpu's destination guest real address
+ *
+ * Copies a simple value from kernel space to a guest vcpu's lowcore.
+ * The size of the variable may be 1, 2, 4 or 8 bytes. The destination
+ * must be located in the vcpu's lowcore. Otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *      the guest memory mapping is broken. In any case the best solution
+ *      would be to terminate the guest.
+ *      It is wrong to inject a guest exception.
+ */
+#define put_guest_lc(vcpu, x, gra)                             \
 ({                                                             \
-       __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
-       int __mask = sizeof(__typeof__(*(gptr))) - 1;           \
-       int __ret;                                              \
+       struct kvm_vcpu *__vcpu = (vcpu);                       \
+       __typeof__(*(gra)) __x = (x);                           \
+       unsigned long __gpa;                                    \
                                                                \
-       if (IS_ERR((void __force *)__uptr)) {                   \
-               __ret = PTR_ERR((void __force *)__uptr);        \
-       } else {                                                \
-               BUG_ON((unsigned long)__uptr & __mask);         \
-               __ret = put_user(x, __uptr);                    \
-       }                                                       \
-       __ret;                                                  \
+       __gpa = (unsigned long)(gra);                           \
+       __gpa += __vcpu->arch.sie_block->prefix;                \
+       kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x)); \
 })
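
[For instance, the interrupt-delivery rework later in this series uses this
macro to store an external-interrupt code into the guest lowcore:

	rc = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
]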
 
-static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to,
-                              unsigned long from, unsigned long len,
-                              int to_guest, int prefixing)
+/**
+ * write_guest_lc - copy data from kernel space to guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @gra: vcpu's destination guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from kernel space to guest vcpu's lowcore. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *      the guest memory mapping is broken. In any case the best solution
+ *      would be to terminate the guest.
+ *      It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int write_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+                  unsigned long len)
+{
+       unsigned long gpa = gra + vcpu->arch.sie_block->prefix;
+
+       return kvm_write_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * read_guest_lc - copy data from guest vcpu's lowcore to kernel space
+ * @vcpu: virtual cpu
+ * @gra: vcpu's source guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from guest vcpu's lowcore to kernel space. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *      the guest memory mapping is broken. In any case the best solution
+ *      would be to terminate the guest.
+ *      It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+                 unsigned long len)
+{
+       unsigned long gpa = gra + vcpu->arch.sie_block->prefix;
+
+       return kvm_read_guest(vcpu->kvm, gpa, data, len);
+}
+
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+                unsigned long len, int write);
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+                     void *data, unsigned long len, int write);
+
+/**
+ * write_guest - copy data from kernel space to guest space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @ga (guest address).
+ * In order to copy data to guest space the PSW of the vcpu is inspected:
+ * If DAT is off data will be copied to guest real or absolute memory.
+ * If DAT is on data will be copied to the address space as specified by
+ * the address space bits of the PSW:
+ * Primary, secondary or home space (access register mode is currently not
+ * implemented).
+ * The addressing mode of the PSW is also inspected, so that address wrap
+ * around is taken into account for 24-, 31- and 64-bit addressing mode,
+ * if the data to be copied crosses page boundaries in guest address space.
+ * In addition also low address and DAT protection are inspected before
+ * copying any data (key protection is currently not implemented).
+ *
+ * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu.
+ * In case of an access exception (e.g. protection exception) pgm will contain
+ * all data necessary so that a subsequent call to 'kvm_s390_inject_prog_vcpu()'
+ * will inject a correct exception into the guest.
+ * If no access exception happened, the contents of pgm are undefined when
+ * this function returns.
+ *
+ * Returns:  - zero on success
+ *          - a negative value if e.g. the guest mapping is broken or in
+ *            case of out-of-memory. In this case the contents of pgm are
+ *            undefined. Also parts of @data may have been copied to guest
+ *            space.
+ *          - a positive value if an access exception happened. In this case
+ *            the returned value is the program interruption code and the
+ *            contents of pgm may be used to inject an exception into the
+ *            guest. No data has been copied to guest space.
+ *
+ * Note: in case an access exception is recognized no data has been copied to
+ *      guest space (this is also true if the data to be copied would cross
+ *      one or more page boundaries in guest space).
+ *      Therefore this function may be used for nullifying and suppressing
+ *      instruction emulation.
+ *      It may also be used for terminating instructions, where it is
+ *      undefined whether data has been changed in guest space in case of
+ *      an exception.
+ */
+static inline __must_check
+int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+               unsigned long len)
+{
+       return access_guest(vcpu, ga, data, len, 1);
+}
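
[A hypothetical caller, sketching how the three classes of return values
described above would typically be handled. emulate_store() and its arguments
are made up for illustration; kvm_s390_inject_prog_vcpu() and vcpu->arch.pgm
are the injection path and pgm member named in the comment above:

	/* illustrative sketch only, not part of this patch */
	static int emulate_store(struct kvm_vcpu *vcpu, unsigned long ga, u64 val)
	{
		int rc;

		rc = write_guest(vcpu, ga, &val, sizeof(val));
		if (rc < 0)	/* host error, e.g. broken guest mapping */
			return rc;
		if (rc > 0)	/* access exception: rc is the pgm int code */
			return kvm_s390_inject_prog_vcpu(vcpu, &vcpu->arch.pgm);
		return 0;	/* success, the instruction may complete */
	}
]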
+
+/**
+ * read_guest - copy data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @ga (guest address) to @data (kernel space).
+ *
+ * The behaviour of read_guest is identical to write_guest, except that
+ * data will be copied from guest space to kernel space.
+ */
+static inline __must_check
+int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+              unsigned long len)
+{
+       return access_guest(vcpu, ga, data, len, 0);
+}
+
+/**
+ * write_guest_abs - copy data from kernel space to guest space absolute
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gpa (guest absolute address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+                   unsigned long len)
+{
+       return kvm_write_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * read_guest_abs - copy data from guest space absolute to kernel space
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gpa (guest absolute address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+                  unsigned long len)
+{
+       return kvm_read_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * write_guest_real - copy data from kernel space to guest space real
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gra (guest real address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+                    unsigned long len)
+{
+       return access_guest_real(vcpu, gra, data, len, 1);
+}
+
+/**
+ * read_guest_real - copy data from guest space real to kernel space
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gra (guest real address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+                   unsigned long len)
 {
-       unsigned long _len, rc;
-       void __user *uptr;
-
-       while (len) {
-               uptr = to_guest ? (void __user *)to : (void __user *)from;
-               uptr = __gptr_to_uptr(vcpu, uptr, prefixing);
-               if (IS_ERR((void __force *)uptr))
-                       return -EFAULT;
-               _len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1));
-               _len = min(_len, len);
-               if (to_guest)
-                       rc = copy_to_user((void __user *) uptr, (void *)from, _len);
-               else
-                       rc = copy_from_user((void *)to, (void __user *)uptr, _len);
-               if (rc)
-                       return -EFAULT;
-               len -= _len;
-               from += _len;
-               to += _len;
-       }
-       return 0;
+       return access_guest_real(vcpu, gra, data, len, 0);
 }
 
-#define copy_to_guest(vcpu, to, from, size) \
-       __copy_guest(vcpu, to, (unsigned long)from, size, 1, 1)
-#define copy_from_guest(vcpu, to, from, size) \
-       __copy_guest(vcpu, (unsigned long)to, from, size, 0, 1)
-#define copy_to_guest_absolute(vcpu, to, from, size) \
-       __copy_guest(vcpu, to, (unsigned long)from, size, 1, 0)
-#define copy_from_guest_absolute(vcpu, to, from, size) \
-       __copy_guest(vcpu, (unsigned long)to, from, size, 0, 0)
+int ipte_lock_held(struct kvm_vcpu *vcpu);
 
 #endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
new file mode 100644 (file)
index 0000000..757ccef
--- /dev/null
@@ -0,0 +1,481 @@
+/*
+ * kvm guest debug support
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
+ */
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+/*
+ * Extends the address range given by *start and *stop to include the address
+ * range starting at estart with length len. Takes care of overflowing
+ * intervals and tries to minimize the overall interval size.
+ */
+static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
+{
+       u64 estop;
+
+       if (len > 0)
+               len--;
+       else
+               len = 0;
+
+       estop = estart + len;
+
+       /* 0-0 range represents "not set" */
+       if ((*start == 0) && (*stop == 0)) {
+               *start = estart;
+               *stop = estop;
+       } else if (*start <= *stop) {
+               /* increase the existing range */
+               if (estart < *start)
+                       *start = estart;
+               if (estop > *stop)
+                       *stop = estop;
+       } else {
+               /* "overflowing" interval, whereby *stop > *start */
+               if (estart <= *stop) {
+                       if (estop > *stop)
+                               *stop = estop;
+               } else if (estop > *start) {
+                       if (estart < *start)
+                               *start = estart;
+               }
+               /* minimize the range */
+               else if ((estop - *stop) < (*start - estart))
+                       *stop = estop;
+               else
+                       *start = estart;
+       }
+}
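
[For example (values chosen for illustration), extending a non-wrapping range:

	u64 start = 0x1000, stop = 0x2000;

	extend_address_range(&start, &stop, 0x3000, 0x10);
	/* now start == 0x1000, stop == 0x300f */

For a wrapping range (*start > *stop), the new range is merged into whichever
part of the interval it overlaps; if it is disjoint, the nearer boundary is
extended ("minimize the range").]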
+
+#define MAX_INST_SIZE 6
+
+static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
+{
+       unsigned long start, len;
+       u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+       u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+       u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+       int i;
+
+       if (vcpu->arch.guestdbg.nr_hw_bp <= 0 ||
+           vcpu->arch.guestdbg.hw_bp_info == NULL)
+               return;
+
+       /*
+        * If the guest is not interested in branching events, we can safely
+        * limit them to the PER address range.
+        */
+       if (!(*cr9 & PER_EVENT_BRANCH))
+               *cr9 |= PER_CONTROL_BRANCH_ADDRESS;
+       *cr9 |= PER_EVENT_IFETCH | PER_EVENT_BRANCH;
+
+       for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+               start = vcpu->arch.guestdbg.hw_bp_info[i].addr;
+               len = vcpu->arch.guestdbg.hw_bp_info[i].len;
+
+               /*
+                * The instruction in front of the desired bp has to
+                * report instruction-fetching events
+                */
+               if (start < MAX_INST_SIZE) {
+                       len += start;
+                       start = 0;
+               } else {
+                       start -= MAX_INST_SIZE;
+                       len += MAX_INST_SIZE;
+               }
+
+               extend_address_range(cr10, cr11, start, len);
+       }
+}
+
+static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
+{
+       unsigned long start, len;
+       u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+       u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+       u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+       int i;
+
+       if (vcpu->arch.guestdbg.nr_hw_wp <= 0 ||
+           vcpu->arch.guestdbg.hw_wp_info == NULL)
+               return;
+
+       /* if host uses storage alteration for special address
+        * spaces, enable all events and give all to the guest */
+       if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
+               *cr9 &= ~PER_CONTROL_ALTERATION;
+               *cr10 = 0;
+               *cr11 = PSW_ADDR_INSN;
+       } else {
+               *cr9 &= ~PER_CONTROL_ALTERATION;
+               *cr9 |= PER_EVENT_STORE;
+
+               for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+                       start = vcpu->arch.guestdbg.hw_wp_info[i].addr;
+                       len = vcpu->arch.guestdbg.hw_wp_info[i].len;
+
+                       extend_address_range(cr10, cr11, start, len);
+               }
+       }
+}
+
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.guestdbg.cr0 = vcpu->arch.sie_block->gcr[0];
+       vcpu->arch.guestdbg.cr9 = vcpu->arch.sie_block->gcr[9];
+       vcpu->arch.guestdbg.cr10 = vcpu->arch.sie_block->gcr[10];
+       vcpu->arch.guestdbg.cr11 = vcpu->arch.sie_block->gcr[11];
+}
+
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.sie_block->gcr[0] = vcpu->arch.guestdbg.cr0;
+       vcpu->arch.sie_block->gcr[9] = vcpu->arch.guestdbg.cr9;
+       vcpu->arch.sie_block->gcr[10] = vcpu->arch.guestdbg.cr10;
+       vcpu->arch.sie_block->gcr[11] = vcpu->arch.guestdbg.cr11;
+}
+
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+       /*
+        * TODO: if guest psw has per enabled, otherwise 0s!
+        * This reduces the amount of reported events.
+        * Need to intercept all psw changes!
+        */
+
+       if (guestdbg_sstep_enabled(vcpu)) {
+               /* disable timer (clock-comparator) interrupts */
+               vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
+               vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
+               vcpu->arch.sie_block->gcr[10] = 0;
+               vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+       }
+
+       if (guestdbg_hw_bp_enabled(vcpu)) {
+               enable_all_hw_bp(vcpu);
+               enable_all_hw_wp(vcpu);
+       }
+
+       /* TODO: Instruction-fetching-nullification not allowed for now */
+       if (vcpu->arch.sie_block->gcr[9] & PER_EVENT_NULLIFICATION)
+               vcpu->arch.sie_block->gcr[9] &= ~PER_EVENT_NULLIFICATION;
+}
+
+#define MAX_WP_SIZE 100
+
+static int __import_wp_info(struct kvm_vcpu *vcpu,
+                           struct kvm_hw_breakpoint *bp_data,
+                           struct kvm_hw_wp_info_arch *wp_info)
+{
+       int ret = 0;
+
+       wp_info->len = bp_data->len;
+       wp_info->addr = bp_data->addr;
+       wp_info->phys_addr = bp_data->phys_addr;
+       wp_info->old_data = NULL;
+
+       if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE)
+               return -EINVAL;
+
+       wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL);
+       if (!wp_info->old_data)
+               return -ENOMEM;
+       /* try to backup the original value */
+       ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data,
+                        wp_info->len);
+       if (ret) {
+               kfree(wp_info->old_data);
+               wp_info->old_data = NULL;
+       }
+
+       return ret;
+}
+
+#define MAX_BP_COUNT 50
+
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+                           struct kvm_guest_debug *dbg)
+{
+       int ret = 0, nr_wp = 0, nr_bp = 0, i, size;
+       struct kvm_hw_breakpoint *bp_data = NULL;
+       struct kvm_hw_wp_info_arch *wp_info = NULL;
+       struct kvm_hw_bp_info_arch *bp_info = NULL;
+
+       if (dbg->arch.nr_hw_bp <= 0 || !dbg->arch.hw_bp)
+               return 0;
+       else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
+               return -EINVAL;
+
+       size = dbg->arch.nr_hw_bp * sizeof(struct kvm_hw_breakpoint);
+       bp_data = kmalloc(size, GFP_KERNEL);
+       if (!bp_data) {
+               ret = -ENOMEM;
+               goto error;
+       }
+
+       if (copy_from_user(bp_data, dbg->arch.hw_bp, size)) {
+               /* copy_from_user() returns the number of bytes not copied */
+               ret = -EFAULT;
+               goto error;
+       }
+
+       for (i = 0; i < dbg->arch.nr_hw_bp; i++) {
+               switch (bp_data[i].type) {
+               case KVM_HW_WP_WRITE:
+                       nr_wp++;
+                       break;
+               case KVM_HW_BP:
+                       nr_bp++;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       size = nr_wp * sizeof(struct kvm_hw_wp_info_arch);
+       if (size > 0) {
+               wp_info = kmalloc(size, GFP_KERNEL);
+               if (!wp_info) {
+                       ret = -ENOMEM;
+                       goto error;
+               }
+       }
+       size = nr_bp * sizeof(struct kvm_hw_bp_info_arch);
+       if (size > 0) {
+               bp_info = kmalloc(size, GFP_KERNEL);
+               if (!bp_info) {
+                       ret = -ENOMEM;
+                       goto error;
+               }
+       }
+
+       for (nr_wp = 0, nr_bp = 0, i = 0; i < dbg->arch.nr_hw_bp; i++) {
+               switch (bp_data[i].type) {
+               case KVM_HW_WP_WRITE:
+                       ret = __import_wp_info(vcpu, &bp_data[i],
+                                              &wp_info[nr_wp]);
+                       if (ret)
+                               goto error;
+                       nr_wp++;
+                       break;
+               case KVM_HW_BP:
+                       bp_info[nr_bp].len = bp_data[i].len;
+                       bp_info[nr_bp].addr = bp_data[i].addr;
+                       nr_bp++;
+                       break;
+               }
+       }
+
+       vcpu->arch.guestdbg.nr_hw_bp = nr_bp;
+       vcpu->arch.guestdbg.hw_bp_info = bp_info;
+       vcpu->arch.guestdbg.nr_hw_wp = nr_wp;
+       vcpu->arch.guestdbg.hw_wp_info = wp_info;
+       return 0;
+error:
+       kfree(bp_data);
+       kfree(wp_info);
+       kfree(bp_info);
+       return ret;
+}
+
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu)
+{
+       int i;
+       struct kvm_hw_wp_info_arch *hw_wp_info = NULL;
+
+       for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+               hw_wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+               kfree(hw_wp_info->old_data);
+               hw_wp_info->old_data = NULL;
+       }
+       kfree(vcpu->arch.guestdbg.hw_wp_info);
+       vcpu->arch.guestdbg.hw_wp_info = NULL;
+
+       kfree(vcpu->arch.guestdbg.hw_bp_info);
+       vcpu->arch.guestdbg.hw_bp_info = NULL;
+
+       vcpu->arch.guestdbg.nr_hw_wp = 0;
+       vcpu->arch.guestdbg.nr_hw_bp = 0;
+}
+
+static inline int in_addr_range(u64 addr, u64 a, u64 b)
+{
+       if (a <= b)
+               return (addr >= a) && (addr <= b);
+       else
+               /* "overflowing" interval */
+               return (addr <= a) && (addr >= b);
+}
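
[With made-up values, a wrapped PER range a=0xf000, b=0x0fff covers
[0xf000, U64_MAX] plus [0, 0x0fff]:

	in_addr_range(0xf800, 0xf000, 0x0fff);	/* -> 1, in the high part */
	in_addr_range(0x0800, 0xf000, 0x0fff);	/* -> 1, in the low part  */
	in_addr_range(0x8000, 0xf000, 0x0fff);	/* -> 0, outside          */
]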
+
+#define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1)
+
+static struct kvm_hw_bp_info_arch *find_hw_bp(struct kvm_vcpu *vcpu,
+                                             unsigned long addr)
+{
+       struct kvm_hw_bp_info_arch *bp_info = vcpu->arch.guestdbg.hw_bp_info;
+       int i;
+
+       if (vcpu->arch.guestdbg.nr_hw_bp == 0)
+               return NULL;
+
+       for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+               /* addr is directly the start or in the range of a bp */
+               if (addr == bp_info->addr)
+                       goto found;
+               if (bp_info->len > 0 &&
+                   in_addr_range(addr, bp_info->addr, end_of_range(bp_info)))
+                       goto found;
+
+               bp_info++;
+       }
+
+       return NULL;
+found:
+       return bp_info;
+}
+
+static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
+{
+       int i;
+       struct kvm_hw_wp_info_arch *wp_info = NULL;
+       void *temp = NULL;
+
+       if (vcpu->arch.guestdbg.nr_hw_wp == 0)
+               return NULL;
+
+       for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+               wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+               if (!wp_info || !wp_info->old_data || wp_info->len <= 0)
+                       continue;
+
+               temp = kmalloc(wp_info->len, GFP_KERNEL);
+               if (!temp)
+                       continue;
+
+               /* refetch the wp data and compare it to the old value */
+               if (!read_guest(vcpu, wp_info->phys_addr, temp,
+                               wp_info->len)) {
+                       if (memcmp(temp, wp_info->old_data, wp_info->len)) {
+                               kfree(temp);
+                               return wp_info;
+                       }
+               }
+               kfree(temp);
+               temp = NULL;
+       }
+
+       return NULL;
+}
+
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu)
+{
+       vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+       vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+}
+
+#define per_bp_event(code) \
+                       (code & (PER_EVENT_IFETCH | PER_EVENT_BRANCH))
+#define per_write_wp_event(code) \
+                       (code & (PER_EVENT_STORE | PER_EVENT_STORE_REAL))
+
+static int debug_exit_required(struct kvm_vcpu *vcpu)
+{
+       u32 perc = (vcpu->arch.sie_block->perc << 24);
+       struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+       struct kvm_hw_wp_info_arch *wp_info = NULL;
+       struct kvm_hw_bp_info_arch *bp_info = NULL;
+       unsigned long addr = vcpu->arch.sie_block->gpsw.addr;
+       unsigned long peraddr = vcpu->arch.sie_block->peraddr;
+
+       if (guestdbg_hw_bp_enabled(vcpu)) {
+               if (per_write_wp_event(perc) &&
+                   vcpu->arch.guestdbg.nr_hw_wp > 0) {
+                       wp_info = any_wp_changed(vcpu);
+                       if (wp_info) {
+                               debug_exit->addr = wp_info->addr;
+                               debug_exit->type = KVM_HW_WP_WRITE;
+                               goto exit_required;
+                       }
+               }
+               if (per_bp_event(perc) &&
+                        vcpu->arch.guestdbg.nr_hw_bp > 0) {
+                       bp_info = find_hw_bp(vcpu, addr);
+                       /* remove duplicate events if PC==PER address */
+                       if (bp_info && (addr != peraddr)) {
+                               debug_exit->addr = addr;
+                               debug_exit->type = KVM_HW_BP;
+                               vcpu->arch.guestdbg.last_bp = addr;
+                               goto exit_required;
+                       }
+                       /* breakpoint missed */
+                       bp_info = find_hw_bp(vcpu, peraddr);
+                       if (bp_info && vcpu->arch.guestdbg.last_bp != peraddr) {
+                               debug_exit->addr = peraddr;
+                               debug_exit->type = KVM_HW_BP;
+                               goto exit_required;
+                       }
+               }
+       }
+       if (guestdbg_sstep_enabled(vcpu) && per_bp_event(perc)) {
+               debug_exit->addr = addr;
+               debug_exit->type = KVM_SINGLESTEP;
+               goto exit_required;
+       }
+
+       return 0;
+exit_required:
+       return 1;
+}
+
+#define guest_per_enabled(vcpu) \
+                            (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER)
+
+static void filter_guest_per_event(struct kvm_vcpu *vcpu)
+{
+       u32 perc = vcpu->arch.sie_block->perc << 24;
+       u64 peraddr = vcpu->arch.sie_block->peraddr;
+       u64 addr = vcpu->arch.sie_block->gpsw.addr;
+       u64 cr9 = vcpu->arch.sie_block->gcr[9];
+       u64 cr10 = vcpu->arch.sie_block->gcr[10];
+       u64 cr11 = vcpu->arch.sie_block->gcr[11];
+       /* keep only the PER events demanded by the guest */
+       u32 guest_perc = perc & cr9 & PER_EVENT_MASK;
+
+       if (!guest_per_enabled(vcpu))
+               guest_perc = 0;
+
+       /* filter "successful-branching" events */
+       if (guest_perc & PER_EVENT_BRANCH &&
+           cr9 & PER_CONTROL_BRANCH_ADDRESS &&
+           !in_addr_range(addr, cr10, cr11))
+               guest_perc &= ~PER_EVENT_BRANCH;
+
+       /* filter "instruction-fetching" events */
+       if (guest_perc & PER_EVENT_IFETCH &&
+           !in_addr_range(peraddr, cr10, cr11))
+               guest_perc &= ~PER_EVENT_IFETCH;
+
+       /* All other PER events will be given to the guest */
+       /* TODO: Check altered address/address space */
+
+       vcpu->arch.sie_block->perc = guest_perc >> 24;
+
+       if (!guest_perc)
+               vcpu->arch.sie_block->iprcc &= ~PGM_PER;
+}
+
+void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
+{
+       if (debug_exit_required(vcpu))
+               vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
+
+       filter_guest_per_event(vcpu);
+}
index eeb1ac7d8fa48798a79c18a5015aae5aa372c455..30e1c5eb726a2ac3413e3f695cfc5303b756c70e 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/pagemap.h>
 
 #include <asm/kvm_host.h>
+#include <asm/asm-offsets.h>
 
 #include "kvm-s390.h"
 #include "gaccess.h"
@@ -29,6 +30,7 @@ static const intercept_handler_t instruction_handlers[256] = {
        [0x83] = kvm_s390_handle_diag,
        [0xae] = kvm_s390_handle_sigp,
        [0xb2] = kvm_s390_handle_b2,
+       [0xb6] = kvm_s390_handle_stctl,
        [0xb7] = kvm_s390_handle_lctl,
        [0xb9] = kvm_s390_handle_b9,
        [0xe5] = kvm_s390_handle_e5,
@@ -109,22 +111,112 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
        return -EOPNOTSUPP;
 }
 
+static void __extract_prog_irq(struct kvm_vcpu *vcpu,
+                              struct kvm_s390_pgm_info *pgm_info)
+{
+       memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info));
+       pgm_info->code = vcpu->arch.sie_block->iprcc;
+
+       switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
+       case PGM_AFX_TRANSLATION:
+       case PGM_ASX_TRANSLATION:
+       case PGM_EX_TRANSLATION:
+       case PGM_LFX_TRANSLATION:
+       case PGM_LSTE_SEQUENCE:
+       case PGM_LSX_TRANSLATION:
+       case PGM_LX_TRANSLATION:
+       case PGM_PRIMARY_AUTHORITY:
+       case PGM_SECONDARY_AUTHORITY:
+       case PGM_SPACE_SWITCH:
+               pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+               break;
+       case PGM_ALEN_TRANSLATION:
+       case PGM_ALE_SEQUENCE:
+       case PGM_ASTE_INSTANCE:
+       case PGM_ASTE_SEQUENCE:
+       case PGM_ASTE_VALIDITY:
+       case PGM_EXTENDED_AUTHORITY:
+               pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+               break;
+       case PGM_ASCE_TYPE:
+       case PGM_PAGE_TRANSLATION:
+       case PGM_REGION_FIRST_TRANS:
+       case PGM_REGION_SECOND_TRANS:
+       case PGM_REGION_THIRD_TRANS:
+       case PGM_SEGMENT_TRANSLATION:
+               pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+               pgm_info->exc_access_id  = vcpu->arch.sie_block->eai;
+               pgm_info->op_access_id  = vcpu->arch.sie_block->oai;
+               break;
+       case PGM_MONITOR:
+               pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
+               pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
+               break;
+       case PGM_DATA:
+               pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
+               break;
+       case PGM_PROTECTION:
+               pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+               pgm_info->exc_access_id  = vcpu->arch.sie_block->eai;
+               break;
+       default:
+               break;
+       }
+
+       if (vcpu->arch.sie_block->iprcc & PGM_PER) {
+               pgm_info->per_code = vcpu->arch.sie_block->perc;
+               pgm_info->per_atmid = vcpu->arch.sie_block->peratmid;
+               pgm_info->per_address = vcpu->arch.sie_block->peraddr;
+               pgm_info->per_access_id = vcpu->arch.sie_block->peraid;
+       }
+}
+
+/*
+ * restore ITDB to program-interruption TDB in guest lowcore
+ * and set TX abort indication if required
+ */
+static int handle_itdb(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_itdb *itdb;
+       int rc;
+
+       if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
+               return 0;
+       if (current->thread.per_flags & PER_FLAG_NO_TE)
+               return 0;
+       itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
+       rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
+       if (rc)
+               return rc;
+       memset(itdb, 0, sizeof(*itdb));
+
+       return 0;
+}
+
+#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
+
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
+       struct kvm_s390_pgm_info pgm_info;
+       int rc;
+
        vcpu->stat.exit_program_interruption++;
 
-       /* Restore ITDB to Program-Interruption TDB in guest memory */
-       if (IS_TE_ENABLED(vcpu) &&
-           !(current->thread.per_flags & PER_FLAG_NO_TE) &&
-           IS_ITDB_VALID(vcpu)) {
-               copy_to_guest(vcpu, TDB_ADDR, vcpu->arch.sie_block->itdba,
-                             sizeof(struct kvm_s390_itdb));
-               memset((void *) vcpu->arch.sie_block->itdba, 0,
-                      sizeof(struct kvm_s390_itdb));
+       if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+               kvm_s390_handle_per_event(vcpu);
+               /* the interrupt might have been filtered out completely */
+               if (vcpu->arch.sie_block->iprcc == 0)
+                       return 0;
        }
 
        trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
-       return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
+
+       rc = handle_itdb(vcpu);
+       if (rc)
+               return rc;
+
+       __extract_prog_irq(vcpu, &pgm_info);
+       return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
 }
 
 static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
index 200a8f9390b68cb68556f6a319dd19fe90fa0457..077e4738ebdc45cfc80cf408d3210d97018bfda2 100644 (file)
@@ -56,6 +56,17 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+       if (psw_extint_disabled(vcpu) ||
+           !(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+               return 0;
+       if (guestdbg_enabled(vcpu) && guestdbg_sstep_enabled(vcpu))
+               /* No timer interrupts when single stepping */
+               return 0;
+       return 1;
+}
+
 static u64 int_word_to_isc_bits(u32 int_word)
 {
        u8 isc = (int_word & 0x38000000) >> 27;
@@ -131,7 +142,13 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
                CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
                &vcpu->arch.sie_block->cpuflags);
        vcpu->arch.sie_block->lctl = 0x0000;
-       vcpu->arch.sie_block->ictl &= ~ICTL_LPSW;
+       vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
+
+       if (guestdbg_enabled(vcpu)) {
+               vcpu->arch.sie_block->lctl |= (LCTL_CR0 | LCTL_CR9 |
+                                              LCTL_CR10 | LCTL_CR11);
+               vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
+       }
 }
 
 static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
@@ -174,6 +191,106 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
        }
 }
 
+static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
+                             struct kvm_s390_pgm_info *pgm_info)
+{
+       const unsigned short table[] = { 2, 4, 4, 6 };
+       int rc = 0;
+
+       switch (pgm_info->code & ~PGM_PER) {
+       case PGM_AFX_TRANSLATION:
+       case PGM_ASX_TRANSLATION:
+       case PGM_EX_TRANSLATION:
+       case PGM_LFX_TRANSLATION:
+       case PGM_LSTE_SEQUENCE:
+       case PGM_LSX_TRANSLATION:
+       case PGM_LX_TRANSLATION:
+       case PGM_PRIMARY_AUTHORITY:
+       case PGM_SECONDARY_AUTHORITY:
+       case PGM_SPACE_SWITCH:
+               rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+                                 (u64 *)__LC_TRANS_EXC_CODE);
+               break;
+       case PGM_ALEN_TRANSLATION:
+       case PGM_ALE_SEQUENCE:
+       case PGM_ASTE_INSTANCE:
+       case PGM_ASTE_SEQUENCE:
+       case PGM_ASTE_VALIDITY:
+       case PGM_EXTENDED_AUTHORITY:
+               rc = put_guest_lc(vcpu, pgm_info->exc_access_id,
+                                 (u8 *)__LC_EXC_ACCESS_ID);
+               break;
+       case PGM_ASCE_TYPE:
+       case PGM_PAGE_TRANSLATION:
+       case PGM_REGION_FIRST_TRANS:
+       case PGM_REGION_SECOND_TRANS:
+       case PGM_REGION_THIRD_TRANS:
+       case PGM_SEGMENT_TRANSLATION:
+               rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+                                 (u64 *)__LC_TRANS_EXC_CODE);
+               rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+                                  (u8 *)__LC_EXC_ACCESS_ID);
+               rc |= put_guest_lc(vcpu, pgm_info->op_access_id,
+                                  (u8 *)__LC_OP_ACCESS_ID);
+               break;
+       case PGM_MONITOR:
+               rc = put_guest_lc(vcpu, pgm_info->mon_class_nr,
+                                 (u64 *)__LC_MON_CLASS_NR);
+               rc |= put_guest_lc(vcpu, pgm_info->mon_code,
+                                  (u64 *)__LC_MON_CODE);
+               break;
+       case PGM_DATA:
+               rc = put_guest_lc(vcpu, pgm_info->data_exc_code,
+                                 (u32 *)__LC_DATA_EXC_CODE);
+               break;
+       case PGM_PROTECTION:
+               rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+                                 (u64 *)__LC_TRANS_EXC_CODE);
+               rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+                                  (u8 *)__LC_EXC_ACCESS_ID);
+               break;
+       }
+
+       if (pgm_info->code & PGM_PER) {
+               rc |= put_guest_lc(vcpu, pgm_info->per_code,
+                                  (u8 *) __LC_PER_CODE);
+               rc |= put_guest_lc(vcpu, pgm_info->per_atmid,
+                                  (u8 *)__LC_PER_ATMID);
+               rc |= put_guest_lc(vcpu, pgm_info->per_address,
+                                  (u64 *) __LC_PER_ADDRESS);
+               rc |= put_guest_lc(vcpu, pgm_info->per_access_id,
+                                  (u8 *) __LC_PER_ACCESS_ID);
+       }
+
+       switch (vcpu->arch.sie_block->icptcode) {
+       case ICPT_INST:
+       case ICPT_INSTPROGI:
+       case ICPT_OPEREXC:
+       case ICPT_PARTEXEC:
+       case ICPT_IOINST:
+               /* last instruction only stored for these icptcodes */
+               rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
+                                  (u16 *) __LC_PGM_ILC);
+               break;
+       case ICPT_PROGI:
+               rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc,
+                                  (u16 *) __LC_PGM_ILC);
+               break;
+       default:
+               rc |= put_guest_lc(vcpu, 0,
+                                  (u16 *) __LC_PGM_ILC);
+       }
+
+       rc |= put_guest_lc(vcpu, pgm_info->code,
+                          (u16 *)__LC_PGM_INT_CODE);
+       rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+
+       return rc;
+}
+
 static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
                                   struct kvm_s390_interrupt_info *inti)
 {
@@ -186,26 +303,28 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
                vcpu->stat.deliver_emergency_signal++;
                trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
                                                 inti->emerg.code, 0);
-               rc  = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE);
-               rc |= put_guest(vcpu, inti->emerg.code,
-                               (u16 __user *)__LC_EXT_CPU_ADDR);
-               rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+               rc  = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
+               rc |= put_guest_lc(vcpu, inti->emerg.code,
+                                  (u16 *)__LC_EXT_CPU_ADDR);
+               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-                                     __LC_EXT_NEW_PSW, sizeof(psw_t));
                break;
        case KVM_S390_INT_EXTERNAL_CALL:
                VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
                vcpu->stat.deliver_external_call++;
                trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
                                                 inti->extcall.code, 0);
-               rc  = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE);
-               rc |= put_guest(vcpu, inti->extcall.code,
-                               (u16 __user *)__LC_EXT_CPU_ADDR);
-               rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-                                     __LC_EXT_NEW_PSW, sizeof(psw_t));
+               rc  = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE);
+               rc |= put_guest_lc(vcpu, inti->extcall.code,
+                                  (u16 *)__LC_EXT_CPU_ADDR);
+               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                                    &vcpu->arch.sie_block->gpsw,
+                                    sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                                   &vcpu->arch.sie_block->gpsw,
+                                   sizeof(psw_t));
                break;
        case KVM_S390_INT_SERVICE:
                VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
@@ -213,37 +332,39 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
                vcpu->stat.deliver_service_signal++;
                trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
                                                 inti->ext.ext_params, 0);
-               rc  = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE);
-               rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+               rc  = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE);
+               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                                    &vcpu->arch.sie_block->gpsw,
+                                    sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-                                     __LC_EXT_NEW_PSW, sizeof(psw_t));
-               rc |= put_guest(vcpu, inti->ext.ext_params,
-                               (u32 __user *)__LC_EXT_PARAMS);
+               rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+                                  (u32 *)__LC_EXT_PARAMS);
                break;
        case KVM_S390_INT_PFAULT_INIT:
                trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
                                                 inti->ext.ext_params2);
-               rc  = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
-               rc |= put_guest(vcpu, 0x0600, (u16 __user *) __LC_EXT_CPU_ADDR);
-               rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+               rc  = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE);
+               rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR);
+               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-                                     __LC_EXT_NEW_PSW, sizeof(psw_t));
-               rc |= put_guest(vcpu, inti->ext.ext_params2,
-                               (u64 __user *) __LC_EXT_PARAMS2);
+               rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+                                  (u64 *) __LC_EXT_PARAMS2);
                break;
        case KVM_S390_INT_PFAULT_DONE:
                trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
                                                 inti->ext.ext_params2);
-               rc  = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
-               rc |= put_guest(vcpu, 0x0680, (u16 __user *) __LC_EXT_CPU_ADDR);
-               rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+               rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
+               rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
+               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                                    &vcpu->arch.sie_block->gpsw,
+                                    sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-                                     __LC_EXT_NEW_PSW, sizeof(psw_t));
-               rc |= put_guest(vcpu, inti->ext.ext_params2,
-                               (u64 __user *) __LC_EXT_PARAMS2);
+               rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+                                  (u64 *)__LC_EXT_PARAMS2);
                break;
        case KVM_S390_INT_VIRTIO:
                VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
@@ -252,16 +373,17 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
                trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
                                                 inti->ext.ext_params,
                                                 inti->ext.ext_params2);
-               rc  = put_guest(vcpu, 0x2603, (u16 __user *)__LC_EXT_INT_CODE);
-               rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR);
-               rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+               rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
+               rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
+               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                                    &vcpu->arch.sie_block->gpsw,
+                                    sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-                                     __LC_EXT_NEW_PSW, sizeof(psw_t));
-               rc |= put_guest(vcpu, inti->ext.ext_params,
-                               (u32 __user *)__LC_EXT_PARAMS);
-               rc |= put_guest(vcpu, inti->ext.ext_params2,
-                               (u64 __user *)__LC_EXT_PARAMS2);
+               rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+                                  (u32 *)__LC_EXT_PARAMS);
+               rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+                                  (u64 *)__LC_EXT_PARAMS2);
                break;
        case KVM_S390_SIGP_STOP:
                VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
@@ -285,12 +407,12 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
                vcpu->stat.deliver_restart_signal++;
                trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
                                                 0, 0);
-               rc  = copy_to_guest(vcpu,
-                                   offsetof(struct _lowcore, restart_old_psw),
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-                                     offsetof(struct _lowcore, restart_psw),
-                                     sizeof(psw_t));
+               rc  = write_guest_lc(vcpu,
+                                    offsetof(struct _lowcore, restart_old_psw),
+                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
+                                   &vcpu->arch.sie_block->gpsw,
+                                   sizeof(psw_t));
                atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
                break;
        case KVM_S390_PROGRAM_INT:
@@ -300,13 +422,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
                vcpu->stat.deliver_program_int++;
                trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
                                                 inti->pgm.code, 0);
-               rc  = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE);
-               rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
-                               (u16 __user *)__LC_PGM_ILC);
-               rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-                                     __LC_PGM_NEW_PSW, sizeof(psw_t));
+               rc = __deliver_prog_irq(vcpu, &inti->pgm);
                break;
 
        case KVM_S390_MCHK:
@@ -317,11 +433,12 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
                                                 inti->mchk.mcic);
                rc  = kvm_s390_vcpu_store_status(vcpu,
                                                 KVM_S390_STORE_STATUS_PREFIXED);
-               rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE);
-               rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW,
+               rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE);
+               rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+                                    &vcpu->arch.sie_block->gpsw,
+                                    sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-                                     __LC_MCK_NEW_PSW, sizeof(psw_t));
                break;
 
        case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
@@ -334,18 +451,20 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
                vcpu->stat.deliver_io_int++;
                trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
                                                 param0, param1);
-               rc  = put_guest(vcpu, inti->io.subchannel_id,
-                               (u16 __user *) __LC_SUBCHANNEL_ID);
-               rc |= put_guest(vcpu, inti->io.subchannel_nr,
-                               (u16 __user *) __LC_SUBCHANNEL_NR);
-               rc |= put_guest(vcpu, inti->io.io_int_parm,
-                               (u32 __user *) __LC_IO_INT_PARM);
-               rc |= put_guest(vcpu, inti->io.io_int_word,
-                               (u32 __user *) __LC_IO_INT_WORD);
-               rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-                                     __LC_IO_NEW_PSW, sizeof(psw_t));
+               rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
+                                  (u16 *)__LC_SUBCHANNEL_ID);
+               rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+                                  (u16 *)__LC_SUBCHANNEL_NR);
+               rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+                                  (u32 *)__LC_IO_INT_PARM);
+               rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+                                  (u32 *)__LC_IO_INT_WORD);
+               rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+                                    &vcpu->arch.sie_block->gpsw,
+                                    sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+                                   &vcpu->arch.sie_block->gpsw,
+                                   sizeof(psw_t));
                break;
        }
        default:
@@ -358,25 +477,21 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
        }
 }
 
-static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 {
        int rc;
 
-       if (psw_extint_disabled(vcpu))
-               return 0;
-       if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
-               return 0;
-       rc  = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
-       rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-                             __LC_EXT_NEW_PSW, sizeof(psw_t));
+       rc  = put_guest_lc(vcpu, 0x1004, (u16 *)__LC_EXT_INT_CODE);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw,
+                           sizeof(psw_t));
        if (rc) {
                printk("kvm: The guest lowcore is not mapped during interrupt "
                        "delivery, killing userspace\n");
                do_exit(SIGKILL);
        }
-       return 1;
 }
 
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
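
Aside: every case in __do_deliver_interrupt() above performs the same lowcore
handshake, only with different interrupt codes and slots. A minimal sketch of
that pattern, using the put/read/write_guest_lc() helpers this series
introduces (deliver_ext() itself is hypothetical, not part of the patch):

        /*
         * Store the interrupt code, save the current PSW to the old-PSW
         * slot, then branch by loading the new PSW from the new-PSW slot.
         */
        static int deliver_ext(struct kvm_vcpu *vcpu, u16 code, u16 cpu_addr)
        {
                int rc;

                rc  = put_guest_lc(vcpu, code, (u16 *)__LC_EXT_INT_CODE);
                rc |= put_guest_lc(vcpu, cpu_addr, (u16 *)__LC_EXT_CPU_ADDR);
                rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
                                     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
                rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
                return rc; /* non-zero if any lowcore access failed */
        }
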
@@ -406,19 +521,20 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
                spin_unlock(&fi->lock);
        }
 
-       if ((!rc) && (vcpu->arch.sie_block->ckc <
-               get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
-               if ((!psw_extint_disabled(vcpu)) &&
-                       (vcpu->arch.sie_block->gcr[0] & 0x800ul))
-                       rc = 1;
-       }
+       if (!rc && kvm_cpu_has_pending_timer(vcpu))
+               rc = 1;
 
        return rc;
 }
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-       return 0;
+       if (!(vcpu->arch.sie_block->ckc <
+             get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+               return 0;
+       if (!ckc_interrupts_enabled(vcpu))
+               return 0;
+       return 1;
 }
 
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
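
Aside: the new kvm_cpu_has_pending_timer() reads most naturally in its
positive form; an equivalent sketch of the predicate body (the guest's TOD
clock is the host TOD shifted by the per-vcpu epoch):

        u64 guest_tod = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;

        return vcpu->arch.sie_block->ckc < guest_tod &&
               ckc_interrupts_enabled(vcpu);
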
@@ -441,8 +557,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
                return -EOPNOTSUPP; /* disabled wait */
        }
 
-       if (psw_extint_disabled(vcpu) ||
-           (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
+       if (!ckc_interrupts_enabled(vcpu)) {
                VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
                goto no_timer;
        }
@@ -554,9 +669,8 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
                } while (deliver);
        }
 
-       if ((vcpu->arch.sie_block->ckc <
-               get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
-               __try_deliver_ckc_interrupt(vcpu);
+       if (kvm_cpu_has_pending_timer(vcpu))
+               deliver_ckc_interrupt(vcpu);
 
        if (atomic_read(&fi->active)) {
                do {
@@ -660,6 +774,31 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
        return 0;
 }
 
+int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+                            struct kvm_s390_pgm_info *pgm_info)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_interrupt_info *inti;
+
+       inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+       if (!inti)
+               return -ENOMEM;
+
+       VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
+                  pgm_info->code);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+                                  pgm_info->code, 0, 1);
+
+       inti->type = KVM_S390_PROGRAM_INT;
+       memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
+       spin_lock_bh(&li->lock);
+       list_add(&inti->list, &li->list);
+       atomic_set(&li->active, 1);
+       BUG_ON(waitqueue_active(li->wq));
+       spin_unlock_bh(&li->lock);
+       return 0;
+}
+
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                                                    u64 cr6, u64 schid)
 {
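
Aside: a hypothetical call site for the new kvm_s390_inject_prog_irq() added
above; PGM_ADDRESSING merely stands in for whatever program-interruption code
an intercept handler computed:

        struct kvm_s390_pgm_info pgm = {
                .code = PGM_ADDRESSING, /* example code only */
        };

        return kvm_s390_inject_prog_irq(vcpu, &pgm);
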
@@ -810,6 +949,12 @@ int kvm_s390_inject_vm(struct kvm *kvm,
        return __inject_vm(kvm, inti);
 }
 
+void kvm_s390_reinject_io_int(struct kvm *kvm,
+                             struct kvm_s390_interrupt_info *inti)
+{
+       __inject_vm(kvm, inti);
+}
+
 int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
                         struct kvm_s390_interrupt *s390int)
 {
@@ -900,7 +1045,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
        return 0;
 }
 
-static void clear_floating_interrupts(struct kvm *kvm)
+void kvm_s390_clear_float_irqs(struct kvm *kvm)
 {
        struct kvm_s390_float_interrupt *fi;
        struct kvm_s390_interrupt_info  *n, *inti = NULL;
@@ -1246,7 +1391,7 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
                break;
        case KVM_DEV_FLIC_CLEAR_IRQS:
                r = 0;
-               clear_floating_interrupts(dev->kvm);
+               kvm_s390_clear_float_irqs(dev->kvm);
                break;
        case KVM_DEV_FLIC_APF_ENABLE:
                dev->kvm->arch.gmap->pfault_enabled = 1;
index b3ecb8f5b6ce2bcefb4fe92a64b99d2012cdd770..b32c42cbc7063d9298a5484b672485ea3b5f04b0 100644
@@ -11,6 +11,7 @@
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
+ *               Jason J. Herne <jjherne@us.ibm.com>
  */
 
 #include <linux/compiler.h>
@@ -51,6 +52,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
+       { "instruction_stctl", VCPU_STAT(instruction_stctl) },
+       { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
@@ -66,6 +69,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+       { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
@@ -90,7 +94,7 @@ unsigned long *vfacilities;
 static struct gmap_notifier gmap_notifier;
 
 /* test availability of vfacility */
-static inline int test_vfacility(unsigned long nr)
+int test_vfacility(unsigned long nr)
 {
        return __test_facility(nr, (void *) vfacilities);
 }
@@ -161,6 +165,7 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_ENABLE_CAP_VM:
+       case KVM_CAP_VM_ATTRIBUTES:
                r = 1;
                break;
        case KVM_CAP_NR_VCPUS:
@@ -179,6 +184,25 @@ int kvm_dev_ioctl_check_extension(long ext)
        return r;
 }
 
+static void kvm_s390_sync_dirty_log(struct kvm *kvm,
+                                       struct kvm_memory_slot *memslot)
+{
+       gfn_t cur_gfn, last_gfn;
+       unsigned long address;
+       struct gmap *gmap = kvm->arch.gmap;
+
+       down_read(&gmap->mm->mmap_sem);
+       /* Loop over all guest pages */
+       last_gfn = memslot->base_gfn + memslot->npages;
+       for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
+               address = gfn_to_hva_memslot(memslot, cur_gfn);
+
+               if (gmap_test_and_clear_dirty(address, gmap))
+                       mark_page_dirty(kvm, cur_gfn);
+       }
+       up_read(&gmap->mm->mmap_sem);
+}
+
 /* Section: vm related */
 /*
  * Get (and clear) the dirty memory log for a memory slot.
@@ -186,7 +210,36 @@ int kvm_dev_ioctl_check_extension(long ext)
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
 {
-       return 0;
+       int r;
+       unsigned long n;
+       struct kvm_memory_slot *memslot;
+       int is_dirty = 0;
+
+       mutex_lock(&kvm->slots_lock);
+
+       r = -EINVAL;
+       if (log->slot >= KVM_USER_MEM_SLOTS)
+               goto out;
+
+       memslot = id_to_memslot(kvm->memslots, log->slot);
+       r = -ENOENT;
+       if (!memslot->dirty_bitmap)
+               goto out;
+
+       kvm_s390_sync_dirty_log(kvm, memslot);
+       r = kvm_get_dirty_log(kvm, log, &is_dirty);
+       if (r)
+               goto out;
+
+       /* Clear the dirty log */
+       if (is_dirty) {
+               n = kvm_dirty_bitmap_bytes(memslot);
+               memset(memslot->dirty_bitmap, 0, n);
+       }
+       r = 0;
+out:
+       mutex_unlock(&kvm->slots_lock);
+       return r;
 }
 
 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
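
Aside: with this change s390 reports real data through the generic dirty-log
interface. A minimal userspace sketch (assumes an open VM fd, memslot 0, and a
caller-allocated bitmap of one bit per guest page; error handling trimmed):

        #include <string.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        int fetch_dirty_log(int vm_fd, void *bitmap)
        {
                struct kvm_dirty_log log;

                memset(&log, 0, sizeof(log));
                log.slot = 0;              /* memslot to query */
                log.dirty_bitmap = bitmap; /* one bit per guest page */
                return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
        }
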
@@ -208,11 +261,86 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
        return r;
 }
 
+static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+       int ret;
+       unsigned int idx;
+       switch (attr->attr) {
+       case KVM_S390_VM_MEM_ENABLE_CMMA:
+               ret = -EBUSY;
+               mutex_lock(&kvm->lock);
+               if (atomic_read(&kvm->online_vcpus) == 0) {
+                       kvm->arch.use_cmma = 1;
+                       ret = 0;
+               }
+               mutex_unlock(&kvm->lock);
+               break;
+       case KVM_S390_VM_MEM_CLR_CMMA:
+               mutex_lock(&kvm->lock);
+               idx = srcu_read_lock(&kvm->srcu);
+               page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
+               srcu_read_unlock(&kvm->srcu, idx);
+               mutex_unlock(&kvm->lock);
+               ret = 0;
+               break;
+       default:
+               ret = -ENXIO;
+               break;
+       }
+       return ret;
+}
+
+static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+       int ret;
+
+       switch (attr->group) {
+       case KVM_S390_VM_MEM_CTRL:
+               ret = kvm_s390_mem_control(kvm, attr);
+               break;
+       default:
+               ret = -ENXIO;
+               break;
+       }
+
+       return ret;
+}
+
+static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+       return -ENXIO;
+}
+
+static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+       int ret;
+
+       switch (attr->group) {
+       case KVM_S390_VM_MEM_CTRL:
+               switch (attr->attr) {
+               case KVM_S390_VM_MEM_ENABLE_CMMA:
+               case KVM_S390_VM_MEM_CLR_CMMA:
+                       ret = 0;
+                       break;
+               default:
+                       ret = -ENXIO;
+                       break;
+               }
+               break;
+       default:
+               ret = -ENXIO;
+               break;
+       }
+
+       return ret;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
 {
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
+       struct kvm_device_attr attr;
        int r;
 
        switch (ioctl) {
@@ -245,6 +373,27 @@ long kvm_arch_vm_ioctl(struct file *filp,
                }
                break;
        }
+       case KVM_SET_DEVICE_ATTR: {
+               r = -EFAULT;
+               if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+                       break;
+               r = kvm_s390_vm_set_attr(kvm, &attr);
+               break;
+       }
+       case KVM_GET_DEVICE_ATTR: {
+               r = -EFAULT;
+               if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+                       break;
+               r = kvm_s390_vm_get_attr(kvm, &attr);
+               break;
+       }
+       case KVM_HAS_DEVICE_ATTR: {
+               r = -EFAULT;
+               if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+                       break;
+               r = kvm_s390_vm_has_attr(kvm, &attr);
+               break;
+       }
        default:
                r = -ENOTTY;
        }
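
Aside: a userspace sketch of the per-VM attribute plumbing wired up above:
probe with KVM_HAS_DEVICE_ATTR, then enable CMMA, which the handler only
permits while no vcpu exists yet:

        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        int enable_cmma(int vm_fd)
        {
                struct kvm_device_attr attr = {
                        .group = KVM_S390_VM_MEM_CTRL,
                        .attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
                };

                if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr))
                        return -1; /* attribute not known to this kernel */
                /* must run before the first vcpu is created (else -EBUSY) */
                return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
        }
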
@@ -291,6 +440,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        spin_lock_init(&kvm->arch.float_int.lock);
        INIT_LIST_HEAD(&kvm->arch.float_int.list);
+       init_waitqueue_head(&kvm->arch.ipte_wq);
 
        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
        VM_EVENT(kvm, 3, "%s", "vm created");
@@ -321,6 +471,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+       kvm_s390_clear_local_irqs(vcpu);
        kvm_clear_async_pf_completion_queue(vcpu);
        if (!kvm_is_ucontrol(vcpu->kvm)) {
                clear_bit(63 - vcpu->vcpu_id,
@@ -334,9 +485,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
        if (kvm_is_ucontrol(vcpu->kvm))
                gmap_free(vcpu->arch.gmap);
 
-       if (vcpu->arch.sie_block->cbrlo)
-               __free_page(__pfn_to_page(
-                               vcpu->arch.sie_block->cbrlo >> PAGE_SHIFT));
+       if (kvm_s390_cmma_enabled(vcpu->kvm))
+               kvm_s390_vcpu_unsetup_cmma(vcpu);
        free_page((unsigned long)(vcpu->arch.sie_block));
 
        kvm_vcpu_uninit(vcpu);
@@ -371,6 +521,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
        if (!kvm_is_ucontrol(kvm))
                gmap_free(kvm->arch.gmap);
        kvm_s390_destroy_adapters(kvm);
+       kvm_s390_clear_float_irqs(kvm);
 }
 
 /* Section: vcpu related */
@@ -450,9 +601,26 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
+{
+       free_page(vcpu->arch.sie_block->cbrlo);
+       vcpu->arch.sie_block->cbrlo = 0;
+}
+
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
+       if (!vcpu->arch.sie_block->cbrlo)
+               return -ENOMEM;
+
+       vcpu->arch.sie_block->ecb2 |= 0x80;
+       vcpu->arch.sie_block->ecb2 &= ~0x08;
+       return 0;
+}
+
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-       struct page *cbrl;
+       int rc = 0;
 
        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
                                                    CPUSTAT_SM |
@@ -463,15 +631,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
                vcpu->arch.sie_block->ecb |= 0x10;
 
        vcpu->arch.sie_block->ecb2  = 8;
-       vcpu->arch.sie_block->eca   = 0xC1002001U;
+       vcpu->arch.sie_block->eca   = 0xC1002000U;
+       if (sclp_has_siif())
+               vcpu->arch.sie_block->eca |= 1;
        vcpu->arch.sie_block->fac   = (int) (long) vfacilities;
-       if (kvm_enabled_cmma()) {
-               cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO);
-               if (cbrl) {
-                       vcpu->arch.sie_block->ecb2 |= 0x80;
-                       vcpu->arch.sie_block->ecb2 &= ~0x08;
-                       vcpu->arch.sie_block->cbrlo = page_to_phys(cbrl);
-               }
+       vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+       if (kvm_s390_cmma_enabled(vcpu->kvm)) {
+               rc = kvm_s390_vcpu_setup_cmma(vcpu);
+               if (rc)
+                       return rc;
        }
        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
        tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
@@ -479,7 +647,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
        get_cpu_id(&vcpu->arch.cpu_id);
        vcpu->arch.cpu_id.version = 0xff;
-       return 0;
+       return rc;
 }
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
@@ -768,10 +936,40 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
        return -EINVAL; /* not implemented yet */
 }
 
+#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
+                             KVM_GUESTDBG_USE_HW_BP | \
+                             KVM_GUESTDBG_ENABLE)
+
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                                        struct kvm_guest_debug *dbg)
 {
-       return -EINVAL; /* not implemented yet */
+       int rc = 0;
+
+       vcpu->guest_debug = 0;
+       kvm_s390_clear_bp_data(vcpu);
+
+       if (dbg->control & ~VALID_GUESTDBG_FLAGS)
+               return -EINVAL;
+
+       if (dbg->control & KVM_GUESTDBG_ENABLE) {
+               vcpu->guest_debug = dbg->control;
+               /* enforce guest PER */
+               atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+
+               if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
+                       rc = kvm_s390_import_bp_data(vcpu, dbg);
+       } else {
+               atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+               vcpu->arch.guestdbg.last_bp = 0;
+       }
+
+       if (rc) {
+               vcpu->guest_debug = 0;
+               kvm_s390_clear_bp_data(vcpu);
+               atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+       }
+
+       return rc;
 }
 
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
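
Aside: a userspace sketch of driving the new debug support (structure layout
per the uapi additions in this series; KVM_HW_BP is assumed to be the
hardware-breakpoint type constant defined there):

        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        int set_hw_breakpoint(int vcpu_fd, __u64 addr)
        {
                struct kvm_hw_breakpoint bp = {
                        .addr = addr,
                        .type = KVM_HW_BP, /* assumed type constant */
                        .len  = 1,
                };
                struct kvm_guest_debug dbg = {
                        .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
                };

                dbg.arch.nr_hw_bp = 1;
                dbg.arch.hw_bp = &bp;
                return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
        }
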
@@ -786,6 +984,18 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
        return -EINVAL; /* not implemented yet */
 }
 
+bool kvm_s390_cmma_enabled(struct kvm *kvm)
+{
+       if (!MACHINE_IS_LPAR)
+               return false;
+       /* only enable for z10 and later */
+       if (!MACHINE_HAS_EDAT1)
+               return false;
+       if (!kvm->arch.use_cmma)
+               return false;
+       return true;
+}
+
 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 {
        /*
@@ -882,8 +1092,9 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
        if (!vcpu->arch.gmap->pfault_enabled)
                return 0;
 
-       hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
-       if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
+       hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
+       hva += current->thread.gmap_addr & ~PAGE_MASK;
+       if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
                return 0;
 
        rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
@@ -916,6 +1127,11 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
        if (rc)
                return rc;
 
+       if (guestdbg_enabled(vcpu)) {
+               kvm_s390_backup_guest_per_regs(vcpu);
+               kvm_s390_patch_guest_per_regs(vcpu);
+       }
+
        vcpu->arch.sie_block->icptcode = 0;
        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
        VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
@@ -932,6 +1148,9 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
                   vcpu->arch.sie_block->icptcode);
        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
 
+       if (guestdbg_enabled(vcpu))
+               kvm_s390_restore_guest_per_regs(vcpu);
+
        if (exit_reason >= 0) {
                rc = 0;
        } else if (kvm_is_ucontrol(vcpu->kvm)) {
@@ -968,16 +1187,6 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
        return rc;
 }
 
-bool kvm_enabled_cmma(void)
-{
-       if (!MACHINE_IS_LPAR)
-               return false;
-       /* only enable for z10 and later */
-       if (!MACHINE_HAS_EDAT1)
-               return false;
-       return true;
-}
-
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
        int rc, exit_reason;
@@ -1007,7 +1216,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
                rc = vcpu_post_run(vcpu, exit_reason);
-       } while (!signal_pending(current) && !rc);
+       } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
 
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        return rc;
@@ -1018,6 +1227,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        int rc;
        sigset_t sigsaved;
 
+       if (guestdbg_exit_pending(vcpu)) {
+               kvm_s390_prepare_debug_exit(vcpu);
+               return 0;
+       }
+
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
@@ -1030,6 +1244,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        case KVM_EXIT_S390_RESET:
        case KVM_EXIT_S390_UCONTROL:
        case KVM_EXIT_S390_TSCH:
+       case KVM_EXIT_DEBUG:
                break;
        default:
                BUG();
@@ -1055,6 +1270,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                rc = -EINTR;
        }
 
+       if (guestdbg_exit_pending(vcpu) && !rc)  {
+               kvm_s390_prepare_debug_exit(vcpu);
+               rc = 0;
+       }
+
        if (rc == -EOPNOTSUPP) {
                /* intercept cannot be handled in-kernel, prepare kvm-run */
                kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
@@ -1082,83 +1302,50 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        return rc;
 }
 
-static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
-                      unsigned long n, int prefix)
-{
-       if (prefix)
-               return copy_to_guest(vcpu, guestdest, from, n);
-       else
-               return copy_to_guest_absolute(vcpu, guestdest, from, n);
-}
-
 /*
  * store status at address
  * we have two special cases:
  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
  */
-int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 {
        unsigned char archmode = 1;
-       int prefix;
        u64 clkcomp;
+       int rc;
 
-       if (addr == KVM_S390_STORE_STATUS_NOADDR) {
-               if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
+       if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
+               if (write_guest_abs(vcpu, 163, &archmode, 1))
                        return -EFAULT;
-               addr = SAVE_AREA_BASE;
-               prefix = 0;
-       } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
-               if (copy_to_guest(vcpu, 163ul, &archmode, 1))
+               gpa = SAVE_AREA_BASE;
+       } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
+               if (write_guest_real(vcpu, 163, &archmode, 1))
                        return -EFAULT;
-               addr = SAVE_AREA_BASE;
-               prefix = 1;
-       } else
-               prefix = 0;
-
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
-                       vcpu->arch.guest_fpregs.fprs, 128, prefix))
-               return -EFAULT;
-
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
-                       vcpu->run->s.regs.gprs, 128, prefix))
-               return -EFAULT;
-
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
-                       &vcpu->arch.sie_block->gpsw, 16, prefix))
-               return -EFAULT;
-
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
-                       &vcpu->arch.sie_block->prefix, 4, prefix))
-               return -EFAULT;
-
-       if (__guestcopy(vcpu,
-                       addr + offsetof(struct save_area, fp_ctrl_reg),
-                       &vcpu->arch.guest_fpregs.fpc, 4, prefix))
-               return -EFAULT;
-
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
-                       &vcpu->arch.sie_block->todpr, 4, prefix))
-               return -EFAULT;
-
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
-                       &vcpu->arch.sie_block->cputm, 8, prefix))
-               return -EFAULT;
-
+               gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
+       }
+       rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
+                            vcpu->arch.guest_fpregs.fprs, 128);
+       rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
+                             vcpu->run->s.regs.gprs, 128);
+       rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
+                             &vcpu->arch.sie_block->gpsw, 16);
+       rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
+                             &vcpu->arch.sie_block->prefix, 4);
+       rc |= write_guest_abs(vcpu,
+                             gpa + offsetof(struct save_area, fp_ctrl_reg),
+                             &vcpu->arch.guest_fpregs.fpc, 4);
+       rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
+                             &vcpu->arch.sie_block->todpr, 4);
+       rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
+                             &vcpu->arch.sie_block->cputm, 8);
        clkcomp = vcpu->arch.sie_block->ckc >> 8;
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
-                       &clkcomp, 8, prefix))
-               return -EFAULT;
-
-       if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
-                       &vcpu->run->s.regs.acrs, 64, prefix))
-               return -EFAULT;
-
-       if (__guestcopy(vcpu,
-                       addr + offsetof(struct save_area, ctrl_regs),
-                       &vcpu->arch.sie_block->gcr, 128, prefix))
-               return -EFAULT;
-       return 0;
+       rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
+                             &clkcomp, 8);
+       rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
+                             &vcpu->run->s.regs.acrs, 64);
+       rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
+                             &vcpu->arch.sie_block->gcr, 128);
+       return rc ? -EFAULT : 0;
 }
 
 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
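
Aside: the PREFIXED case above converts a real address to an absolute one
before the write_guest_abs() calls. A model of that conversion, consistent
with the kvm_s390_real_to_abs() helper this series relies on (a sketch, not
the authoritative definition): the 8K block at real address 0 and the 8K block
at the prefix origin trade places.

        static unsigned long real_to_abs_sketch(struct kvm_vcpu *vcpu,
                                                unsigned long gra)
        {
                unsigned long prefix = vcpu->arch.sie_block->prefix;

                if (gra < 2 * PAGE_SIZE)       /* low 8K -> prefix area */
                        return gra + prefix;
                if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE)
                        return gra - prefix;   /* prefix area -> low 8K */
                return gra;                    /* everything else is 1:1 */
        }
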
index 3c1e2274d9eae858fce363cd5f89ddb699e1fa05..9b5680d1f6cc637bc87f0f65078c6a6aa47e6299 100644
@@ -28,7 +28,6 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
 
 /* Transactional Memory Execution related macros */
 #define IS_TE_ENABLED(vcpu)    ((vcpu->arch.sie_block->ecb & 0x10))
-#define TDB_ADDR               0x1800UL
 #define TDB_FORMAT1            1
 #define IS_ITDB_VALID(vcpu)    ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
 
@@ -130,6 +129,7 @@ void kvm_s390_tasklet(unsigned long parm);
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
 void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
 void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_float_irqs(struct kvm *kvm);
 int __must_check kvm_s390_inject_vm(struct kvm *kvm,
                                    struct kvm_s390_interrupt *s390int);
 int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
@@ -137,6 +137,8 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                                                    u64 cr6, u64 schid);
+void kvm_s390_reinject_io_int(struct kvm *kvm,
+                             struct kvm_s390_interrupt_info *inti);
 int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
 /* implemented in priv.c */
@@ -145,6 +147,7 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
 
@@ -158,14 +161,64 @@ void s390_vcpu_block(struct kvm_vcpu *vcpu);
 void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
 void exit_sie(struct kvm_vcpu *vcpu);
 void exit_sie_sync(struct kvm_vcpu *vcpu);
-/* are we going to support cmma? */
-bool kvm_enabled_cmma(void);
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
+/* is cmma enabled */
+bool kvm_s390_cmma_enabled(struct kvm *kvm);
+int test_vfacility(unsigned long nr);
+
 /* implemented in diag.c */
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+/* implemented in interrupt.c */
+int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+                            struct kvm_s390_pgm_info *pgm_info);
+
+/**
+ * kvm_s390_inject_prog_cond - conditionally inject a program check
+ * @vcpu: virtual cpu
+ * @rc: original return/error code
+ *
+ * This function is supposed to be used after regular guest access functions
+ * failed, to conditionally inject a program check to a vcpu. The typical
+ * pattern would look like
+ *
+ * rc = write_guest(vcpu, addr, data, len);
+ * if (rc)
+ *     return kvm_s390_inject_prog_cond(vcpu, rc);
+ *
+ * A negative return code from guest access functions implies an internal error
+ * such as out of memory. In these cases no program check should be injected
+ * to the guest.
+ * A positive value implies that an exception happened while accessing a guest's
+ * memory. In this case all data belonging to the corresponding program check
+ * has been stored in vcpu->arch.pgm and can be injected with
+ * kvm_s390_inject_prog_irq().
+ *
+ * Returns: - the original @rc value if @rc was negative (internal error)
+ *         - zero if @rc was already zero
+ *         - zero or error code from injecting if @rc was positive
+ *           (program check injected to @vcpu)
+ */
+static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
+{
+       if (rc <= 0)
+               return rc;
+       return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+}
 
 /* implemented in interrupt.c */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int psw_extint_disabled(struct kvm_vcpu *vcpu);
 void kvm_s390_destroy_adapters(struct kvm *kvm);
 
+/* implemented in guestdbg.c */
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu);
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+                           struct kvm_guest_debug *dbg);
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
+void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
+
 #endif
index 476e9e218f43ee5cfa2842951c845a0499c4834e..27f9051a78f85700f19a0f07491cadd8c51d0c83 100644
@@ -35,8 +35,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
 {
        struct kvm_vcpu *cpup;
        s64 hostclk, val;
+       int i, rc;
        u64 op2;
-       int i;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -44,8 +44,9 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
        op2 = kvm_s390_get_base_disp_s(vcpu);
        if (op2 & 7)    /* Operand must be on a doubleword boundary */
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       if (get_guest(vcpu, val, (u64 __user *) op2))
-               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       rc = read_guest(vcpu, op2, &val, sizeof(val));
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
 
        if (store_tod_clock(&hostclk)) {
                kvm_s390_set_psw_cc(vcpu, 3);
@@ -65,8 +66,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
 static int handle_set_prefix(struct kvm_vcpu *vcpu)
 {
        u64 operand2;
-       u32 address = 0;
-       u8 tmp;
+       u32 address;
+       int rc;
 
        vcpu->stat.instruction_spx++;
 
@@ -80,14 +81,18 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
        /* get the value */
-       if (get_guest(vcpu, address, (u32 __user *) operand2))
-               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       rc = read_guest(vcpu, operand2, &address, sizeof(address));
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
 
-       address = address & 0x7fffe000u;
+       address &= 0x7fffe000u;
 
-       /* make sure that the new value is valid memory */
-       if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
-          (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)))
+       /*
+        * Make sure the new value is valid memory. We only need to check the
+        * first page, since address is 8k aligned and memory pieces are always
+        * at least 1MB aligned and have at least a size of 1MB.
+        */
+       if (kvm_is_error_gpa(vcpu->kvm, address))
                return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
        kvm_s390_set_prefix(vcpu, address);
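
Aside: kvm_is_error_gpa() used above is the new base-KVM helper mentioned in
the cover letter; a model consistent with its description (sketch): the gpa is
an error if no memslot provides a host mapping for it.

        static bool kvm_is_error_gpa_sketch(struct kvm *kvm, gpa_t gpa)
        {
                unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));

                return kvm_is_error_hva(hva); /* no memslot backs this gpa */
        }
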
@@ -101,6 +106,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
 {
        u64 operand2;
        u32 address;
+       int rc;
 
        vcpu->stat.instruction_stpx++;
 
@@ -117,8 +123,9 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
        address = address & 0x7fffe000u;
 
        /* get the value */
-       if (put_guest(vcpu, address, (u32 __user *)operand2))
-               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       rc = write_guest(vcpu, operand2, &address, sizeof(address));
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
 
        VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
        trace_kvm_s390_handle_prefix(vcpu, 0, address);
@@ -127,28 +134,44 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
 
 static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 {
-       u64 useraddr;
+       u16 vcpu_id = vcpu->vcpu_id;
+       u64 ga;
+       int rc;
 
        vcpu->stat.instruction_stap++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       useraddr = kvm_s390_get_base_disp_s(vcpu);
+       ga = kvm_s390_get_base_disp_s(vcpu);
 
-       if (useraddr & 1)
+       if (ga & 1)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-       if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr))
-               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id));
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
 
-       VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr);
-       trace_kvm_s390_handle_stap(vcpu, useraddr);
+       VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga);
+       trace_kvm_s390_handle_stap(vcpu, ga);
        return 0;
 }
 
+static void __skey_check_enable(struct kvm_vcpu *vcpu)
+{
+       if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
+               return;
+
+       s390_enable_skey();
+       trace_kvm_s390_skey_related_inst(vcpu);
+       vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+}
+
+
 static int handle_skey(struct kvm_vcpu *vcpu)
 {
+       __skey_check_enable(vcpu);
+
        vcpu->stat.instruction_storage_key++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -160,9 +183,21 @@ static int handle_skey(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
+{
+       psw_t *psw = &vcpu->arch.sie_block->gpsw;
+
+       vcpu->stat.instruction_ipte_interlock++;
+       if (psw_bits(*psw).p)
+               return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+       wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
+       psw->addr = __rewind_psw(*psw, 4);
+       VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
+       return 0;
+}
+
 static int handle_test_block(struct kvm_vcpu *vcpu)
 {
-       unsigned long hva;
        gpa_t addr;
        int reg2;
 
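
Aside: handle_ipte_interlock() above retries the trapped instruction by
rewinding the PSW over the 4-byte opcode once the IPTE lock is free. A model
of the rewind, consistent with how __rewind_psw() is defined elsewhere in this
series (sketch): subtract the instruction length and wrap within the current
addressing mode.

        static unsigned long rewind_psw_sketch(psw_t psw, unsigned long ilc)
        {
                unsigned long mask;

                mask = (psw.mask & PSW_MASK_EA) ? -1UL :
                       (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 :
                                                  (1UL << 24) - 1;
                return (psw.addr - ilc) & mask; /* 64/31/24-bit wrap */
        }
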
@@ -173,14 +208,13 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
        addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
        addr = kvm_s390_real_to_abs(vcpu, addr);
 
-       hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
-       if (kvm_is_error_hva(hva))
+       if (kvm_is_error_gpa(vcpu->kvm, addr))
                return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
        /*
         * We don't expect errors on modern systems, and do not care
         * about storage keys (yet), so let's just clear the page.
         */
-       if (clear_user((void __user *)hva, PAGE_SIZE) != 0)
+       if (kvm_clear_guest(vcpu->kvm, addr, PAGE_SIZE))
                return -EFAULT;
        kvm_s390_set_psw_cc(vcpu, 0);
        vcpu->run->s.regs.gprs[0] = 0;
@@ -190,9 +224,12 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
 static int handle_tpi(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_interrupt_info *inti;
+       unsigned long len;
+       u32 tpi_data[3];
+       int cc, rc;
        u64 addr;
-       int cc;
 
+       rc = 0;
        addr = kvm_s390_get_base_disp_s(vcpu);
        if (addr & 3)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -201,30 +238,41 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
        if (!inti)
                goto no_interrupt;
        cc = 1;
+       tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
+       tpi_data[1] = inti->io.io_int_parm;
+       tpi_data[2] = inti->io.io_int_word;
        if (addr) {
                /*
                 * Store the two-word I/O interruption code into the
                 * provided area.
                 */
-               if (put_guest(vcpu, inti->io.subchannel_id, (u16 __user *)addr)
-                   || put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *)(addr + 2))
-                   || put_guest(vcpu, inti->io.io_int_parm, (u32 __user *)(addr + 4)))
-                       return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+               len = sizeof(tpi_data) - 4;
+               rc = write_guest(vcpu, addr, &tpi_data, len);
+               if (rc)
+                       return kvm_s390_inject_prog_cond(vcpu, rc);
        } else {
                /*
                 * Store the three-word I/O interruption code into
                 * the appropriate lowcore area.
                 */
-               put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) __LC_SUBCHANNEL_ID);
-               put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) __LC_SUBCHANNEL_NR);
-               put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) __LC_IO_INT_PARM);
-               put_guest(vcpu, inti->io.io_int_word, (u32 __user *) __LC_IO_INT_WORD);
+               len = sizeof(tpi_data);
+               if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
+                       rc = -EFAULT;
        }
-       kfree(inti);
+       /*
+        * If we encounter a problem storing the interruption code, the
+        * instruction is suppressed from the guest's view: reinject the
+        * interrupt.
+        */
+       if (!rc)
+               kfree(inti);
+       else
+               kvm_s390_reinject_io_int(vcpu->kvm, inti);
 no_interrupt:
        /* Set condition code and we're done. */
-       kvm_s390_set_psw_cc(vcpu, cc);
-       return 0;
+       if (!rc)
+               kvm_s390_set_psw_cc(vcpu, cc);
+       return rc ? -EFAULT : 0;
 }
 
 static int handle_tsch(struct kvm_vcpu *vcpu)
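
Aside: for reference, the interruption-code words assembled in handle_tpi()
above (layout as built by the code):

        /*
         * tpi_data[0] = subchannel id (upper half) | subchannel nr (lower half)
         * tpi_data[1] = I/O interruption parameter
         * tpi_data[2] = I/O interruption word
         *
         * TPI with a non-zero operand stores only the first two words
         * (len = sizeof(tpi_data) - 4); the zero-operand form stores all
         * three words into the lowcore at __LC_SUBCHANNEL_ID.
         */
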
@@ -292,10 +340,10 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
-                          vfacilities, 4);
+       rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
+                           vfacilities, 4);
        if (rc)
-               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+               return rc;
        VCPU_EVENT(vcpu, 5, "store facility list value %x",
                   *(unsigned int *) vfacilities);
        trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities);
@@ -333,6 +381,7 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
        psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
        psw_compat_t new_psw;
        u64 addr;
+       int rc;
 
        if (gpsw->mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -340,8 +389,10 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
        addr = kvm_s390_get_base_disp_s(vcpu);
        if (addr & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
-               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+       rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
        if (!(new_psw.mask & PSW32_MASK_BASE))
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
        gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
@@ -357,6 +408,7 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
 {
        psw_t new_psw;
        u64 addr;
+       int rc;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -364,8 +416,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
        addr = kvm_s390_get_base_disp_s(vcpu);
        if (addr & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
-               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
        vcpu->arch.sie_block->gpsw = new_psw;
        if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -375,7 +428,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
 
 static int handle_stidp(struct kvm_vcpu *vcpu)
 {
+       u64 stidp_data = vcpu->arch.stidp_data;
        u64 operand2;
+       int rc;
 
        vcpu->stat.instruction_stidp++;
 
@@ -387,8 +442,9 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
        if (operand2 & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-       if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2))
-               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data));
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
 
        VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
        return 0;
@@ -474,9 +530,10 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
                break;
        }
 
-       if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
-               rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-               goto out_exception;
+       rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE);
+       if (rc) {
+               rc = kvm_s390_inject_prog_cond(vcpu, rc);
+               goto out;
        }
        trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
        free_page(mem);
@@ -485,7 +542,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
        return 0;
 out_no_data:
        kvm_s390_set_psw_cc(vcpu, 3);
-out_exception:
+out:
        free_page(mem);
        return rc;
 }
@@ -496,6 +553,7 @@ static const intercept_handler_t b2_handlers[256] = {
        [0x10] = handle_set_prefix,
        [0x11] = handle_store_prefix,
        [0x12] = handle_store_cpu_address,
+       [0x21] = handle_ipte_interlock,
        [0x29] = handle_skey,
        [0x2a] = handle_skey,
        [0x2b] = handle_skey,
@@ -513,6 +571,7 @@ static const intercept_handler_t b2_handlers[256] = {
        [0x3a] = handle_io_inst,
        [0x3b] = handle_io_inst,
        [0x3c] = handle_io_inst,
+       [0x50] = handle_ipte_interlock,
        [0x5f] = handle_io_inst,
        [0x74] = handle_io_inst,
        [0x76] = handle_io_inst,
@@ -618,6 +677,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
                }
 
                if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
+                       __skey_check_enable(vcpu);
                        if (set_guest_storage_key(current->mm, useraddr,
                                        vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
                                        vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
@@ -642,7 +702,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
        VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
        gmap = vcpu->arch.gmap;
        vcpu->stat.instruction_essa++;
-       if (!kvm_enabled_cmma() || !vcpu->arch.sie_block->cbrlo)
+       if (!kvm_s390_cmma_enabled(vcpu->kvm))
                return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -672,7 +732,10 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 }
 
 static const intercept_handler_t b9_handlers[256] = {
+       [0x8a] = handle_ipte_interlock,
        [0x8d] = handle_epsw,
+       [0x8e] = handle_ipte_interlock,
+       [0x8f] = handle_ipte_interlock,
        [0xab] = handle_essa,
        [0xaf] = handle_pfmf,
 };
@@ -693,32 +756,67 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 {
        int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
        int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-       u64 useraddr;
        u32 val = 0;
        int reg, rc;
+       u64 ga;
 
        vcpu->stat.instruction_lctl++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       useraddr = kvm_s390_get_base_disp_rs(vcpu);
+       ga = kvm_s390_get_base_disp_rs(vcpu);
 
-       if (useraddr & 3)
+       if (ga & 3)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-       VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3,
-                  useraddr);
-       trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
+       VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+       trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
        reg = reg1;
        do {
-               rc = get_guest(vcpu, val, (u32 __user *) useraddr);
+               rc = read_guest(vcpu, ga, &val, sizeof(val));
                if (rc)
-                       return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+                       return kvm_s390_inject_prog_cond(vcpu, rc);
                vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
                vcpu->arch.sie_block->gcr[reg] |= val;
-               useraddr += 4;
+               ga += 4;
+               if (reg == reg3)
+                       break;
+               reg = (reg + 1) % 16;
+       } while (1);
+
+       return 0;
+}
+
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
+{
+       int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+       int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+       u64 ga;
+       u32 val;
+       int reg, rc;
+
+       vcpu->stat.instruction_stctl++;
+
+       if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+               return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+       ga = kvm_s390_get_base_disp_rs(vcpu);
+
+       if (ga & 3)
+               return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+       VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+       trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
+
+       reg = reg1;
+       do {
+               val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful;
+               rc = write_guest(vcpu, ga, &val, sizeof(val));
+               if (rc)
+                       return kvm_s390_inject_prog_cond(vcpu, rc);
+               ga += 4;
                if (reg == reg3)
                        break;
                reg = (reg + 1) % 16;
@@ -731,7 +829,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 {
        int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
        int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-       u64 useraddr;
+       u64 ga, val;
        int reg, rc;
 
        vcpu->stat.instruction_lctlg++;
@@ -739,23 +837,58 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       useraddr = kvm_s390_get_base_disp_rsy(vcpu);
+       ga = kvm_s390_get_base_disp_rsy(vcpu);
 
-       if (useraddr & 7)
+       if (ga & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
        reg = reg1;
 
-       VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3,
-                  useraddr);
-       trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
+       VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+       trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
        do {
-               rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
-                              (u64 __user *) useraddr);
+               rc = read_guest(vcpu, ga, &val, sizeof(val));
                if (rc)
-                       return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-               useraddr += 8;
+                       return kvm_s390_inject_prog_cond(vcpu, rc);
+               vcpu->arch.sie_block->gcr[reg] = val;
+               ga += 8;
+               if (reg == reg3)
+                       break;
+               reg = (reg + 1) % 16;
+       } while (1);
+
+       return 0;
+}
+
+static int handle_stctg(struct kvm_vcpu *vcpu)
+{
+       int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+       int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+       u64 ga, val;
+       int reg, rc;
+
+       vcpu->stat.instruction_stctg++;
+
+       if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+               return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+       ga = kvm_s390_get_base_disp_rsy(vcpu);
+
+       if (ga & 7)
+               return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+       reg = reg1;
+
+       VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+       trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
+
+       do {
+               val = vcpu->arch.sie_block->gcr[reg];
+               rc = write_guest(vcpu, ga, &val, sizeof(val));
+               if (rc)
+                       return kvm_s390_inject_prog_cond(vcpu, rc);
+               ga += 8;
                if (reg == reg3)
                        break;
                reg = (reg + 1) % 16;
@@ -766,6 +899,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 
 static const intercept_handler_t eb_handlers[256] = {
+       [0x25] = handle_stctg,
        [0x2f] = handle_lctlg,
 };
 
 int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
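
All four control-register loops above (lctl/stctl/lctlg/stctg) share the reg1..reg3 iteration with modulo-16 wraparound, matching the architecture's register numbering. A self-contained worked example of the wraparound:

/* For lctl 13,2,...: reg1 = 13, reg3 = 2 walks cr13, cr14, cr15,
 * cr0, cr1, cr2 -- six registers in total.
 */
int reg = 13, reg3 = 2, count = 0;

do {
        count++;                /* stands in for the gcr[reg] access */
        if (reg == reg3)
                break;
        reg = (reg + 1) % 16;
} while (1);
/* count == 6 at this point */
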
index 26caeb530a7829f6688a31e8ecaa84203b2496bb..c0b99e0f6b63a52bcf0d037da403319878c6817a 100644
@@ -235,7 +235,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
        struct kvm_vcpu *dst_vcpu = NULL;
        struct kvm_s390_interrupt_info *inti;
        int rc;
-       u8 tmp;
 
        if (cpu_addr < KVM_MAX_VCPUS)
                dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
@@ -243,10 +242,13 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
                return SIGP_CC_NOT_OPERATIONAL;
        li = &dst_vcpu->arch.local_int;
 
-       /* make sure that the new value is valid memory */
-       address = address & 0x7fffe000u;
-       if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
-          copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)) {
+       /*
+        * Make sure the new value is valid memory. We only need to check the
+        * first page, since address is 8k aligned and memory pieces are always
+        * at least 1MB aligned and have at least a size of 1MB.
+        */
+       address &= 0x7fffe000u;
+       if (kvm_is_error_gpa(vcpu->kvm, address)) {
                *reg &= 0xffffffff00000000UL;
                *reg |= SIGP_STATUS_INVALID_PARAMETER;
                return SIGP_CC_STATUS_STORED;
index e8e7213d4cc53a31c2b7f6e86bfffe6fa4a9bc67..abf6ba52769e9815b0f9fee383656f488030e641 100644
        TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id,           \
                  __entry->pswmask, __entry->pswaddr, p_args)
 
+TRACE_EVENT(kvm_s390_skey_related_inst,
+           TP_PROTO(VCPU_PROTO_COMMON),
+           TP_ARGS(VCPU_ARGS_COMMON),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON
+                   ),
+           VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu")
+       );
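
This event fires the first time a guest issues a storage-key instruction. The helper that triggers it, __skey_check_enable() (wired up in the priv.c hunks earlier; its body is outside this diff), plausibly looks like the following sketch, with the ICTL_* intercept constants assumed from kvm_host.h: if the skey intercepts are still armed, enable key handling lazily and disarm them so SIE handles later skey instructions natively.

static void __skey_check_enable(struct kvm_vcpu *vcpu)
{
        /* Intercepts already disarmed means keys are already enabled. */
        if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
                return;

        s390_enable_skey();
        trace_kvm_s390_skey_related_inst(vcpu);
        vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
}
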
+
 TRACE_EVENT(kvm_s390_major_guest_pfault,
            TP_PROTO(VCPU_PROTO_COMMON),
            TP_ARGS(VCPU_ARGS_COMMON),
@@ -301,6 +315,31 @@ TRACE_EVENT(kvm_s390_handle_lctl,
                           __entry->reg1, __entry->reg3, __entry->addr)
        );
 
+TRACE_EVENT(kvm_s390_handle_stctl,
+           TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
+           TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
+
+           TP_STRUCT__entry(
+                   VCPU_FIELD_COMMON
+                   __field(int, g)
+                   __field(int, reg1)
+                   __field(int, reg3)
+                   __field(u64, addr)
+                   ),
+
+           TP_fast_assign(
+                   VCPU_ASSIGN_COMMON
+                   __entry->g = g;
+                   __entry->reg1 = reg1;
+                   __entry->reg3 = reg3;
+                   __entry->addr = addr;
+                   ),
+
+           VCPU_TP_PRINTK("%s: storing cr %x-%x to %016llx",
+                          __entry->g ? "stctg" : "stctl",
+                          __entry->reg1, __entry->reg3, __entry->addr)
+       );
+
 TRACE_EVENT(kvm_s390_handle_prefix,
            TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address),
            TP_ARGS(VCPU_ARGS_COMMON, set, address),
index d7cfd57815fbe484283819a98745f7e26e6d1d47..ea4a31b9599000648741bc40caad87f16f5433a0 100644
@@ -832,6 +832,7 @@ void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte)
        }
        spin_unlock(&gmap_notifier_lock);
 }
+EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
 
 static inline int page_table_with_pgste(struct page *page)
 {
@@ -864,8 +865,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
        atomic_set(&page->_mapcount, 0);
        table = (unsigned long *) page_to_phys(page);
        clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
-       clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
-                   PAGE_SIZE/2);
+       clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
        return table;
 }
 
@@ -883,8 +883,8 @@ static inline void page_table_free_pgste(unsigned long *table)
        __free_page(page);
 }
 
-static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
-                       pmd_t *pmd, unsigned long addr, unsigned long end)
+static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd,
+                       unsigned long addr, unsigned long end, bool init_skey)
 {
        pte_t *start_pte, *pte;
        spinlock_t *ptl;
@@ -895,6 +895,22 @@ static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
        do {
                pgste = pgste_get_lock(pte);
                pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
+               if (init_skey) {
+                       unsigned long address;
+
+                       pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
+                                             PGSTE_GR_BIT | PGSTE_GC_BIT);
+
+                       /* skip invalid and non-writable pages */
+                       if (pte_val(*pte) & _PAGE_INVALID ||
+                           !(pte_val(*pte) & _PAGE_WRITE)) {
+                               pgste_set_unlock(pte, pgste);
+                               continue;
+                       }
+
+                       address = pte_val(*pte) & PAGE_MASK;
+                       page_set_storage_key(address, PAGE_DEFAULT_KEY, 1);
+               }
                pgste_set_unlock(pte, pgste);
        } while (pte++, addr += PAGE_SIZE, addr != end);
        pte_unmap_unlock(start_pte, ptl);
@@ -902,8 +918,8 @@ static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
        return addr;
 }
 
-static inline unsigned long page_table_reset_pmd(struct mm_struct *mm,
-                       pud_t *pud, unsigned long addr, unsigned long end)
+static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud,
+                       unsigned long addr, unsigned long end, bool init_skey)
 {
        unsigned long next;
        pmd_t *pmd;
@@ -913,14 +929,14 @@ static inline unsigned long page_table_reset_pmd(struct mm_struct *mm,
                next = pmd_addr_end(addr, end);
                if (pmd_none_or_clear_bad(pmd))
                        continue;
-               next = page_table_reset_pte(mm, pmd, addr, next);
+               next = page_table_reset_pte(mm, pmd, addr, next, init_skey);
        } while (pmd++, addr = next, addr != end);
 
        return addr;
 }
 
-static inline unsigned long page_table_reset_pud(struct mm_struct *mm,
-                       pgd_t *pgd, unsigned long addr, unsigned long end)
+static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd,
+                       unsigned long addr, unsigned long end, bool init_skey)
 {
        unsigned long next;
        pud_t *pud;
@@ -930,14 +946,14 @@ static inline unsigned long page_table_reset_pud(struct mm_struct *mm,
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud))
                        continue;
-               next = page_table_reset_pmd(mm, pud, addr, next);
+               next = page_table_reset_pmd(mm, pud, addr, next, init_skey);
        } while (pud++, addr = next, addr != end);
 
        return addr;
 }
 
-void page_table_reset_pgste(struct mm_struct *mm,
-                       unsigned long start, unsigned long end)
+void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
+                           unsigned long end, bool init_skey)
 {
        unsigned long addr, next;
        pgd_t *pgd;
@@ -949,7 +965,7 @@ void page_table_reset_pgste(struct mm_struct *mm,
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
-               next = page_table_reset_pud(mm, pgd, addr, next);
+               next = page_table_reset_pud(mm, pgd, addr, next, init_skey);
        } while (pgd++, addr = next, addr != end);
        up_read(&mm->mmap_sem);
 }
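
page_table_reset_pgste() now serves two purposes: resetting all CMMA usage states (init_skey == false, used by the new per-VM attribute) and initializing the storage keys on first use (init_skey == true, see s390_enable_skey() below). Since the function takes mmap_sem itself, a hypothetical CMMA-reset caller reduces to a one-liner (the function name here is assumed, not from this diff):

/* Hypothetical: clear all CMMA usage states for a VM. */
static void reset_cmma_states(struct kvm *kvm)
{
        page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
}
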
@@ -989,7 +1005,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
        /* changing the guest storage key is considered a change of the page */
        if ((pgste_val(new) ^ pgste_val(old)) &
            (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
-               pgste_val(new) |= PGSTE_HC_BIT;
+               pgste_val(new) |= PGSTE_UC_BIT;
 
        pgste_set_unlock(ptep, new);
        pte_unmap_unlock(*ptep, ptl);
@@ -1011,6 +1027,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
        return NULL;
 }
 
+void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
+                           unsigned long end, bool init_skey)
+{
+}
+
 static inline void page_table_free_pgste(unsigned long *table)
 {
 }
@@ -1357,6 +1378,50 @@ int s390_enable_sie(void)
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
 
+/*
+ * Enable storage key handling from now on and initialize the storage
+ * keys with the default key.
+ */
+void s390_enable_skey(void)
+{
+       /*
+        * To avoid races between multiple vcpus which could end up
+        * calling page_table_reset_pgste() twice or more, the
+        * page_table_lock is taken for serialization.
+        */
+       spin_lock(&current->mm->page_table_lock);
+       if (mm_use_skey(current->mm)) {
+               spin_unlock(&current->mm->page_table_lock);
+               return;
+       }
+
+       current->mm->context.use_skey = 1;
+       spin_unlock(&current->mm->page_table_lock);
+       page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);
+}
+EXPORT_SYMBOL_GPL(s390_enable_skey);
+
+/*
+ * Test whether a guest page is dirty, and clear the dirty state
+ */
+bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
+{
+       pte_t *pte;
+       spinlock_t *ptl;
+       bool dirty = false;
+
+       pte = get_locked_pte(gmap->mm, address, &ptl);
+       if (unlikely(!pte))
+               return false;
+
+       if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
+               dirty = true;
+
+       spin_unlock(ptl);
+       return dirty;
+}
+EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
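
On the KVM side, a per-memslot dirty-log sync can be built directly on this helper: walk the slot's guest frames, translate each to its host address, and transfer the per-PTE dirty state into the migration bitmap. A sketch with an assumed function name (the real caller lives in kvm-s390.c, outside this diff); gfn_to_hva_memslot() and mark_page_dirty() are existing KVM primitives:

static void sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
        struct gmap *gmap = kvm->arch.gmap;
        unsigned long address;
        gfn_t cur_gfn, last_gfn;

        down_read(&gmap->mm->mmap_sem);
        last_gfn = memslot->base_gfn + memslot->npages;
        for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
                address = gfn_to_hva_memslot(memslot, cur_gfn);
                if (gmap_test_and_clear_dirty(address, gmap))
                        mark_page_dirty(kvm, cur_gfn);
        }
        up_read(&gmap->mm->mmap_sem);
}
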
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
                           pmd_t *pmdp)
index 14196ea0fdf3ad878b6fe527733d8390f0ba6a25..b57fe0efb4227d14836ac007d65e57d1168f5530 100644
@@ -22,7 +22,8 @@ struct read_info_sccb {
        u8      rnsize;                 /* 10 */
        u8      _reserved0[16 - 11];    /* 11-15 */
        u16     ncpurl;                 /* 16-17 */
-       u8      _reserved7[24 - 18];    /* 18-23 */
+       u16     cpuoff;                 /* 18-19 */
+       u8      _reserved7[24 - 20];    /* 20-23 */
        u8      loadparm[8];            /* 24-31 */
        u8      _reserved1[48 - 32];    /* 32-47 */
        u64     facilities;             /* 48-55 */
@@ -45,6 +46,7 @@ static unsigned int sclp_con_has_linemode __initdata;
 static unsigned long sclp_hsa_size;
 static unsigned int sclp_max_cpu;
 static struct sclp_ipl_info sclp_ipl_info;
+static unsigned char sclp_siif;
 
 u64 sclp_facilities;
 u8 sclp_fac84;
@@ -96,6 +98,9 @@ static int __init sclp_read_info_early(struct read_info_sccb *sccb)
 
 static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
 {
+       struct sclp_cpu_entry *cpue;
+       u16 boot_cpu_address, cpu;
+
        if (sclp_read_info_early(sccb))
                return;
 
@@ -116,6 +121,15 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
                sclp_max_cpu = sccb->hcpua + 1;
        }
 
+       boot_cpu_address = stap();
+       cpue = (void *)sccb + sccb->cpuoff;
+       for (cpu = 0; cpu < sccb->ncpurl; cpue++, cpu++) {
+               if (boot_cpu_address != cpue->address)
+                       continue;
+               sclp_siif = cpue->siif;
+               break;
+       }
+
        /* Save IPL information */
        sclp_ipl_info.is_valid = 1;
        if (sccb->flags & 0x2)
@@ -148,6 +162,12 @@ unsigned int sclp_get_max_cpu(void)
        return sclp_max_cpu;
 }
 
+int sclp_has_siif(void)
+{
+       return sclp_siif;
+}
+EXPORT_SYMBOL(sclp_has_siif);
+
 /*
  * This function will be called after sclp_facilities_detect(), which gets
  * called from early.c code. The sclp_facilities_detect() function retrieves
index 6c3c2eb96d0693f2c37964db739978f102423596..32d263f683dc0271511a5668158c3251ea7d346c 100644
@@ -880,6 +880,13 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn)
        return (hpa_t)pfn << PAGE_SHIFT;
 }
 
+static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
+{
+       unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+       return kvm_is_error_hva(hva);
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
        set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
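
gfn_to_hva() already returns a poisoned value for guest frames that no memslot backs, so the new helper is a single translation plus kvm_is_error_hva(). Typical use, as in the SIGP set-prefix validation above:

/* Reject a guest-chosen physical address that no memslot backs. */
if (kvm_is_error_gpa(vcpu->kvm, address))
        return -EFAULT;         /* or inject/report, as the caller requires */
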
index d8a6ce4c2a83c8e3ae8e39fc86218b5fd4db0792..836e15b7abc8bb7011626469d0a12217d2c77258 100644
@@ -745,6 +745,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_ENABLE_CAP_VM 98
 #define KVM_CAP_S390_IRQCHIP 99
 #define KVM_CAP_IOEVENTFD_NO_LENGTH 100
+#define KVM_CAP_VM_ATTRIBUTES 101
 
 #ifdef KVM_CAP_IRQ_ROUTING
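
Userspace discovers the per-VM attribute interface with the usual capability check and then drives it through the device-attr ioctls on the VM fd. A hypothetical probe; the group/attribute constants are assumed to follow the uapi naming in this series:

struct kvm_device_attr attr = {
        .group = KVM_S390_VM_MEM_CTRL,          /* assumed constant name */
        .attr  = KVM_S390_VM_MEM_ENABLE_CMMA,   /* assumed constant name */
};

/* kvm_fd: open("/dev/kvm"); vm_fd: from KVM_CREATE_VM */
if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_VM_ATTRIBUTES) > 0)
        ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
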
 
index 96456ac888ba3822f5511773296bd9ea6853945d..ea46d64c8e75f7666af08225a0091b366cbe727e 100644
@@ -637,14 +637,12 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
  */
 static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
-#ifndef CONFIG_S390
        unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
 
        memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes);
        if (!memslot->dirty_bitmap)
                return -ENOMEM;
 
-#endif /* !CONFIG_S390 */
        return 0;
 }
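
With the #ifndef CONFIG_S390 guard gone, s390 allocates a dirty bitmap like every other architecture, which the fault-based dirty tracking above requires. The allocation is twice the bitmap size; the doubling leaves room for a second buffer half used by KVM_GET_DIRTY_LOG when copying to userspace (an assumption based on the generic code). For reference, the sizing helper, sketched from its kvm_host.h form:

/* One dirty bit per guest page, rounded up to whole longs. */
static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
{
        return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
}
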