KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8
authorPaul Mackerras <paulus@samba.org>
Thu, 2 Jul 2015 10:38:16 +0000 (20:38 +1000)
committerAlexander Graf <agraf@suse.de>
Sat, 22 Aug 2015 09:16:17 +0000 (11:16 +0200)
This builds on the ability to run more than one vcore on a physical
core by using the micro-threading (split-core) modes of the POWER8
chip.  Previously, only vcores from the same VM could be run together,
and (on POWER8) only if they had just one thread per core.  With the
ability to split the core on guest entry and unsplit it on guest exit,
we can run up to 8 vcpu threads from up to 4 different VMs, and we can
run multiple vcores with 2 or 4 vcpus per vcore.

Dynamic micro-threading is only available if the static configuration
of the cores is whole-core mode (unsplit), and only on POWER8.

To manage this, we introduce a new kvm_split_mode struct which is
shared across all of the subcores in the core, with a pointer in the
paca on each thread.  In addition we extend the core_info struct to
have information on each subcore.  When deciding whether to add a
vcore to the set already on the core, we now have two possibilities:
(a) piggyback the vcore onto an existing subcore, or (b) start a new
subcore.

Currently, when any vcpu needs to exit the guest and switch to host
virtual mode, we interrupt all the threads in all subcores and switch
the core back to whole-core mode.  It may be possible in future to
allow some of the subcores to keep executing in the guest while
subcore 0 switches to the host, but that is not implemented in this
patch.

This adds a module parameter called dynamic_mt_modes which controls
which micro-threading (split-core) modes the code will consider, as a
bitmap.  In other words, if it is 0, no micro-threading mode is
considered; if it is 2, only 2-way micro-threading is considered; if
it is 4, only 4-way, and if it is 6, both 2-way and 4-way
micro-threading mode will be considered.  The default is 6.

With this, we now have secondary threads which are the primary thread
for their subcore and therefore need to do the MMU switch.  These
threads will need to be started even if they have no vcpu to run, so
we use the vcore pointer in the PACA rather than the vcpu pointer to
trigger them.

It is now possible for thread 0 to find that an exit has been
requested before it gets to switch the subcore state to the guest.  In
that case we haven't added the guest's timebase offset to the
timebase, so we need to be careful not to subtract the offset in the
guest exit path.  In fact we just skip the whole path that switches
back to host context, since we haven't switched to the guest context.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
arch/powerpc/include/asm/kvm_book3s_asm.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S

index 5bdfb5dd34002348578b8b9c0a6164a04b3d4820..57d5dfef48bd4ac2e1f21374aa7101599badb47c 100644 (file)
 #define XICS_MFRR              0xc
 #define XICS_IPI               2       /* interrupt source # for IPIs */
 
+/* Maximum number of threads per physical core */
+#define MAX_SMT_THREADS                8
+
+/* Maximum number of subcores per physical core */
+#define MAX_SUBCORES           4
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -65,6 +71,19 @@ kvmppc_resume_\intno:
 
 #else  /*__ASSEMBLY__ */
 
+struct kvmppc_vcore;
+
+/* Struct used for coordinating micro-threading (split-core) mode changes */
+struct kvm_split_mode {
+       unsigned long   rpr;
+       unsigned long   pmmar;
+       unsigned long   ldbar;
+       u8              subcore_size;
+       u8              do_nap;
+       u8              napped[MAX_SMT_THREADS];
+       struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
+};
+
 /*
  * This struct goes in the PACA on 64-bit processors.  It is used
  * to store host state that needs to be saved when we enter a guest
@@ -100,6 +119,7 @@ struct kvmppc_host_state {
        u64 host_spurr;
        u64 host_dscr;
        u64 dec_expires;
+       struct kvm_split_mode *kvm_split_mode;
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
        u64 cfar;
index 2b7449017ae864ad382d0cf67692f11e3804beb6..80eb29ab262acb51a3f6d8137e4b9a00cea5f476 100644 (file)
@@ -302,6 +302,9 @@ struct kvmppc_vcore {
 #define VCORE_EXIT_MAP(vc)     ((vc)->entry_exit_map >> 8)
 #define VCORE_IS_EXITING(vc)   (VCORE_EXIT_MAP(vc) != 0)
 
+/* This bit is used when a vcore exit is triggered from outside the vcore */
+#define VCORE_EXIT_REQ         0x10000
+
 /*
  * Values for vcore_state.
  * Note that these are arranged such that lower values
index a78cdbf9b6227e5bb3f122412597984d80d32e96..de62392f093c1cb9ca4448ff60b609b07678a661 100644 (file)
@@ -676,7 +676,14 @@ int main(void)
        HSTATE_FIELD(HSTATE_DSCR, host_dscr);
        HSTATE_FIELD(HSTATE_DABR, dabr);
        HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
+       HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode);
        DEFINE(IPI_PRIORITY, IPI_PRIORITY);
+       DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr));
+       DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar));
+       DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar));
+       DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size));
+       DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap));
+       DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped));
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #ifdef CONFIG_PPC_BOOK3S_64
index 0173ce22111147aa8ff29d4eb34374c5be922872..6e3ef308b4c5d6f2f99fb13832293c26fa58ae3c 100644 (file)
@@ -81,6 +81,9 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
 #define MPP_BUFFER_ORDER       3
 #endif
 
+static int dynamic_mt_modes = 6;
+module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
 static int target_smt_mode;
 module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
@@ -1770,6 +1773,7 @@ static int kvmppc_grab_hwthread(int cpu)
 
        /* Ensure the thread won't go into the kernel if it wakes */
        tpaca->kvm_hstate.kvm_vcpu = NULL;
+       tpaca->kvm_hstate.kvm_vcore = NULL;
        tpaca->kvm_hstate.napping = 0;
        smp_wmb();
        tpaca->kvm_hstate.hwthread_req = 1;
@@ -1801,28 +1805,32 @@ static void kvmppc_release_hwthread(int cpu)
        tpaca = &paca[cpu];
        tpaca->kvm_hstate.hwthread_req = 0;
        tpaca->kvm_hstate.kvm_vcpu = NULL;
+       tpaca->kvm_hstate.kvm_vcore = NULL;
+       tpaca->kvm_hstate.kvm_split_mode = NULL;
 }
 
-static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
        int cpu;
        struct paca_struct *tpaca;
-       struct kvmppc_vcore *vc = vcpu->arch.vcore;
        struct kvmppc_vcore *mvc = vc->master_vcore;
 
-       if (vcpu->arch.timer_running) {
-               hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
-               vcpu->arch.timer_running = 0;
+       cpu = vc->pcpu;
+       if (vcpu) {
+               if (vcpu->arch.timer_running) {
+                       hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+                       vcpu->arch.timer_running = 0;
+               }
+               cpu += vcpu->arch.ptid;
+               vcpu->cpu = mvc->pcpu;
+               vcpu->arch.thread_cpu = cpu;
        }
-       cpu = vc->pcpu + vcpu->arch.ptid;
        tpaca = &paca[cpu];
-       tpaca->kvm_hstate.kvm_vcore = mvc;
+       tpaca->kvm_hstate.kvm_vcpu = vcpu;
        tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
-       vcpu->cpu = mvc->pcpu;
-       vcpu->arch.thread_cpu = cpu;
        /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
        smp_wmb();
-       tpaca->kvm_hstate.kvm_vcpu = vcpu;
+       tpaca->kvm_hstate.kvm_vcore = mvc;
        if (cpu != smp_processor_id())
                kvmppc_ipi_thread(cpu);
 }
@@ -1835,12 +1843,12 @@ static void kvmppc_wait_for_nap(void)
        for (loops = 0; loops < 1000000; ++loops) {
                /*
                 * Check if all threads are finished.
-                * We set the vcpu pointer when starting a thread
+                * We set the vcore pointer when starting a thread
                 * and the thread clears it when finished, so we look
-                * for any threads that still have a non-NULL vcpu ptr.
+                * for any threads that still have a non-NULL vcore ptr.
                 */
                for (i = 1; i < threads_per_subcore; ++i)
-                       if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+                       if (paca[cpu + i].kvm_hstate.kvm_vcore)
                                break;
                if (i == threads_per_subcore) {
                        HMT_medium();
@@ -1850,7 +1858,7 @@ static void kvmppc_wait_for_nap(void)
        }
        HMT_medium();
        for (i = 1; i < threads_per_subcore; ++i)
-               if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+               if (paca[cpu + i].kvm_hstate.kvm_vcore)
                        pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
 }
 
@@ -1965,17 +1973,55 @@ static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
        vc->vcore_state = VCORE_INACTIVE;
 }
 
+/*
+ * This stores information about the virtual cores currently
+ * assigned to a physical core.
+ */
 struct core_info {
+       int             n_subcores;
+       int             max_subcore_threads;
        int             total_threads;
-       struct list_head vcs;
+       int             subcore_threads[MAX_SUBCORES];
+       struct kvm      *subcore_vm[MAX_SUBCORES];
+       struct list_head vcs[MAX_SUBCORES];
 };
 
+/*
+ * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
+ * respectively in 2-way micro-threading (split-core) mode.
+ */
+static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
+
 static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
 {
+       int sub;
+
        memset(cip, 0, sizeof(*cip));
+       cip->n_subcores = 1;
+       cip->max_subcore_threads = vc->num_threads;
        cip->total_threads = vc->num_threads;
-       INIT_LIST_HEAD(&cip->vcs);
-       list_add_tail(&vc->preempt_list, &cip->vcs);
+       cip->subcore_threads[0] = vc->num_threads;
+       cip->subcore_vm[0] = vc->kvm;
+       for (sub = 0; sub < MAX_SUBCORES; ++sub)
+               INIT_LIST_HEAD(&cip->vcs[sub]);
+       list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+}
+
+static bool subcore_config_ok(int n_subcores, int n_threads)
+{
+       /* Can only dynamically split if unsplit to begin with */
+       if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
+               return false;
+       if (n_subcores > MAX_SUBCORES)
+               return false;
+       if (n_subcores > 1) {
+               if (!(dynamic_mt_modes & 2))
+                       n_subcores = 4;
+               if (n_subcores > 2 && !(dynamic_mt_modes & 4))
+                       return false;
+       }
+
+       return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
 }
 
 static void init_master_vcore(struct kvmppc_vcore *vc)
@@ -1988,15 +2034,113 @@ static void init_master_vcore(struct kvmppc_vcore *vc)
 }
 
 /*
- * Work out whether it is possible to piggyback the execute of
- * vcore *pvc onto the execution of the other vcores described in *cip.
+ * See if the existing subcores can be split into 3 (or fewer) subcores
+ * of at most two threads each, so we can fit in another vcore.  This
+ * assumes there are at most two subcores and at most 6 threads in total.
  */
-static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
-                         int target_threads)
+static bool can_split_piggybacked_subcores(struct core_info *cip)
+{
+       int sub, new_sub;
+       int large_sub = -1;
+       int thr;
+       int n_subcores = cip->n_subcores;
+       struct kvmppc_vcore *vc, *vcnext;
+       struct kvmppc_vcore *master_vc = NULL;
+
+       for (sub = 0; sub < cip->n_subcores; ++sub) {
+               if (cip->subcore_threads[sub] <= 2)
+                       continue;
+               if (large_sub >= 0)
+                       return false;
+               large_sub = sub;
+               vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+                                     preempt_list);
+               if (vc->num_threads > 2)
+                       return false;
+               n_subcores += (cip->subcore_threads[sub] - 1) >> 1;
+       }
+       if (n_subcores > 3 || large_sub < 0)
+               return false;
+
+       /*
+        * Seems feasible, so go through and move vcores to new subcores.
+        * Note that when we have two or more vcores in one subcore,
+        * all those vcores must have only one thread each.
+        */
+       new_sub = cip->n_subcores;
+       thr = 0;
+       sub = large_sub;
+       list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) {
+               if (thr >= 2) {
+                       list_del(&vc->preempt_list);
+                       list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]);
+                       /* vc->num_threads must be 1 */
+                       if (++cip->subcore_threads[new_sub] == 1) {
+                               cip->subcore_vm[new_sub] = vc->kvm;
+                               init_master_vcore(vc);
+                               master_vc = vc;
+                               ++cip->n_subcores;
+                       } else {
+                               vc->master_vcore = master_vc;
+                               ++new_sub;
+                       }
+               }
+               thr += vc->num_threads;
+       }
+       cip->subcore_threads[large_sub] = 2;
+       cip->max_subcore_threads = 2;
+
+       return true;
+}
+
+static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
+{
+       int n_threads = vc->num_threads;
+       int sub;
+
+       if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+               return false;
+
+       if (n_threads < cip->max_subcore_threads)
+               n_threads = cip->max_subcore_threads;
+       if (subcore_config_ok(cip->n_subcores + 1, n_threads)) {
+               cip->max_subcore_threads = n_threads;
+       } else if (cip->n_subcores <= 2 && cip->total_threads <= 6 &&
+                  vc->num_threads <= 2) {
+               /*
+                * We may be able to fit another subcore in by
+                * splitting an existing subcore with 3 or 4
+                * threads into two 2-thread subcores, or one
+                * with 5 or 6 threads into three subcores.
+                * We can only do this if those subcores have
+                * piggybacked virtual cores.
+                */
+               if (!can_split_piggybacked_subcores(cip))
+                       return false;
+       } else {
+               return false;
+       }
+
+       sub = cip->n_subcores;
+       ++cip->n_subcores;
+       cip->total_threads += vc->num_threads;
+       cip->subcore_threads[sub] = vc->num_threads;
+       cip->subcore_vm[sub] = vc->kvm;
+       init_master_vcore(vc);
+       list_del(&vc->preempt_list);
+       list_add_tail(&vc->preempt_list, &cip->vcs[sub]);
+
+       return true;
+}
+
+static bool can_piggyback_subcore(struct kvmppc_vcore *pvc,
+                                 struct core_info *cip, int sub)
 {
        struct kvmppc_vcore *vc;
+       int n_thr;
 
-       vc = list_first_entry(&cip->vcs, struct kvmppc_vcore, preempt_list);
+       vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+                             preempt_list);
 
        /* require same VM and same per-core reg values */
        if (pvc->kvm != vc->kvm ||
@@ -2010,17 +2154,44 @@ static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
            (vc->num_threads > 1 || pvc->num_threads > 1))
                return false;
 
-       if (cip->total_threads + pvc->num_threads > target_threads)
-               return false;
+       n_thr = cip->subcore_threads[sub] + pvc->num_threads;
+       if (n_thr > cip->max_subcore_threads) {
+               if (!subcore_config_ok(cip->n_subcores, n_thr))
+                       return false;
+               cip->max_subcore_threads = n_thr;
+       }
 
        cip->total_threads += pvc->num_threads;
+       cip->subcore_threads[sub] = n_thr;
        pvc->master_vcore = vc;
        list_del(&pvc->preempt_list);
-       list_add_tail(&pvc->preempt_list, &cip->vcs);
+       list_add_tail(&pvc->preempt_list, &cip->vcs[sub]);
 
        return true;
 }
 
+/*
+ * Work out whether it is possible to piggyback the execution of
+ * vcore *pvc onto the execution of the other vcores described in *cip.
+ */
+static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
+                         int target_threads)
+{
+       int sub;
+
+       if (cip->total_threads + pvc->num_threads > target_threads)
+               return false;
+       for (sub = 0; sub < cip->n_subcores; ++sub)
+               if (cip->subcore_threads[sub] &&
+                   can_piggyback_subcore(pvc, cip, sub))
+                       return true;
+
+       if (can_dynamic_split(pvc, cip))
+               return true;
+
+       return false;
+}
+
 static void prepare_threads(struct kvmppc_vcore *vc)
 {
        struct kvm_vcpu *vcpu, *vnext;
@@ -2135,6 +2306,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
        int srcu_idx;
        struct core_info core_info;
        struct kvmppc_vcore *pvc, *vcnext;
+       struct kvm_split_mode split_info, *sip;
+       int split, subcore_size, active;
+       int sub;
+       bool thr0_done;
+       unsigned long cmd_bit, stat_bit;
        int pcpu, thr;
        int target_threads;
 
@@ -2182,29 +2358,100 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
        if (vc->num_threads < target_threads)
                collect_piggybacks(&core_info, target_threads);
 
-       thr = 0;
-       list_for_each_entry(pvc, &core_info.vcs, preempt_list) {
-               pvc->pcpu = pcpu + thr;
-               list_for_each_entry(vcpu, &pvc->runnable_threads,
-                                   arch.run_list) {
-                       kvmppc_start_thread(vcpu);
-                       kvmppc_create_dtl_entry(vcpu, pvc);
-                       trace_kvm_guest_enter(vcpu);
+       /* Decide on micro-threading (split-core) mode */
+       subcore_size = threads_per_subcore;
+       cmd_bit = stat_bit = 0;
+       split = core_info.n_subcores;
+       sip = NULL;
+       if (split > 1) {
+               /* threads_per_subcore must be MAX_SMT_THREADS (8) here */
+               if (split == 2 && (dynamic_mt_modes & 2)) {
+                       cmd_bit = HID0_POWER8_1TO2LPAR;
+                       stat_bit = HID0_POWER8_2LPARMODE;
+               } else {
+                       split = 4;
+                       cmd_bit = HID0_POWER8_1TO4LPAR;
+                       stat_bit = HID0_POWER8_4LPARMODE;
                }
-               thr += pvc->num_threads;
+               subcore_size = MAX_SMT_THREADS / split;
+               sip = &split_info;
+               memset(&split_info, 0, sizeof(split_info));
+               split_info.rpr = mfspr(SPRN_RPR);
+               split_info.pmmar = mfspr(SPRN_PMMAR);
+               split_info.ldbar = mfspr(SPRN_LDBAR);
+               split_info.subcore_size = subcore_size;
+               for (sub = 0; sub < core_info.n_subcores; ++sub)
+                       split_info.master_vcs[sub] =
+                               list_first_entry(&core_info.vcs[sub],
+                                       struct kvmppc_vcore, preempt_list);
+               /* order writes to split_info before kvm_split_mode pointer */
+               smp_wmb();
        }
-
-       /* Set this explicitly in case thread 0 doesn't have a vcpu */
-       get_paca()->kvm_hstate.kvm_vcore = vc;
-       get_paca()->kvm_hstate.ptid = 0;
+       pcpu = smp_processor_id();
+       for (thr = 0; thr < threads_per_subcore; ++thr)
+               paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+
+       /* Initiate micro-threading (split-core) if required */
+       if (cmd_bit) {
+               unsigned long hid0 = mfspr(SPRN_HID0);
+
+               hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
+               mb();
+               mtspr(SPRN_HID0, hid0);
+               isync();
+               for (;;) {
+                       hid0 = mfspr(SPRN_HID0);
+                       if (hid0 & stat_bit)
+                               break;
+                       cpu_relax();
+               }
+               split_info.do_nap = 1;  /* ask secondaries to nap when done */
+       }
+
+       /* Start all the threads */
+       active = 0;
+       for (sub = 0; sub < core_info.n_subcores; ++sub) {
+               thr = subcore_thread_map[sub];
+               thr0_done = false;
+               active |= 1 << thr;
+               list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
+                       pvc->pcpu = pcpu + thr;
+                       list_for_each_entry(vcpu, &pvc->runnable_threads,
+                                           arch.run_list) {
+                               kvmppc_start_thread(vcpu, pvc);
+                               kvmppc_create_dtl_entry(vcpu, pvc);
+                               trace_kvm_guest_enter(vcpu);
+                               if (!vcpu->arch.ptid)
+                                       thr0_done = true;
+                               active |= 1 << (thr + vcpu->arch.ptid);
+                       }
+                       /*
+                        * We need to start the first thread of each subcore
+                        * even if it doesn't have a vcpu.
+                        */
+                       if (pvc->master_vcore == pvc && !thr0_done)
+                               kvmppc_start_thread(NULL, pvc);
+                       thr += pvc->num_threads;
+               }
+       }
+       /*
+        * When doing micro-threading, poke the inactive threads as well.
+        * This gets them to the nap instruction after kvm_do_nap,
+        * which reduces the time taken to unsplit later.
+        */
+       if (split > 1)
+               for (thr = 1; thr < threads_per_subcore; ++thr)
+                       if (!(active & (1 << thr)))
+                               kvmppc_ipi_thread(pcpu + thr);
 
        vc->vcore_state = VCORE_RUNNING;
        preempt_disable();
 
        trace_kvmppc_run_core(vc, 0);
 
-       list_for_each_entry(pvc, &core_info.vcs, preempt_list)
-               spin_unlock(&pvc->lock);
+       for (sub = 0; sub < core_info.n_subcores; ++sub)
+               list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
+                       spin_unlock(&pvc->lock);
 
        kvm_guest_enter();
 
@@ -2226,16 +2473,44 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
        /* wait for secondary threads to finish writing their state to memory */
        kvmppc_wait_for_nap();
-       for (i = 0; i < threads_per_subcore; ++i)
-               kvmppc_release_hwthread(vc->pcpu + i);
+
+       /* Return to whole-core mode if we split the core earlier */
+       if (split > 1) {
+               unsigned long hid0 = mfspr(SPRN_HID0);
+               unsigned long loops = 0;
+
+               hid0 &= ~HID0_POWER8_DYNLPARDIS;
+               stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
+               mb();
+               mtspr(SPRN_HID0, hid0);
+               isync();
+               for (;;) {
+                       hid0 = mfspr(SPRN_HID0);
+                       if (!(hid0 & stat_bit))
+                               break;
+                       cpu_relax();
+                       ++loops;
+               }
+               split_info.do_nap = 0;
+       }
+
+       /* Let secondaries go back to the offline loop */
+       for (i = 0; i < threads_per_subcore; ++i) {
+               kvmppc_release_hwthread(pcpu + i);
+               if (sip && sip->napped[i])
+                       kvmppc_ipi_thread(pcpu + i);
+       }
+
        spin_unlock(&vc->lock);
 
        /* make sure updates to secondary vcpu structs are visible now */
        smp_mb();
        kvm_guest_exit();
 
-       list_for_each_entry_safe(pvc, vcnext, &core_info.vcs, preempt_list)
-               post_guest_process(pvc, pvc == vc);
+       for (sub = 0; sub < core_info.n_subcores; ++sub)
+               list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
+                                        preempt_list)
+                       post_guest_process(pvc, pvc == vc);
 
        spin_lock(&vc->lock);
        preempt_enable();
@@ -2341,7 +2616,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                                if (mvc->vcore_state == VCORE_RUNNING &&
                                    !VCORE_IS_EXITING(mvc)) {
                                        kvmppc_create_dtl_entry(vcpu, vc);
-                                       kvmppc_start_thread(vcpu);
+                                       kvmppc_start_thread(vcpu, vc);
                                        trace_kvm_guest_enter(vcpu);
                                }
                                spin_unlock(&mvc->lock);
@@ -2349,7 +2624,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                } else if (vc->vcore_state == VCORE_RUNNING &&
                           !VCORE_IS_EXITING(vc)) {
                        kvmppc_create_dtl_entry(vcpu, vc);
-                       kvmppc_start_thread(vcpu);
+                       kvmppc_start_thread(vcpu, vc);
                        trace_kvm_guest_enter(vcpu);
                } else if (vc->vcore_state == VCORE_SLEEPING) {
                        wake_up(&vc->wq);
index 1fd0e305739643c4071aead95d59e1778768dcca..fd7006bf6b1a1a59a86fe42efcbbb5c9b485e6a1 100644 (file)
@@ -239,7 +239,8 @@ void kvmhv_commence_exit(int trap)
 {
        struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
        int ptid = local_paca->kvm_hstate.ptid;
-       int me, ee;
+       struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
+       int me, ee, i;
 
        /* Set our bit in the threads-exiting-guest map in the 0xff00
           bits of vcore->entry_exit_map */
@@ -259,4 +260,26 @@ void kvmhv_commence_exit(int trap)
         */
        if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
                kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
+
+       /*
+        * If we are doing dynamic micro-threading, interrupt the other
+        * subcores to pull them out of their guests too.
+        */
+       if (!sip)
+               return;
+
+       for (i = 0; i < MAX_SUBCORES; ++i) {
+               vc = sip->master_vcs[i];
+               if (!vc)
+                       break;
+               do {
+                       ee = vc->entry_exit_map;
+                       /* Already asked to exit? */
+                       if ((ee >> 8) != 0)
+                               break;
+               } while (cmpxchg(&vc->entry_exit_map, ee,
+                                ee | VCORE_EXIT_REQ) != ee);
+               if ((ee >> 8) == 0)
+                       kvmhv_interrupt_vcore(vc, ee);
+       }
 }
index ac113b527bf9151add877875ce1b339a90c12b18..db2427db44715eda7858969ccd7d306bb0ad6158 100644 (file)
@@ -128,6 +128,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        subf    r4, r4, r3
        mtspr   SPRN_DEC, r4
 
+       /* hwthread_req may have got set by cede or no vcpu, so clear it */
+       li      r0, 0
+       stb     r0, HSTATE_HWTHREAD_REQ(r13)
+
        /*
         * For external and machine check interrupts, we need
         * to call the Linux handler to process the interrupt.
@@ -215,7 +219,6 @@ kvm_novcpu_wakeup:
        ld      r5, HSTATE_KVM_VCORE(r13)
        li      r0, 0
        stb     r0, HSTATE_NAPPING(r13)
-       stb     r0, HSTATE_HWTHREAD_REQ(r13)
 
        /* check the wake reason */
        bl      kvmppc_check_wake_reason
@@ -315,10 +318,10 @@ kvm_start_guest:
        cmpdi   r3, 0
        bge     kvm_no_guest
 
-       /* get vcpu pointer, NULL if we have no vcpu to run */
-       ld      r4,HSTATE_KVM_VCPU(r13)
-       cmpdi   r4,0
-       /* if we have no vcpu to run, go back to sleep */
+       /* get vcore pointer, NULL if we have nothing to run */
+       ld      r5,HSTATE_KVM_VCORE(r13)
+       cmpdi   r5,0
+       /* if we have no vcore to run, go back to sleep */
        beq     kvm_no_guest
 
 kvm_secondary_got_guest:
@@ -327,21 +330,42 @@ kvm_secondary_got_guest:
        ld      r6, PACA_DSCR_DEFAULT(r13)
        std     r6, HSTATE_DSCR(r13)
 
-       /* Order load of vcore, ptid etc. after load of vcpu */
+       /* On thread 0 of a subcore, set HDEC to max */
+       lbz     r4, HSTATE_PTID(r13)
+       cmpwi   r4, 0
+       bne     63f
+       lis     r6, 0x7fff
+       ori     r6, r6, 0xffff
+       mtspr   SPRN_HDEC, r6
+       /* and set per-LPAR registers, if doing dynamic micro-threading */
+       ld      r6, HSTATE_SPLIT_MODE(r13)
+       cmpdi   r6, 0
+       beq     63f
+       ld      r0, KVM_SPLIT_RPR(r6)
+       mtspr   SPRN_RPR, r0
+       ld      r0, KVM_SPLIT_PMMAR(r6)
+       mtspr   SPRN_PMMAR, r0
+       ld      r0, KVM_SPLIT_LDBAR(r6)
+       mtspr   SPRN_LDBAR, r0
+       isync
+63:
+       /* Order load of vcpu after load of vcore */
        lwsync
+       ld      r4, HSTATE_KVM_VCPU(r13)
        bl      kvmppc_hv_entry
 
        /* Back from the guest, go back to nap */
-       /* Clear our vcpu pointer so we don't come back in early */
+       /* Clear our vcpu and vcore pointers so we don't come back in early */
        li      r0, 0
+       std     r0, HSTATE_KVM_VCPU(r13)
        /*
-        * Once we clear HSTATE_KVM_VCPU(r13), the code in
+        * Once we clear HSTATE_KVM_VCORE(r13), the code in
         * kvmppc_run_core() is going to assume that all our vcpu
         * state is visible in memory.  This lwsync makes sure
         * that that is true.
         */
        lwsync
-       std     r0, HSTATE_KVM_VCPU(r13)
+       std     r0, HSTATE_KVM_VCORE(r13)
 
 /*
  * At this point we have finished executing in the guest.
@@ -374,16 +398,63 @@ kvm_no_guest:
        b       power7_wakeup_loss
 
 53:    HMT_LOW
-       ld      r4, HSTATE_KVM_VCPU(r13)
-       cmpdi   r4, 0
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       cmpdi   r5, 0
+       bne     60f
+       ld      r3, HSTATE_SPLIT_MODE(r13)
+       cmpdi   r3, 0
+       beq     kvm_no_guest
+       lbz     r0, KVM_SPLIT_DO_NAP(r3)
+       cmpwi   r0, 0
        beq     kvm_no_guest
        HMT_MEDIUM
+       b       kvm_unsplit_nap
+60:    HMT_MEDIUM
        b       kvm_secondary_got_guest
 
 54:    li      r0, KVM_HWTHREAD_IN_KVM
        stb     r0, HSTATE_HWTHREAD_STATE(r13)
        b       kvm_no_guest
 
+/*
+ * Here the primary thread is trying to return the core to
+ * whole-core mode, so we need to nap.
+ */
+kvm_unsplit_nap:
+       /* clear any pending message */
+BEGIN_FTR_SECTION
+       lis     r6, (PPC_DBELL_SERVER << (63-36))@h
+       PPC_MSGCLR(6)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+       /* Set kvm_split_mode.napped[tid] = 1 */
+       ld      r3, HSTATE_SPLIT_MODE(r13)
+       li      r0, 1
+       lhz     r4, PACAPACAINDEX(r13)
+       clrldi  r4, r4, 61      /* micro-threading => P8 => 8 threads/core */
+       addi    r4, r4, KVM_SPLIT_NAPPED
+       stbx    r0, r3, r4
+       /* Check the do_nap flag again after setting napped[] */
+       sync
+       lbz     r0, KVM_SPLIT_DO_NAP(r3)
+       cmpwi   r0, 0
+       beq     57f
+       li      r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
+       mfspr   r4, SPRN_LPCR
+       rlwimi  r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
+       mtspr   SPRN_LPCR, r4
+       isync
+       std     r0, HSTATE_SCRATCH0(r13)
+       ptesync
+       ld      r0, HSTATE_SCRATCH0(r13)
+1:     cmpd    r0, r0
+       bne     1b
+       nap
+       b       .
+
+57:    li      r0, 0
+       stbx    r0, r3, r4
+       b       kvm_no_guest
+
 /******************************************************************************
  *                                                                            *
  *                               Entry code                                   *
@@ -854,7 +925,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
        cmpwi   r0, 0
        bne     21f
        HMT_LOW
-20:    lbz     r0, VCORE_IN_GUEST(r5)
+20:    lwz     r3, VCORE_ENTRY_EXIT(r5)
+       cmpwi   r3, 0x100
+       bge     no_switch_exit
+       lbz     r0, VCORE_IN_GUEST(r5)
        cmpwi   r0, 0
        beq     20b
        HMT_MEDIUM
@@ -985,9 +1059,13 @@ secondary_too_late:
 #endif
 11:    b       kvmhv_switch_to_host
 
+no_switch_exit:
+       HMT_MEDIUM
+       li      r12, 0
+       b       12f
 hdec_soon:
        li      r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-       stw     r12, VCPU_TRAP(r4)
+12:    stw     r12, VCPU_TRAP(r4)
        mr      r9, r4
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
        addi    r3, r4, VCPU_TB_RMEXIT
@@ -1545,12 +1623,17 @@ kvmhv_switch_to_host:
 
        /* Primary thread waits for all the secondaries to exit guest */
 15:    lwz     r3,VCORE_ENTRY_EXIT(r5)
-       srwi    r0,r3,8
+       rlwinm  r0,r3,32-8,0xff
        clrldi  r3,r3,56
        cmpw    r3,r0
        bne     15b
        isync
 
+       /* Did we actually switch to the guest at all? */
+       lbz     r6, VCORE_IN_GUEST(r5)
+       cmpwi   r6, 0
+       beq     19f
+
        /* Primary thread switches back to host partition */
        ld      r6,KVM_HOST_SDR1(r4)
        lwz     r7,KVM_HOST_LPID(r4)
@@ -1594,7 +1677,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 18:
        /* Signal secondary CPUs to continue */
        stb     r0,VCORE_IN_GUEST(r5)
-       lis     r8,0x7fff               /* MAX_INT@h */
+19:    lis     r8,0x7fff               /* MAX_INT@h */
        mtspr   SPRN_HDEC,r8
 
 16:    ld      r8,KVM_HOST_LPCR(r4)