s390: add SMT support
author    Martin Schwidefsky <schwidefsky@de.ibm.com>
          Wed, 14 Jan 2015 16:52:10 +0000 (17:52 +0100)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
          Thu, 22 Jan 2015 11:16:01 +0000 (12:16 +0100)
The multi-threading facility is introduced with the z13 processor family.
This patch adds code to detect the multi-threading facility. With the
facility enabled, each core surfaces multiple hardware threads to the
system. Each hardware thread looks like a normal CPU to the operating
system, with all its registers and properties.

The SCLP interface reports the SMT topology indirectly via the maximum
thread id. Each CPU reported in the result of a read-scp-information
request is a core, representing a number of hardware threads.
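
For illustration only (not part of the patch): a minimal user-space C
sketch of the resulting core-id to CPU-address mapping, mirroring the
pcpu_set_smt() and __smp_rescan_cpus() hunks below. The thread count is
a made-up example value, not something read from SCLP.

  #include <stdio.h>

  int main(void)
  {
          unsigned int smp_cpu_mtid = 1;      /* example: max thread id 1 */
          unsigned int smp_cpu_mt_shift = 0;
          unsigned int core_id, j;

          /* smallest shift with (1U << shift) > mtid, as in pcpu_set_smt() */
          while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
                  smp_cpu_mt_shift++;

          for (core_id = 0; core_id < 2; core_id++)
                  for (j = 0; j <= smp_cpu_mtid; j++)
                          printf("core %u thread %u -> CPU address %u\n",
                                 core_id, j,
                                 (core_id << smp_cpu_mt_shift) + j);
          return 0;
  }

With a maximum thread id of 1 this prints addresses 0-1 for core 0 and
2-3 for core 1; each hardware thread gets its own CPU address derived
from the core id reported by SCLP.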

To reflect the reduced CPU capacity when two hardware threads run on a
single core, the MT utilization counter set is used to normalize the
raw cputime obtained from the CPU timer deltas. This scaled cputime is
reported via the taskstats interface. The normal /proc/stat numbers
are based on the raw cputime and are not affected by the normalization.
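
A sketch of the normalization arithmetic, mirroring do_account_vtime()
in the vtime.c hunk below (the cycle deltas are made-up numbers): the
scaling factor is mult/div, where mult is the sum of the per-thread
cycle deltas from the MT utilization counter set and div weights the
delta of thread i with (i + 1), so the scaled cputime never exceeds
the raw cputime.

  #include <stdio.h>

  int main(void)
  {
          /* hypothetical MT utilization counter deltas for two threads */
          unsigned long long delta[2] = { 800, 600 };
          unsigned long long mult = 0, div = 0, raw = 1000;
          int i;

          for (i = 0; i < 2; i++) {
                  mult += delta[i];               /* 800 + 600 = 1400 */
                  div += (i + 1ULL) * delta[i];   /* 800 + 1200 = 2000 */
          }
          /* a raw cputime of 1000 scales to 1000 * 1400 / 2000 = 700 */
          printf("scaled = %llu\n", raw * mult / div);
          return 0;
  }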

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
18 files changed:
arch/s390/Kconfig
arch/s390/include/asm/cpu_mf.h
arch/s390/include/asm/reset.h
arch/s390/include/asm/sclp.h
arch/s390/include/asm/sigp.h
arch/s390/include/asm/smp.h
arch/s390/include/asm/sysinfo.h
arch/s390/include/asm/topology.h
arch/s390/kernel/base.S
arch/s390/kernel/dis.c
arch/s390/kernel/ipl.c
arch/s390/kernel/machine_kexec.c
arch/s390/kernel/smp.c
arch/s390/kernel/sysinfo.c
arch/s390/kernel/topology.c
arch/s390/kernel/vtime.c
drivers/s390/char/sclp_early.c
drivers/s390/cio/cio.c

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 06c6d9ab5a8da73c8ed08fbcf35843a743177299..7eba5b5723e98614745f43c1258c7f76033eb5fd 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -396,17 +396,26 @@ config HOTPLUG_CPU
          can be controlled through /sys/devices/system/cpu/cpu#.
          Say N if you want to disable CPU hotplug.
 
+config SCHED_SMT
+       def_bool n
+
 config SCHED_MC
        def_bool n
 
 config SCHED_BOOK
+       def_bool n
+
+config SCHED_TOPOLOGY
        def_bool y
-       prompt "Book scheduler support"
+       prompt "Topology scheduler support"
        depends on SMP
+       select SCHED_SMT
        select SCHED_MC
+       select SCHED_BOOK
        help
-         Book scheduler support improves the CPU scheduler's decision making
-         when dealing with machines that have several books.
+         Topology scheduler support improves the CPU scheduler's decision
+         making when dealing with machines that have multi-threading,
+         multiple cores or multiple books.
 
 source kernel/Kconfig.preempt
 
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index cb700d54bd832a91256803b35bbff06adc3a95d3..5243a8679a1dcb7cc69037714fabba64c652dcf1 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -189,6 +189,20 @@ static inline int ecctr(u64 ctr, u64 *val)
        return cc;
 }
 
+/* Store CPU counter multiple for the MT utilization counter set */
+static inline int stcctm5(u64 num, u64 *val)
+{
+       typedef struct { u64 _[num]; } addrtype;
+       int cc;
+
+       asm volatile (
+               "       .insn   rsy,0xeb0000000017,%2,5,%1\n"
+               "       ipm     %0\n"
+               "       srl     %0,28\n"
+               : "=d" (cc), "=Q" (*(addrtype *) val)  : "d" (num) : "cc");
+       return cc;
+}
+
 /* Query sampling information */
 static inline int qsi(struct hws_qsi_info_block *info)
 {
diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h
index 804578587a7a87e6e0b9eb56a1aee121bf9d2fdf..72786067b300528632dfb2a6f30917fddc48a2e1 100644
--- a/arch/s390/include/asm/reset.h
+++ b/arch/s390/include/asm/reset.h
@@ -15,5 +15,6 @@ struct reset_call {
 
 extern void register_reset_call(struct reset_call *reset);
 extern void unregister_reset_call(struct reset_call *reset);
-extern void s390_reset_system(void (*func)(void *), void *data);
+extern void s390_reset_system(void (*fn_pre)(void),
+                             void (*fn_post)(void *), void *data);
 #endif /* _ASM_S390_RESET_H */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index b6f8066789c1cc15eee6a69ac88b7392f10baf9a..edb453cfc2c635b36dcae386cdac5b317e02ddaa 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -27,7 +27,7 @@ struct sclp_ipl_info {
 };
 
 struct sclp_cpu_entry {
-       u8 address;
+       u8 core_id;
        u8 reserved0[2];
        u8 : 3;
        u8 siif : 1;
@@ -51,6 +51,9 @@ int sclp_cpu_deconfigure(u8 cpu);
 unsigned long long sclp_get_rnmax(void);
 unsigned long long sclp_get_rzm(void);
 unsigned int sclp_get_max_cpu(void);
+unsigned int sclp_get_mtid(u8 cpu_type);
+unsigned int sclp_get_mtid_max(void);
+unsigned int sclp_get_mtid_prev(void);
 int sclp_sdias_blk_count(void);
 int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
 int sclp_chp_configure(struct chp_id chpid);
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 49576115dbb76da7659bdcceffca7f1452e37fdf..c49d9c0483a83f0c0178e512ba8d6c1cbb889fd4 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -15,6 +15,7 @@
 #define SIGP_SET_ARCHITECTURE       18
 #define SIGP_COND_EMERGENCY_SIGNAL   19
 #define SIGP_SENSE_RUNNING          21
+#define SIGP_SET_MULTI_THREADING     22
 #define SIGP_STORE_ADDITIONAL_STATUS 23
 
 /* SIGP condition codes */
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 762d4f88af5ae3453402e99fd216bd612e2d4377..b3bd0282dd9870c93f1ddd66472afc4170b356a1 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -16,6 +16,8 @@
 #define raw_smp_processor_id() (S390_lowcore.cpu_nr)
 
 extern struct mutex smp_cpu_state_mutex;
+extern unsigned int smp_cpu_mt_shift;
+extern unsigned int smp_cpu_mtid;
 
 extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
 
@@ -35,6 +37,8 @@ extern void smp_fill_possible_mask(void);
 
 #else /* CONFIG_SMP */
 
+#define smp_cpu_mtid   0
+
 static inline void smp_call_ipl_cpu(void (*func)(void *), void *data)
 {
        func(data);
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index f92428e459f8b8cc7352321332ca52431f55f51f..73f12d21af4dc4f5ce3856e95761b8521b5a2e5b 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -90,7 +90,11 @@ struct sysinfo_2_2_2 {
        unsigned short cpus_reserved;
        char name[8];
        unsigned int caf;
-       char reserved_2[16];
+       char reserved_2[8];
+       unsigned char mt_installed;
+       unsigned char mt_general;
+       unsigned char mt_psmtid;
+       char reserved_3[5];
        unsigned short cpus_dedicated;
        unsigned short cpus_shared;
 };
@@ -120,26 +124,28 @@ struct sysinfo_3_2_2 {
 
 extern int topology_max_mnest;
 
-#define TOPOLOGY_CPU_BITS      64
+#define TOPOLOGY_CORE_BITS     64
 #define TOPOLOGY_NR_MAG                6
 
-struct topology_cpu {
-       unsigned char reserved0[4];
+struct topology_core {
+       unsigned char nl;
+       unsigned char reserved0[3];
        unsigned char :6;
        unsigned char pp:2;
        unsigned char reserved1;
        unsigned short origin;
-       unsigned long mask[TOPOLOGY_CPU_BITS / BITS_PER_LONG];
+       unsigned long mask[TOPOLOGY_CORE_BITS / BITS_PER_LONG];
 };
 
 struct topology_container {
-       unsigned char reserved[7];
+       unsigned char nl;
+       unsigned char reserved[6];
        unsigned char id;
 };
 
 union topology_entry {
        unsigned char nl;
-       struct topology_cpu cpu;
+       struct topology_core cpu;
        struct topology_container container;
 };
 
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 56af53093d24d3660ed1267e229eccb6925b56ef..c4fbb9527c5ca2b553ca98c3f7887ea236fdff1c 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -9,9 +9,11 @@ struct cpu;
 #ifdef CONFIG_SCHED_BOOK
 
 struct cpu_topology_s390 {
+       unsigned short thread_id;
        unsigned short core_id;
        unsigned short socket_id;
        unsigned short book_id;
+       cpumask_t thread_mask;
        cpumask_t core_mask;
        cpumask_t book_mask;
 };
@@ -19,6 +21,8 @@ struct cpu_topology_s390 {
 extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
 
 #define topology_physical_package_id(cpu)      (cpu_topology[cpu].socket_id)
+#define topology_thread_id(cpu)                        (cpu_topology[cpu].thread_id)
+#define topology_thread_cpumask(cpu)           (&cpu_topology[cpu].thread_mask)
 #define topology_core_id(cpu)                  (cpu_topology[cpu].core_id)
 #define topology_core_cpumask(cpu)             (&cpu_topology[cpu].core_mask)
 #define topology_book_id(cpu)                  (cpu_topology[cpu].book_id)
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index 797a823a22755d9b76f8c12717e043226c7395bd..f74a53d339b0b79b361304cffa9dacf8aaac2106 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -97,7 +97,8 @@ ENTRY(diag308_reset)
        lg      %r4,0(%r4)              # Save PSW
        sturg   %r4,%r3                 # Use sturg, because of large pages
        lghi    %r1,1
-       diag    %r1,%r1,0x308
+       lghi    %r0,0
+       diag    %r0,%r1,0x308
 .Lrestart_part2:
        lhi     %r0,0                   # Load r0 with zero
        lhi     %r1,2                   # Use mode 2 = ESAME (dump)
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index d46d0b0b2cdab03eab29c059a1dd0ab8d885a802..533430307da8f55cafb70b69441691fdf2be6702 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -137,7 +137,7 @@ enum {
        INSTR_RSI_RRP,
        INSTR_RSL_LRDFU, INSTR_RSL_R0RD,
        INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD,
-       INSTR_RSY_RDRM,
+       INSTR_RSY_RDRM, INSTR_RSY_RMRD,
        INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD,
        INSTR_RS_RURD,
        INSTR_RXE_FRRD, INSTR_RXE_RRRD, INSTR_RXE_RRRDM,
@@ -307,6 +307,7 @@ static const unsigned char formats[][7] = {
        [INSTR_RSY_AARD]  = { 0xff, A_8,A_12,D20_20,B_16,0,0 },
        [INSTR_RSY_CCRD]  = { 0xff, C_8,C_12,D20_20,B_16,0,0 },
        [INSTR_RSY_RDRM]  = { 0xff, R_8,D20_20,B_16,U4_12,0,0 },
+       [INSTR_RSY_RMRD]  = { 0xff, R_8,U4_12,D20_20,B_16,0,0 },
        [INSTR_RSY_RRRD]  = { 0xff, R_8,R_12,D20_20,B_16,0,0 },
        [INSTR_RSY_RURD]  = { 0xff, R_8,U4_12,D20_20,B_16,0,0 },
        [INSTR_RS_AARD]   = { 0xff, A_8,A_12,D_20,B_16,0,0 },
@@ -450,7 +451,8 @@ enum {
        LONG_INSN_VERLLV,
        LONG_INSN_VESRAV,
        LONG_INSN_VESRLV,
-       LONG_INSN_VSBCBI
+       LONG_INSN_VSBCBI,
+       LONG_INSN_STCCTM
 };
 
 static char *long_insn_name[] = {
@@ -530,6 +532,7 @@ static char *long_insn_name[] = {
        [LONG_INSN_VESRAV] = "vesrav",
        [LONG_INSN_VESRLV] = "vesrlv",
        [LONG_INSN_VSBCBI] = "vsbcbi",
+       [LONG_INSN_STCCTM] = "stcctm",
 };
 
 static struct s390_insn opcode[] = {
@@ -1655,6 +1658,7 @@ static struct s390_insn opcode_eb[] = {
        { "lric", 0x60, INSTR_RSY_RDRM },
        { "stric", 0x61, INSTR_RSY_RDRM },
        { "mric", 0x62, INSTR_RSY_RDRM },
+       { { 0, LONG_INSN_STCCTM }, 0x17, INSTR_RSY_RMRD },
 #endif
        { "rll", 0x1d, INSTR_RSY_RRRD },
        { "mvclu", 0x8e, INSTR_RSY_RRRD },
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 39badb9ca0b30c6b7b32ce720165e85c3c33319a..5c8651f3650937c06c8cfcd0f306104b89435b59 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2074,7 +2074,8 @@ static void do_reset_calls(void)
 
 u32 dump_prefix_page;
 
-void s390_reset_system(void (*func)(void *), void *data)
+void s390_reset_system(void (*fn_pre)(void),
+                      void (*fn_post)(void *), void *data)
 {
        struct _lowcore *lc;
 
@@ -2112,7 +2113,11 @@ void s390_reset_system(void (*func)(void *), void *data)
        /* Store status at absolute zero */
        store_status();
 
+       /* Call function before reset */
+       if (fn_pre)
+               fn_pre();
        do_reset_calls();
-       if (func)
-               func(data);
+       /* Call function after reset */
+       if (fn_post)
+               fn_post(data);
 }
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 4685337fa7c6bf9464630f5ec648ca65e53cea0c..fb0901ec4306b833920b4f1b9386afc64c5573d0 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -103,21 +103,18 @@ static int __init machine_kdump_pm_init(void)
        return 0;
 }
 arch_initcall(machine_kdump_pm_init);
-#endif
 
 /*
  * Start kdump: We expect here that a store status has been done on our CPU
  */
 static void __do_machine_kdump(void *image)
 {
-#ifdef CONFIG_CRASH_DUMP
        int (*start_kdump)(int) = (void *)((struct kimage *) image)->start;
 
-       setup_regs();
        __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
        start_kdump(1);
-#endif
 }
+#endif
 
 /*
  * Check if kdump checksums are valid: We call purgatory with parameter "0"
@@ -249,18 +246,18 @@ static void __do_machine_kexec(void *data)
  */
 static void __machine_kexec(void *data)
 {
-       struct kimage *image = data;
-
        __arch_local_irq_stosm(0x04); /* enable DAT */
        pfault_fini();
        tracing_off();
        debug_locks_off();
-       if (image->type == KEXEC_TYPE_CRASH) {
+#ifdef CONFIG_CRASH_DUMP
+       if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH) {
+
                lgr_info_log();
-               s390_reset_system(__do_machine_kdump, data);
-       } else {
-               s390_reset_system(__do_machine_kexec, data);
-       }
+               s390_reset_system(setup_regs, __do_machine_kdump, data);
+       } else
+#endif
+               s390_reset_system(NULL, __do_machine_kexec, data);
        disabled_wait((unsigned long) __builtin_return_address(0));
 }
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 0b499f5cbe19c18d43c9360297407e86ed6a1212..370ff3a092a3952a843910a6d3c72bbdd946793b 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -71,9 +71,30 @@ struct pcpu {
 };
 
 static u8 boot_cpu_type;
-static u16 boot_cpu_address;
 static struct pcpu pcpu_devices[NR_CPUS];
 
+unsigned int smp_cpu_mt_shift;
+EXPORT_SYMBOL(smp_cpu_mt_shift);
+
+unsigned int smp_cpu_mtid;
+EXPORT_SYMBOL(smp_cpu_mtid);
+
+static unsigned int smp_max_threads __initdata = -1U;
+
+static int __init early_nosmt(char *s)
+{
+       smp_max_threads = 1;
+       return 0;
+}
+early_param("nosmt", early_nosmt);
+
+static int __init early_smt(char *s)
+{
+       get_option(&s, &smp_max_threads);
+       return 0;
+}
+early_param("smt", early_smt);
+
 /*
  * The smp_cpu_state_mutex must be held when changing the state or polarization
  * member of a pcpu data structure within the pcpu_devices array.
@@ -132,7 +153,7 @@ static inline int pcpu_running(struct pcpu *pcpu)
 /*
  * Find struct pcpu by cpu address.
  */
-static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address)
+static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
 {
        int cpu;
 
@@ -298,6 +319,32 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
        for (;;) ;
 }
 
+/*
+ * Enable additional logical cpus for multi-threading.
+ */
+static int pcpu_set_smt(unsigned int mtid)
+{
+       register unsigned long reg1 asm ("1") = (unsigned long) mtid;
+       int cc;
+
+       if (smp_cpu_mtid == mtid)
+               return 0;
+       asm volatile(
+               "       sigp    %1,0,%2 # sigp set multi-threading\n"
+               "       ipm     %0\n"
+               "       srl     %0,28\n"
+               : "=d" (cc) : "d" (reg1), "K" (SIGP_SET_MULTI_THREADING)
+               : "cc");
+       if (cc == 0) {
+               smp_cpu_mtid = mtid;
+               smp_cpu_mt_shift = 0;
+               while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
+                       smp_cpu_mt_shift++;
+               pcpu_devices[0].address = stap();
+       }
+       return cc;
+}
+
 /*
  * Call function on an online CPU.
  */
@@ -512,22 +559,17 @@ EXPORT_SYMBOL(smp_ctl_clear_bit);
 
 #ifdef CONFIG_CRASH_DUMP
 
-static void __init smp_get_save_area(int cpu, u16 address)
+static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
 {
        void *lc = pcpu_devices[0].lowcore;
        struct save_area_ext *sa_ext;
        unsigned long vx_sa;
 
-       if (is_kdump_kernel())
-               return;
-       if (!OLDMEM_BASE && (address == boot_cpu_address ||
-                            ipl_info.type != IPL_TYPE_FCP_DUMP))
-               return;
        sa_ext = dump_save_area_create(cpu);
        if (!sa_ext)
                panic("could not allocate memory for save area\n");
-       if (address == boot_cpu_address) {
-               /* Copy the registers of the boot cpu. */
+       if (is_boot_cpu) {
+               /* Copy the registers of the boot CPU. */
                copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa),
                                 SAVE_AREA_BASE - PAGE_SIZE, 0);
                if (MACHINE_HAS_VX)
@@ -548,6 +590,64 @@ static void __init smp_get_save_area(int cpu, u16 address)
        free_page(vx_sa);
 }
 
+/*
+ * Collect CPU state of the previous, crashed system.
+ * There are four cases:
+ * 1) standard zfcp dump
+ *    condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ *    The state for all CPUs except the boot CPU needs to be collected
+ *    with sigp stop-and-store-status. The boot CPU state is located in
+ *    the absolute lowcore of the memory stored in the HSA. The zcore code
+ *    will allocate the save area and copy the boot CPU state from the HSA.
+ * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory)
+ *    condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ *    The state for all CPUs except the boot CPU needs to be collected
+ *    with sigp stop-and-store-status. The firmware or the boot-loader
+ *    stored the registers of the boot CPU in the absolute lowcore in the
+ *    memory of the old system.
+ * 3) kdump and the old kernel did not store the CPU state,
+ *    or stand-alone kdump for DASD
+ *    condition: OLDMEM_BASE != NULL && !is_kdump_kernel()
+ *    The state for all CPUs except the boot CPU needs to be collected
+ *    with sigp stop-and-store-status. The kexec code or the boot-loader
+ *    stored the registers of the boot CPU in the memory of the old system.
+ * 4) kdump and the old kernel stored the CPU state
+ *    condition: OLDMEM_BASE != NULL && is_kdump_kernel()
+ *    The state of all CPUs is stored in ELF sections in the memory of the
+ *    old system. The ELF sections are picked up by the crash_dump code
+ *    via elfcorehdr_addr.
+ */
+static void __init smp_store_cpu_states(struct sclp_cpu_info *info)
+{
+       unsigned int cpu, address, i, j;
+       int is_boot_cpu;
+
+       if (is_kdump_kernel())
+               /* Previous system stored the CPU states. Nothing to do. */
+               return;
+       if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP))
+               /* No previous system present, normal boot. */
+               return;
+       /* Set multi-threading state to the previous system. */
+       pcpu_set_smt(sclp_get_mtid_prev());
+       /* Collect CPU states. */
+       cpu = 0;
+       for (i = 0; i < info->configured; i++) {
+               /* Skip CPUs with different CPU type. */
+               if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+                       continue;
+               for (j = 0; j <= smp_cpu_mtid; j++, cpu++) {
+                       address = (info->cpu[i].core_id << smp_cpu_mt_shift) + j;
+                       is_boot_cpu = (address == pcpu_devices[0].address);
+                       if (is_boot_cpu && !OLDMEM_BASE)
+                               /* Skip boot CPU for standard zfcp dump. */
+                               continue;
+                       /* Get state for this CPU. */
+                       __smp_store_cpu_state(cpu, address, is_boot_cpu);
+               }
+       }
+}
+
 int smp_store_status(int cpu)
 {
        unsigned long vx_sa;
@@ -565,10 +665,6 @@ int smp_store_status(int cpu)
        return 0;
 }
 
-#else /* CONFIG_CRASH_DUMP */
-
-static inline void smp_get_save_area(int cpu, u16 address) { }
-
 #endif /* CONFIG_CRASH_DUMP */
 
 void smp_cpu_set_polarization(int cpu, int val)
@@ -590,11 +686,13 @@ static struct sclp_cpu_info *smp_get_cpu_info(void)
        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (info && (use_sigp_detection || sclp_get_cpu_info(info))) {
                use_sigp_detection = 1;
-               for (address = 0; address <= MAX_CPU_ADDRESS; address++) {
+               for (address = 0; address <= MAX_CPU_ADDRESS;
+                    address += (1U << smp_cpu_mt_shift)) {
                        if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) ==
                            SIGP_CC_NOT_OPERATIONAL)
                                continue;
-                       info->cpu[info->configured].address = address;
+                       info->cpu[info->configured].core_id =
+                               address >> smp_cpu_mt_shift;
                        info->configured++;
                }
                info->combined = info->configured;
@@ -608,7 +706,8 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
 {
        struct pcpu *pcpu;
        cpumask_t avail;
-       int cpu, nr, i;
+       int cpu, nr, i, j;
+       u16 address;
 
        nr = 0;
        cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
@@ -616,51 +715,76 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
        for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
                if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
                        continue;
-               if (pcpu_find_address(cpu_present_mask, info->cpu[i].address))
-                       continue;
-               pcpu = pcpu_devices + cpu;
-               pcpu->address = info->cpu[i].address;
-               pcpu->state = (i >= info->configured) ?
-                       CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
-               smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
-               set_cpu_present(cpu, true);
-               if (sysfs_add && smp_add_present_cpu(cpu) != 0)
-                       set_cpu_present(cpu, false);
-               else
-                       nr++;
-               cpu = cpumask_next(cpu, &avail);
+               address = info->cpu[i].core_id << smp_cpu_mt_shift;
+               for (j = 0; j <= smp_cpu_mtid; j++) {
+                       if (pcpu_find_address(cpu_present_mask, address + j))
+                               continue;
+                       pcpu = pcpu_devices + cpu;
+                       pcpu->address = address + j;
+                       pcpu->state =
+                               (cpu >= info->configured*(smp_cpu_mtid + 1)) ?
+                               CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
+                       smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+                       set_cpu_present(cpu, true);
+                       if (sysfs_add && smp_add_present_cpu(cpu) != 0)
+                               set_cpu_present(cpu, false);
+                       else
+                               nr++;
+                       cpu = cpumask_next(cpu, &avail);
+                       if (cpu >= nr_cpu_ids)
+                               break;
+               }
        }
        return nr;
 }
 
 static void __init smp_detect_cpus(void)
 {
-       unsigned int cpu, c_cpus, s_cpus;
+       unsigned int cpu, mtid, c_cpus, s_cpus;
        struct sclp_cpu_info *info;
+       u16 address;
 
+       /* Get CPU information */
        info = smp_get_cpu_info();
        if (!info)
                panic("smp_detect_cpus failed to allocate memory\n");
+
+       /* Find boot CPU type */
        if (info->has_cpu_type) {
-               for (cpu = 0; cpu < info->combined; cpu++) {
-                       if (info->cpu[cpu].address != boot_cpu_address)
-                               continue;
-                       /* The boot cpu dictates the cpu type. */
-                       boot_cpu_type = info->cpu[cpu].type;
-                       break;
-               }
+               address = stap();
+               for (cpu = 0; cpu < info->combined; cpu++)
+                       if (info->cpu[cpu].core_id == address) {
+                               /* The boot cpu dictates the cpu type. */
+                               boot_cpu_type = info->cpu[cpu].type;
+                               break;
+                       }
+               if (cpu >= info->combined)
+                       panic("Could not find boot CPU type");
        }
+
+#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP)
+       /* Collect CPU state of previous system */
+       smp_store_cpu_states(info);
+#endif
+
+       /* Set multi-threading state for the current system */
+       mtid = sclp_get_mtid(boot_cpu_type);
+       mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
+       pcpu_set_smt(mtid);
+
+       /* Print number of CPUs */
        c_cpus = s_cpus = 0;
        for (cpu = 0; cpu < info->combined; cpu++) {
                if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)
                        continue;
-               if (cpu < info->configured) {
-                       smp_get_save_area(c_cpus, info->cpu[cpu].address);
-                       c_cpus++;
-               } else
-                       s_cpus++;
+               if (cpu < info->configured)
+                       c_cpus += smp_cpu_mtid + 1;
+               else
+                       s_cpus += smp_cpu_mtid + 1;
        }
        pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
+
+       /* Add CPUs present at boot */
        get_online_cpus();
        __smp_rescan_cpus(info, 0);
        put_online_cpus();
@@ -696,12 +820,23 @@ static void smp_start_secondary(void *cpuvoid)
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
        struct pcpu *pcpu;
-       int rc;
+       int base, i, rc;
 
        pcpu = pcpu_devices + cpu;
        if (pcpu->state != CPU_STATE_CONFIGURED)
                return -EIO;
-       if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) !=
+       base = cpu - (cpu % (smp_cpu_mtid + 1));
+       for (i = 0; i <= smp_cpu_mtid; i++) {
+               if (base + i < nr_cpu_ids)
+                       if (cpu_online(base + i))
+                               break;
+       }
+       /*
+        * If this is the first CPU of the core to get online
+        * do an initial CPU reset.
+        */
+       if (i > smp_cpu_mtid &&
+           pcpu_sigp_retry(pcpu_devices + base, SIGP_INITIAL_CPU_RESET, 0) !=
            SIGP_CC_ORDER_CODE_ACCEPTED)
                return -EIO;
 
@@ -774,7 +909,8 @@ void __init smp_fill_possible_mask(void)
 {
        unsigned int possible, sclp, cpu;
 
-       sclp = sclp_get_max_cpu() ?: nr_cpu_ids;
+       sclp = min(smp_max_threads, sclp_get_mtid_max() + 1);
+       sclp = sclp_get_max_cpu()*sclp ?: nr_cpu_ids;
        possible = setup_possible_cpus ?: nr_cpu_ids;
        possible = min(possible, sclp);
        for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
@@ -796,9 +932,8 @@ void __init smp_prepare_boot_cpu(void)
 {
        struct pcpu *pcpu = pcpu_devices;
 
-       boot_cpu_address = stap();
        pcpu->state = CPU_STATE_CONFIGURED;
-       pcpu->address = boot_cpu_address;
+       pcpu->address = stap();
        pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
        pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE
                + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
@@ -848,7 +983,7 @@ static ssize_t cpu_configure_store(struct device *dev,
                                   const char *buf, size_t count)
 {
        struct pcpu *pcpu;
-       int cpu, val, rc;
+       int cpu, val, rc, i;
        char delim;
 
        if (sscanf(buf, "%d %c", &val, &delim) != 1)
@@ -860,29 +995,43 @@ static ssize_t cpu_configure_store(struct device *dev,
        rc = -EBUSY;
        /* disallow configuration changes of online cpus and cpu 0 */
        cpu = dev->id;
-       if (cpu_online(cpu) || cpu == 0)
+       cpu -= cpu % (smp_cpu_mtid + 1);
+       if (cpu == 0)
                goto out;
+       for (i = 0; i <= smp_cpu_mtid; i++)
+               if (cpu_online(cpu + i))
+                       goto out;
        pcpu = pcpu_devices + cpu;
        rc = 0;
        switch (val) {
        case 0:
                if (pcpu->state != CPU_STATE_CONFIGURED)
                        break;
-               rc = sclp_cpu_deconfigure(pcpu->address);
+               rc = sclp_cpu_deconfigure(pcpu->address >> smp_cpu_mt_shift);
                if (rc)
                        break;
-               pcpu->state = CPU_STATE_STANDBY;
-               smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+               for (i = 0; i <= smp_cpu_mtid; i++) {
+                       if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+                               continue;
+                       pcpu[i].state = CPU_STATE_STANDBY;
+                       smp_cpu_set_polarization(cpu + i,
+                                                POLARIZATION_UNKNOWN);
+               }
                topology_expect_change();
                break;
        case 1:
                if (pcpu->state != CPU_STATE_STANDBY)
                        break;
-               rc = sclp_cpu_configure(pcpu->address);
+               rc = sclp_cpu_configure(pcpu->address >> smp_cpu_mt_shift);
                if (rc)
                        break;
-               pcpu->state = CPU_STATE_CONFIGURED;
-               smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+               for (i = 0; i <= smp_cpu_mtid; i++) {
+                       if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+                               continue;
+                       pcpu[i].state = CPU_STATE_CONFIGURED;
+                       smp_cpu_set_polarization(cpu + i,
+                                                POLARIZATION_UNKNOWN);
+               }
                topology_expect_change();
                break;
        default:
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 811f542b8ed4adc3e209924e57c618e2d32e923c..85565f1ff4743abbf546d67eb35ff2cb10e09e91 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -194,6 +194,14 @@ static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info)
        seq_printf(m, "LPAR CPUs Reserved:   %d\n", info->cpus_reserved);
        seq_printf(m, "LPAR CPUs Dedicated:  %d\n", info->cpus_dedicated);
        seq_printf(m, "LPAR CPUs Shared:     %d\n", info->cpus_shared);
+       if (info->mt_installed & 0x80) {
+               seq_printf(m, "LPAR CPUs G-MTID:     %d\n",
+                          info->mt_general & 0x1f);
+               seq_printf(m, "LPAR CPUs S-MTID:     %d\n",
+                          info->mt_installed & 0x1f);
+               seq_printf(m, "LPAR CPUs PS-MTID:    %d\n",
+                          info->mt_psmtid & 0x1f);
+       }
 }
 
 static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index b93bed76ea94026d45c782eebe75c08dfb1a323d..24ee33f1af24228e04686700523819fcb77afdf2 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -59,32 +59,50 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
        return mask;
 }
 
-static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu,
+static cpumask_t cpu_thread_map(unsigned int cpu)
+{
+       cpumask_t mask;
+       int i;
+
+       cpumask_copy(&mask, cpumask_of(cpu));
+       if (!topology_enabled || !MACHINE_HAS_TOPOLOGY)
+               return mask;
+       cpu -= cpu % (smp_cpu_mtid + 1);
+       for (i = 0; i <= smp_cpu_mtid; i++)
+               if (cpu_present(cpu + i))
+                       cpumask_set_cpu(cpu + i, &mask);
+       return mask;
+}
+
+static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
                                          struct mask_info *book,
                                          struct mask_info *socket,
                                          int one_socket_per_cpu)
 {
-       unsigned int cpu;
+       unsigned int core;
 
-       for_each_set_bit(cpu, &tl_cpu->mask[0], TOPOLOGY_CPU_BITS) {
-               unsigned int rcpu;
-               int lcpu;
+       for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) {
+               unsigned int rcore;
+               int lcpu, i;
 
-               rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin;
-               lcpu = smp_find_processor_id(rcpu);
+               rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
+               lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
                if (lcpu < 0)
                        continue;
-               cpumask_set_cpu(lcpu, &book->mask);
-               cpu_topology[lcpu].book_id = book->id;
-               cpumask_set_cpu(lcpu, &socket->mask);
-               cpu_topology[lcpu].core_id = rcpu;
-               if (one_socket_per_cpu) {
-                       cpu_topology[lcpu].socket_id = rcpu;
-                       socket = socket->next;
-               } else {
-                       cpu_topology[lcpu].socket_id = socket->id;
+               for (i = 0; i <= smp_cpu_mtid; i++) {
+                       cpu_topology[lcpu + i].book_id = book->id;
+                       cpu_topology[lcpu + i].core_id = rcore;
+                       cpu_topology[lcpu + i].thread_id = lcpu + i;
+                       cpumask_set_cpu(lcpu + i, &book->mask);
+                       cpumask_set_cpu(lcpu + i, &socket->mask);
+                       if (one_socket_per_cpu)
+                               cpu_topology[lcpu + i].socket_id = rcore;
+                       else
+                               cpu_topology[lcpu + i].socket_id = socket->id;
+                       smp_cpu_set_polarization(lcpu + i, tl_core->pp);
                }
-               smp_cpu_set_polarization(lcpu, tl_cpu->pp);
+               if (one_socket_per_cpu)
+                       socket = socket->next;
        }
        return socket;
 }
@@ -108,7 +126,7 @@ static void clear_masks(void)
 static union topology_entry *next_tle(union topology_entry *tle)
 {
        if (!tle->nl)
-               return (union topology_entry *)((struct topology_cpu *)tle + 1);
+               return (union topology_entry *)((struct topology_core *)tle + 1);
        return (union topology_entry *)((struct topology_container *)tle + 1);
 }
 
@@ -231,9 +249,11 @@ static void update_cpu_masks(void)
 
        spin_lock_irqsave(&topology_lock, flags);
        for_each_possible_cpu(cpu) {
+               cpu_topology[cpu].thread_mask = cpu_thread_map(cpu);
                cpu_topology[cpu].core_mask = cpu_group_map(&socket_info, cpu);
                cpu_topology[cpu].book_mask = cpu_group_map(&book_info, cpu);
                if (!MACHINE_HAS_TOPOLOGY) {
+                       cpu_topology[cpu].thread_id = cpu;
                        cpu_topology[cpu].core_id = cpu;
                        cpu_topology[cpu].socket_id = cpu;
                        cpu_topology[cpu].book_id = cpu;
@@ -445,6 +465,12 @@ int topology_cpu_init(struct cpu *cpu)
        return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
 }
 
+const struct cpumask *cpu_thread_mask(int cpu)
+{
+       return &cpu_topology[cpu].thread_mask;
+}
+
+
 const struct cpumask *cpu_coregroup_mask(int cpu)
 {
        return &cpu_topology[cpu].core_mask;
@@ -456,6 +482,7 @@ static const struct cpumask *cpu_book_mask(int cpu)
 }
 
 static struct sched_domain_topology_level s390_topology[] = {
+       { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
        { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
        { cpu_book_mask, SD_INIT_NAME(BOOK) },
        { cpu_cpu_mask, SD_INIT_NAME(DIE) },
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index e34122e539a16bad4e5727f881f982e4829cd8f0..e53d3595a7c8c1a54400d5fb3e984004cd864219 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -15,6 +15,8 @@
 #include <asm/cputime.h>
 #include <asm/vtimer.h>
 #include <asm/vtime.h>
+#include <asm/cpu_mf.h>
+#include <asm/smp.h>
 
 static void virt_timer_expire(void);
 
@@ -23,6 +25,10 @@ static DEFINE_SPINLOCK(virt_timer_lock);
 static atomic64_t virt_timer_current;
 static atomic64_t virt_timer_elapsed;
 
+static DEFINE_PER_CPU(u64, mt_cycles[32]);
+static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
+
 static inline u64 get_vtimer(void)
 {
        u64 timer;
@@ -61,6 +67,8 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 {
        struct thread_info *ti = task_thread_info(tsk);
        u64 timer, clock, user, system, steal;
+       u64 user_scaled, system_scaled;
+       int i;
 
        timer = S390_lowcore.last_update_timer;
        clock = S390_lowcore.last_update_clock;
@@ -76,15 +84,49 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
        S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
        S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
 
+       /* Do MT utilization calculation */
+       if (smp_cpu_mtid) {
+               u64 cycles_new[32], *cycles_old;
+               u64 delta, mult, div;
+
+               cycles_old = this_cpu_ptr(mt_cycles);
+               if (stcctm5(smp_cpu_mtid + 1, cycles_new) < 2) {
+                       mult = div = 0;
+                       for (i = 0; i <= smp_cpu_mtid; i++) {
+                               delta = cycles_new[i] - cycles_old[i];
+                               mult += delta;
+                               div += (i + 1) * delta;
+                       }
+                       if (mult > 0) {
+                               /* Update scaling factor */
+                               __this_cpu_write(mt_scaling_mult, mult);
+                               __this_cpu_write(mt_scaling_div, div);
+                               memcpy(cycles_old, cycles_new,
+                                      sizeof(u64) * (smp_cpu_mtid + 1));
+                       }
+               }
+       }
+
        user = S390_lowcore.user_timer - ti->user_timer;
        S390_lowcore.steal_timer -= user;
        ti->user_timer = S390_lowcore.user_timer;
-       account_user_time(tsk, user, user);
 
        system = S390_lowcore.system_timer - ti->system_timer;
        S390_lowcore.steal_timer -= system;
        ti->system_timer = S390_lowcore.system_timer;
-       account_system_time(tsk, hardirq_offset, system, system);
+
+       user_scaled = user;
+       system_scaled = system;
+       /* Do MT utilization scaling */
+       if (smp_cpu_mtid) {
+               u64 mult = __this_cpu_read(mt_scaling_mult);
+               u64 div = __this_cpu_read(mt_scaling_div);
+
+               user_scaled = (user_scaled * mult) / div;
+               system_scaled = (system_scaled * mult) / div;
+       }
+       account_user_time(tsk, user, user_scaled);
+       account_system_time(tsk, hardirq_offset, system, system_scaled);
 
        steal = S390_lowcore.steal_timer;
        if ((s64) steal > 0) {
@@ -126,7 +168,7 @@ void vtime_account_user(struct task_struct *tsk)
 void vtime_account_irq_enter(struct task_struct *tsk)
 {
        struct thread_info *ti = task_thread_info(tsk);
-       u64 timer, system;
+       u64 timer, system, system_scaled;
 
        timer = S390_lowcore.last_update_timer;
        S390_lowcore.last_update_timer = get_vtimer();
@@ -135,7 +177,15 @@ void vtime_account_irq_enter(struct task_struct *tsk)
        system = S390_lowcore.system_timer - ti->system_timer;
        S390_lowcore.steal_timer -= system;
        ti->system_timer = S390_lowcore.system_timer;
-       account_system_time(tsk, 0, system, system);
+       system_scaled = system;
+       /* Do MT utilization scaling */
+       if (smp_cpu_mtid) {
+               u64 mult = __this_cpu_read(mt_scaling_mult);
+               u64 div = __this_cpu_read(mt_scaling_div);
+
+               system_scaled = (system_scaled * mult) / div;
+       }
+       account_system_time(tsk, 0, system, system_scaled);
 
        virt_timer_forward(system);
 }
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index 5bd6cb145a87d4bfd4e11f7ca139bac1f7ec51e3..daf6cd5079ec3e0052c64ba24f2ce1f5ea9f623d 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -20,26 +20,31 @@ struct read_info_sccb {
        struct  sccb_header header;     /* 0-7 */
        u16     rnmax;                  /* 8-9 */
        u8      rnsize;                 /* 10 */
-       u8      _reserved0[16 - 11];    /* 11-15 */
+       u8      _pad_11[16 - 11];       /* 11-15 */
        u16     ncpurl;                 /* 16-17 */
        u16     cpuoff;                 /* 18-19 */
-       u8      _reserved7[24 - 20];    /* 20-23 */
+       u8      _pad_20[24 - 20];       /* 20-23 */
        u8      loadparm[8];            /* 24-31 */
-       u8      _reserved1[48 - 32];    /* 32-47 */
+       u8      _pad_32[42 - 32];       /* 32-41 */
+       u8      fac42;                  /* 42 */
+       u8      fac43;                  /* 43 */
+       u8      _pad_44[48 - 44];       /* 44-47 */
        u64     facilities;             /* 48-55 */
-       u8      _reserved2a[76 - 56];   /* 56-75 */
+       u8      _pad_56[66 - 56];       /* 56-65 */
+       u8      fac66;                  /* 66 */
+       u8      _pad_67[76 - 67];       /* 67-75 */
        u32     ibc;                    /* 76-79 */
-       u8      _reserved2b[84 - 80];   /* 80-83 */
+       u8      _pad_80[84 - 80];       /* 80-83 */
        u8      fac84;                  /* 84 */
        u8      fac85;                  /* 85 */
-       u8      _reserved3[91 - 86];    /* 86-90 */
+       u8      _pad_86[91 - 86];       /* 86-90 */
        u8      flags;                  /* 91 */
-       u8      _reserved4[100 - 92];   /* 92-99 */
+       u8      _pad_92[100 - 92];      /* 92-99 */
        u32     rnsize2;                /* 100-103 */
        u64     rnmax2;                 /* 104-111 */
-       u8      _reserved5[120 - 112];  /* 112-119 */
+       u8      _pad_112[120 - 112];    /* 112-119 */
        u16     hcpua;                  /* 120-121 */
-       u8      _reserved6[4096 - 122]; /* 122-4095 */
+       u8      _pad_122[4096 - 122];   /* 122-4095 */
 } __packed __aligned(PAGE_SIZE);
 
 static char sccb_early[PAGE_SIZE] __aligned(PAGE_SIZE) __initdata;
@@ -50,6 +55,10 @@ static unsigned int sclp_max_cpu;
 static struct sclp_ipl_info sclp_ipl_info;
 static unsigned char sclp_siif;
 static u32 sclp_ibc;
+static unsigned int sclp_mtid;
+static unsigned int sclp_mtid_cp;
+static unsigned int sclp_mtid_max;
+static unsigned int sclp_mtid_prev;
 
 u64 sclp_facilities;
 u8 sclp_fac84;
@@ -128,7 +137,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
        boot_cpu_address = stap();
        cpue = (void *)sccb + sccb->cpuoff;
        for (cpu = 0; cpu < sccb->ncpurl; cpue++, cpu++) {
-               if (boot_cpu_address != cpue->address)
+               if (boot_cpu_address != cpue->core_id)
                        continue;
                sclp_siif = cpue->siif;
                break;
@@ -139,6 +148,11 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
        if (sccb->flags & 0x2)
                sclp_ipl_info.has_dump = 1;
        memcpy(&sclp_ipl_info.loadparm, &sccb->loadparm, LOADPARM_LEN);
+
+       sclp_mtid = (sccb->fac42 & 0x80) ? (sccb->fac42 & 31) : 0;
+       sclp_mtid_cp = (sccb->fac42 & 0x80) ? (sccb->fac43 & 31) : 0;
+       sclp_mtid_max = max(sclp_mtid, sclp_mtid_cp);
+       sclp_mtid_prev = (sccb->fac42 & 0x80) ? (sccb->fac66 & 31) : 0;
 }
 
 bool __init sclp_has_linemode(void)
@@ -178,6 +192,21 @@ unsigned int sclp_get_ibc(void)
 }
 EXPORT_SYMBOL(sclp_get_ibc);
 
+unsigned int sclp_get_mtid(u8 cpu_type)
+{
+       return cpu_type ? sclp_mtid : sclp_mtid_cp;
+}
+
+unsigned int sclp_get_mtid_max(void)
+{
+       return sclp_mtid_max;
+}
+
+unsigned int sclp_get_mtid_prev(void)
+{
+       return sclp_mtid_prev;
+}
+
 /*
  * This function will be called after sclp_facilities_detect(), which gets
  * called from early.c code. The sclp_facilities_detect() function retrieves
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 10eb738fc81a88e4e510be25e7fa69b0f32463df..3578105989a0d724704058df6c83d9ec6ce52f16 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -938,7 +938,7 @@ void reipl_ccw_dev(struct ccw_dev_id *devid)
 {
        struct subchannel_id uninitialized_var(schid);
 
-       s390_reset_system(NULL, NULL);
+       s390_reset_system(NULL, NULL, NULL);
        if (reipl_find_schid(devid, &schid) != 0)
                panic("IPL Device not found\n");
        do_reipl_asm(*((__u32*)&schid));