parisc: Fix races in parisc_setup_cache_timing()
authorJohn David Anglin <dave.anglin@bell.net>
Mon, 21 Nov 2016 02:12:36 +0000 (21:12 -0500)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 2 Dec 2016 08:09:01 +0000 (09:09 +0100)
commit 741dc7bf1c7c7d93b853bb55efe77baa27e1b0a9 upstream.

Helge reported to me the following startup crash:

[    0.000000] Linux version 4.8.0-1-parisc64-smp (debian-kernel@lists.debian.org) (gcc version 5.4.1 20161019 (GCC) ) #1 SMP Debian 4.8.7-1 (2016-11-13)
[    0.000000] The 64-bit Kernel has started...
[    0.000000] Kernel default page size is 4 KB. Huge pages enabled with 1 MB physical and 2 MB virtual size.
[    0.000000] Determining PDC firmware type: System Map.
[    0.000000] model 9000/785/J5000
[    0.000000] Total Memory: 2048 MB
[    0.000000] Memory: 2018528K/2097152K available (9272K kernel code, 3053K rwdata, 1319K rodata, 1024K init, 840K bss, 78624K reserved, 0K cma-reserved)
[    0.000000] virtual kernel memory layout:
[    0.000000]     vmalloc : 0x0000000000008000 - 0x000000003f000000   (1007 MB)
[    0.000000]     memory  : 0x0000000040000000 - 0x00000000c0000000   (2048 MB)
[    0.000000]       .init : 0x0000000040100000 - 0x0000000040200000   (1024 kB)
[    0.000000]       .data : 0x0000000040b0e000 - 0x0000000040f533e0   (4372 kB)
[    0.000000]       .text : 0x0000000040200000 - 0x0000000040b0e000   (9272 kB)
[    0.768910] Brought up 1 CPUs
[    0.992465] NET: Registered protocol family 16
[    2.429981] Releasing cpu 1 now, hpa=fffffffffffa2000
[    2.635751] CPU(s): 2 out of 2 PA8500 (PCX-W) at 440.000000 MHz online
[    2.726692] Setting cache flush threshold to 1024 kB
[    2.729932] Not-handled unaligned insn 0x43ffff80
[    2.798114] Setting TLB flush threshold to 140 kB
[    2.928039] Unaligned handler failed, ret = -1
[    3.000419]       _______________________________
[    3.000419]      < Your System ate a SPARC! Gah! >
[    3.000419]       -------------------------------
[    3.000419]              \   ^__^
[    3.000419]                  (__)\       )\/\
[    3.000419]                   U  ||----w |
[    3.000419]                      ||     ||
[    9.340055] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.8.0-1-parisc64-smp #1 Debian 4.8.7-1
[    9.448082] task: 00000000bfd48060 task.stack: 00000000bfd50000
[    9.528040]
[   10.760029] IASQ: 0000000000000000 0000000000000000 IAOQ: 000000004025d154 000000004025d158
[   10.868052]  IIR: 43ffff80    ISR: 0000000000340000  IOR: 000001ff54150960
[   10.960029]  CPU:        1   CR30: 00000000bfd50000 CR31: 0000000011111111
[   11.052057]  ORIG_R28: 000000004021e3b4
[   11.100045]  IAOQ[0]: irq_exit+0x94/0x120
[   11.152062]  IAOQ[1]: irq_exit+0x98/0x120
[   11.208031]  RP(r2): irq_exit+0xb8/0x120
[   11.256074] Backtrace:
[   11.288067]  [<00000000402cd944>] cpu_startup_entry+0x1e4/0x598
[   11.368058]  [<0000000040109528>] smp_callin+0x2c0/0x2f0
[   11.436308]  [<00000000402b53fc>] update_curr+0x18c/0x2d0
[   11.508055]  [<00000000402b73b8>] dequeue_entity+0x2c0/0x1030
[   11.584040]  [<00000000402b3cc0>] set_next_entity+0x80/0xd30
[   11.660069]  [<00000000402c1594>] pick_next_task_fair+0x614/0x720
[   11.740085]  [<000000004020dd34>] __schedule+0x394/0xa60
[   11.808054]  [<000000004020e488>] schedule+0x88/0x118
[   11.876039]  [<0000000040283d3c>] rescuer_thread+0x4d4/0x5b0
[   11.948090]  [<000000004028fc4c>] kthread+0x1ec/0x248
[   12.016053]  [<0000000040205020>] end_fault_vector+0x20/0xc0
[   12.092239]  [<00000000402050c0>] _switch_to_ret+0x0/0xf40
[   12.164044]
[   12.184036] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.8.0-1-parisc64-smp #1 Debian 4.8.7-1
[   12.244040] Backtrace:
[   12.244040]  [<000000004021c480>] show_stack+0x68/0x80
[   12.244040]  [<00000000406f332c>] dump_stack+0xec/0x168
[   12.244040]  [<000000004021c74c>] die_if_kernel+0x25c/0x430
[   12.244040]  [<000000004022d320>] handle_unaligned+0xb48/0xb50
[   12.244040]
[   12.632066] ---[ end trace 9ca05a7215c7bbb2 ]---
[   12.692036] Kernel panic - not syncing: Attempted to kill the idle task!

We have the insn 0x43ffff80 in IIR but from IAOQ we should have:
   4025d150:   0f f3 20 df     ldd,s r19(r31),r31
   4025d154:   0f 9f 00 9c     ldw r31(ret0),ret0
   4025d158:   bf 80 20 58     cmpb,*<> r0,ret0,4025d18c <irq_exit+0xcc>

Cpu0 has just completed running parisc_setup_cache_timing:

[    2.429981] Releasing cpu 1 now, hpa=fffffffffffa2000
[    2.635751] CPU(s): 2 out of 2 PA8500 (PCX-W) at 440.000000 MHz online
[    2.726692] Setting cache flush threshold to 1024 kB
[    2.729932] Not-handled unaligned insn 0x43ffff80
[    2.798114] Setting TLB flush threshold to 140 kB
[    2.928039] Unaligned handler failed, ret = -1

From the backtrace, cpu1 is in smp_callin:

void __init smp_callin(void)
{
       int slave_id = cpu_now_booting;

       smp_cpu_init(slave_id);
       preempt_disable();

       flush_cache_all_local(); /* start with known state */
       flush_tlb_all_local(NULL);

       local_irq_enable();  /* Interrupts have been off until now */

       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

So, it has just flushed its caches and the TLB. It would seem either the
flushes in parisc_setup_cache_timing or smp_callin have corrupted kernel
memory.

The attached patch reworks parisc_setup_cache_timing to remove the races
in setting the cache and TLB flush thresholds. It also corrects the
number of bytes flushed in the TLB calculation.

The patch flushes the cache and TLB on cpu0 before starting the
secondary processors so that they are started from a known state.

Tested with a few reboots on c8000.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
arch/parisc/kernel/cache.c
arch/parisc/kernel/setup.c

index cda6dbbe98426c4bd77a7c0ff31f0525b06fc2ee..fd5979f28ada3bdf272621e8a41ad99208386f9b 100644 (file)
@@ -351,6 +351,7 @@ void __init parisc_setup_cache_timing(void)
 {
        unsigned long rangetime, alltime;
        unsigned long size, start;
+       unsigned long threshold;
 
        alltime = mfctl(16);
        flush_data_cache();
@@ -364,17 +365,12 @@ void __init parisc_setup_cache_timing(void)
        printk(KERN_DEBUG "Whole cache flush %lu cycles, flushing %lu bytes %lu cycles\n",
                alltime, size, rangetime);
 
-       /* Racy, but if we see an intermediate value, it's ok too... */
-       parisc_cache_flush_threshold = size * alltime / rangetime;
-
-       parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold);
-       if (!parisc_cache_flush_threshold)
-               parisc_cache_flush_threshold = FLUSH_THRESHOLD;
-
-       if (parisc_cache_flush_threshold > cache_info.dc_size)
-               parisc_cache_flush_threshold = cache_info.dc_size;
-
-       printk(KERN_INFO "Setting cache flush threshold to %lu kB\n",
+       threshold = L1_CACHE_ALIGN(size * alltime / rangetime);
+       if (threshold > cache_info.dc_size)
+               threshold = cache_info.dc_size;
+       if (threshold)
+               parisc_cache_flush_threshold = threshold;
+       printk(KERN_INFO "Cache flush threshold set to %lu KiB\n",
                parisc_cache_flush_threshold/1024);
 
        /* calculate TLB flush threshold */
@@ -383,7 +379,7 @@ void __init parisc_setup_cache_timing(void)
        flush_tlb_all();
        alltime = mfctl(16) - alltime;
 
-       size = PAGE_SIZE;
+       size = 0;
        start = (unsigned long) _text;
        rangetime = mfctl(16);
        while (start < (unsigned long) _end) {
@@ -396,13 +392,10 @@ void __init parisc_setup_cache_timing(void)
        printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n",
                alltime, size, rangetime);
 
-       parisc_tlb_flush_threshold = size * alltime / rangetime;
-       parisc_tlb_flush_threshold *= num_online_cpus();
-       parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold);
-       if (!parisc_tlb_flush_threshold)
-               parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD;
-
-       printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n",
+       threshold = PAGE_ALIGN(num_online_cpus() * size * alltime / rangetime);
+       if (threshold)
+               parisc_tlb_flush_threshold = threshold;
+       printk(KERN_INFO "TLB flush threshold set to %lu KiB\n",
                parisc_tlb_flush_threshold/1024);
 }
 
index 81d6f639194478fa96b33a7f30ba4d612c9cfa8f..2e66a887788e8781bf76b13cbd3fed02e6979259 100644 (file)
@@ -334,6 +334,10 @@ static int __init parisc_init(void)
        /* tell PDC we're Linux. Nevermind failure. */
        pdc_stable_write(0x40, &osid, sizeof(osid));
        
+       /* start with known state */
+       flush_cache_all_local();
+       flush_tlb_all_local(NULL);
+
        processor_init();
 #ifdef CONFIG_SMP
        pr_info("CPU(s): %d out of %d %s at %d.%06d MHz online\n",