gru: bug fixes for GRU exception handling
author Jack Steiner <steiner@sgi.com>
Wed, 17 Jun 2009 23:28:19 +0000 (16:28 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 18 Jun 2009 20:03:59 +0000 (13:03 -0700)
Bug fixes for GRU exception handling.  Additional fields from the CBR must
be returned to the user to allow the user to correctly diagnose GRU
exceptions.

Handle an end case in TFH TLB miss handling.  Verify that the TFH actually
indicates a pending exception.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
drivers/misc/sgi-gru/gru_instructions.h
drivers/misc/sgi-gru/grufault.c
drivers/misc/sgi-gru/gruhandles.h
drivers/misc/sgi-gru/grumain.c
drivers/misc/sgi-gru/gruprocfs.c
drivers/misc/sgi-gru/grutables.h

index 3fde33c1e8f3b64e10192194974ec0016ea95f00..2feb885ca96fcc501b289cc84bc081e946bd7176 100644 (file)
@@ -81,6 +81,8 @@ struct control_block_extended_exc_detail {
        int             exopc;
        long            exceptdet0;
        int             exceptdet1;
+       int             cbrstate;
+       int             cbrexecstatus;
 };
 
 /*
@@ -107,7 +109,8 @@ struct gru_instruction_bits {
     unsigned char              reserved2: 2;
     unsigned char              istatus:   2;
     unsigned char              isubstatus:4;
-    unsigned char              reserved3: 2;
+    unsigned char              reserved3: 1;
+    unsigned char              tlb_fault_color: 1;
     /* DW 1 */
     unsigned long              idef4;          /* 42 bits: TRi1, BufSize */
     /* DW 2-6 */
@@ -253,6 +256,21 @@ struct gru_instruction {
 #define CBE_CAUSE_RESPONSE_DATA_ERROR          (1 << 16)
 #define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR    (1 << 17)
 
+/* CBE cbrexecstatus bits */
+#define CBR_EXS_ABORT_OCC_BIT                  0
+#define CBR_EXS_INT_OCC_BIT                    1
+#define CBR_EXS_PENDING_BIT                    2
+#define CBR_EXS_QUEUED_BIT                     3
+#define CBR_EXS_TLBHW_BIT                      4
+#define CBR_EXS_EXCEPTION_BIT                  5
+
+#define CBR_EXS_ABORT_OCC                      (1 << CBR_EXS_ABORT_OCC_BIT)
+#define CBR_EXS_INT_OCC                                (1 << CBR_EXS_INT_OCC_BIT)
+#define CBR_EXS_PENDING                                (1 << CBR_EXS_PENDING_BIT)
+#define CBR_EXS_QUEUED                         (1 << CBR_EXS_QUEUED_BIT)
+#define CBR_EXS_TLBHW                          (1 << CBR_EXS_TLBHW_BIT)
+#define CBR_EXS_EXCEPTION                      (1 << CBR_EXS_EXCEPTION_BIT)
+
 /*
  * Exceptions are retried for the following cases. If any OTHER bits are set
  * in ecause, the exception is not retryable.
index ab118558552e96a3fea1071f406b47d88514e905..4089f862aa295f3c5001b139c157ba04363a056e 100644 (file)
@@ -334,6 +334,8 @@ static int gru_try_dropin(struct gru_thread_state *gts,
         * Might be a hardware race OR a stupid user. Ignore FMM because FMM
         * is a transient state.
         */
+       if (tfh->status != TFHSTATUS_EXCEPTION)
+               goto failnoexception;
        if (tfh->state == TFHSTATE_IDLE)
                goto failidle;
        if (tfh->state == TFHSTATE_MISS_FMM && cb)
@@ -401,8 +403,17 @@ failfmm:
        gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
        return 0;
 
+failnoexception:
+       /* TFH status did not show exception pending */
+       gru_flush_cache(tfh);
+       if (cb)
+               gru_flush_cache(cb);
+       STAT(tlb_dropin_fail_no_exception);
+       gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", tfh, tfh->status, tfh->state);
+       return 0;
+
 failidle:
-       /* TFH was idle  - no miss pending */
+       /* TFH state was idle  - no miss pending */
        gru_flush_cache(tfh);
        if (cb)
                gru_flush_cache(cb);
@@ -472,7 +483,8 @@ irqreturn_t gru_intr(int irq, void *dev_id)
                 * This is running in interrupt context. Trylock the mmap_sem.
                 * If it fails, retry the fault in user context.
                 */
-               if (down_read_trylock(&gts->ts_mm->mmap_sem)) {
+               if (!gts->ts_force_cch_reload &&
+                                       down_read_trylock(&gts->ts_mm->mmap_sem)) {
                        gru_try_dropin(gts, tfh, NULL);
                        up_read(&gts->ts_mm->mmap_sem);
                } else {
@@ -595,14 +607,19 @@ int gru_get_exception_detail(unsigned long arg)
                excdet.ecause = cbe->ecause;
                excdet.exceptdet0 = cbe->idef1upd;
                excdet.exceptdet1 = cbe->idef3upd;
+               excdet.cbrstate = cbe->cbrstate;
+               excdet.cbrexecstatus = cbe->cbrexecstatus;
                ret = 0;
        } else {
                ret = -EAGAIN;
        }
        gru_unlock_gts(gts);
 
-       gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb,
-               excdet.ecause);
+       gru_dbg(grudev,
+               "cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, "
+               "exdet0 0x%lx, exdet1 0x%x\n",
+               excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus,
+               excdet.ecause, excdet.exceptdet0, excdet.exceptdet1);
        if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
                ret = -EFAULT;
        return ret;
index 1ed74d7508c8bb14181c181a07925d04041c6283..03b76a1993c36645cdb91f114bda35929b3f5bca 100644 (file)
@@ -251,15 +251,14 @@ struct gru_tlb_fault_handle {
        unsigned int fill1:9;
 
        unsigned int status:2;
-       unsigned int fill2:1;
-       unsigned int color:1;
+       unsigned int fill2:2;
        unsigned int state:3;
        unsigned int fill3:1;
 
-       unsigned int cause:7;           /* DW 0 - high 32 */
+       unsigned int cause:7;
        unsigned int fill4:1;
 
-       unsigned int indexway:12;
+       unsigned int indexway:12;       /* DW 0 - high 32 */
        unsigned int fill5:4;
 
        unsigned int ctxnum:4;
@@ -457,21 +456,7 @@ enum gru_cbr_state {
        CBRSTATE_BUSY_INTERRUPT,
 };
 
-/* CBE cbrexecstatus bits */
-#define CBR_EXS_ABORT_OCC_BIT                  0
-#define CBR_EXS_INT_OCC_BIT                    1
-#define CBR_EXS_PENDING_BIT                    2
-#define CBR_EXS_QUEUED_BIT                     3
-#define CBR_EXS_TLBHW_BIT                      4
-#define CBR_EXS_EXCEPTION_BIT                  5
-
-#define CBR_EXS_ABORT_OCC                      (1 << CBR_EXS_ABORT_OCC_BIT)
-#define CBR_EXS_INT_OCC                                (1 << CBR_EXS_INT_OCC_BIT)
-#define CBR_EXS_PENDING                                (1 << CBR_EXS_PENDING_BIT)
-#define CBR_EXS_QUEUED                         (1 << CBR_EXS_QUEUED_BIT)
-#define CBR_EXS_TLBHW                          (1 << CBR_EXS_TLBHW_BIT)
-#define CBR_EXS_EXCEPTION                      (1 << CBR_EXS_EXCEPTION_BIT)
-
+/* CBE cbrexecstatus bits  - defined in gru_instructions.h*/
 /* CBE ecause bits  - defined in gru_instructions.h */
 
 /*
index ec3f7a17d221e01bd82bc23b6a64025263627c65..374af38862e67aa476b589e4e0e3bf44f3f8b1f4 100644 (file)
@@ -599,6 +599,9 @@ int gru_update_cch(struct gru_thread_state *gts, int force_unload)
                                cch->sizeavail[i] = gts->ts_sizeavail;
                        gts->ts_tlb_int_select = gru_cpu_fault_map_id();
                        cch->tlb_int_select = gru_cpu_fault_map_id();
+                       cch->tfm_fault_bit_enable =
+                           (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+                           || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
                } else {
                        for (i = 0; i < 8; i++)
                                cch->asid[i] = 0;
index ee74821b171c9eff780b0079760c29b016e05371..c46c1c5f0c738f08f8d07c6d70149c426272875b 100644 (file)
@@ -84,6 +84,8 @@ static int statistics_show(struct seq_file *s, void *p)
        printstat(s, tlb_dropin_fail_range_active);
        printstat(s, tlb_dropin_fail_idle);
        printstat(s, tlb_dropin_fail_fmm);
+       printstat(s, tlb_dropin_fail_no_exception);
+       printstat(s, tlb_dropin_fail_no_exception_war);
        printstat(s, mmu_invalidate_range);
        printstat(s, mmu_invalidate_page);
        printstat(s, mmu_clear_flush_young);
@@ -158,8 +160,7 @@ static ssize_t options_write(struct file *file, const char __user *userbuf,
        unsigned long val;
        char buf[80];
 
-       if (copy_from_user
-           (buf, userbuf, count < sizeof(buf) ? count : sizeof(buf)))
+       if (strncpy_from_user(buf, userbuf, sizeof(buf) - 1) < 0)
                return -EFAULT;
        buf[count - 1] = '\0';
        if (!strict_strtoul(buf, 10, &val))
index bf1eeb7553edda963bfea5b1b7f8de28cccf0b28..ebf6183c1635878ac5e4d13b7277e0beebb0dfed 100644 (file)
@@ -207,6 +207,8 @@ struct gru_stats_s {
        atomic_long_t tlb_dropin_fail_range_active;
        atomic_long_t tlb_dropin_fail_idle;
        atomic_long_t tlb_dropin_fail_fmm;
+       atomic_long_t tlb_dropin_fail_no_exception;
+       atomic_long_t tlb_dropin_fail_no_exception_war;
        atomic_long_t mmu_invalidate_range;
        atomic_long_t mmu_invalidate_page;
        atomic_long_t mmu_clear_flush_young;