From 78d1e02fac0595a8aa8a5064d1bd0c0ea55b22b0 Mon Sep 17 00:00:00 2001 From: Dave Olson Date: Fri, 20 Jul 2007 14:41:26 -0700 Subject: [PATCH] IB/ipath: Workaround problem of errormask register being overwritten On some system hardware, we are seeing moderately common cases of the chip errormask register being overwritten due to a chip bug in iba6120 that is triggered by a vendor-specific PCIe broadcast message. This patch merely checks periodically, and corrects it if needed (the overwrite can cause us to not get error and hardware error interrupts). Also, make dd->ipath_errormask the one, true canonical source for kr_errormask, and remove references to ipath_ignorederrs as it is currently unused. Signed-off-by: Dave Olson Signed-off-by: John Gregor Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_init_chip.c | 5 +- drivers/infiniband/hw/ipath/ipath_intr.c | 25 ++++----- drivers/infiniband/hw/ipath/ipath_kernel.h | 11 +--- drivers/infiniband/hw/ipath/ipath_stats.c | 54 ++++++++++++++++--- 4 files changed, 66 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 71e6c9d4a714..9dd0bacf8461 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -851,13 +851,14 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, dd->ipath_hwerrmask); - dd->ipath_maskederrs = dd->ipath_ignorederrs; /* clear all */ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); /* enable errors that are masked, at least this first time. */ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, ~dd->ipath_maskederrs); - /* clear any interrups up to this point (ints still not enabled) */ + dd->ipath_errormask = ipath_read_kreg64(dd, + dd->ipath_kregs->kr_errormask); + /* clear any interrupts up to this point (ints still not enabled) */ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); /* diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 0c075cf8316b..b29fe7e9b11a 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -517,10 +517,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint); - /* - * don't report errors that are masked (includes those always - * ignored) - */ + /* don't report errors that are masked */ errs &= ~dd->ipath_maskederrs; /* do these first, they are most important */ @@ -566,19 +563,19 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * ones on this particular interrupt, which also isn't great */ dd->ipath_maskederrs |= dd->ipath_lasterror | errs; + dd->ipath_errormask &= ~dd->ipath_maskederrs; ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - ~dd->ipath_maskederrs); + dd->ipath_errormask); s_iserr = ipath_decode_err(msg, sizeof msg, - (dd->ipath_maskederrs & ~dd-> - ipath_ignorederrs)); + dd->ipath_maskederrs); - if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & + if (dd->ipath_maskederrs & ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) ipath_dev_err(dd, "Temporarily disabling " "error(s) %llx reporting; too frequent (%s)\n", - (unsigned long long) (dd->ipath_maskederrs & - ~dd->ipath_ignorederrs), msg); + (unsigned long long)dd->ipath_maskederrs, + msg); else { /* * rcvegrfull and rcvhdrqfull are "normal", @@ -793,6 +790,9 @@ void ipath_clear_freeze(struct ipath_devdata *dd) /* disable error interrupts, to avoid confusion */ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); + /* also disable interrupts; errormask is sometimes overwriten */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); + /* * clear all sends, because they have may been * completed by usercode while in freeze mode, and @@ -817,7 +817,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd) for (i = 0; i < dd->ipath_pioavregs; i++) { /* deal with 6110 chip bug */ im = i > 3 ? ((i&1) ? i-1 : i+1) : i; - val = ipath_read_kreg64(dd, 0x1000+(im*sizeof(u64))); + val = ipath_read_kreg64(dd, (0x1000/sizeof(u64))+im); dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i] = le64_to_cpu(val); } @@ -832,7 +832,8 @@ void ipath_clear_freeze(struct ipath_devdata *dd) ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, E_SPKT_ERRS_IGNORE); ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - ~dd->ipath_maskederrs); + dd->ipath_errormask); + ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL); ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); } diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index ef773298b805..7a7966f7e4ff 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -261,18 +261,10 @@ struct ipath_devdata { * limiting of hwerror reporting */ ipath_err_t ipath_lasthwerror; - /* - * errors masked because they occur too fast, also includes errors - * that are always ignored (ipath_ignorederrs) - */ + /* errors masked because they occur too fast */ ipath_err_t ipath_maskederrs; /* time in jiffies at which to re-enable maskederrs */ unsigned long ipath_unmasktime; - /* - * errors always ignored (masked), at least for a given - * chip/device, because they are wrong or not useful - */ - ipath_err_t ipath_ignorederrs; /* count of egrfull errors, combined for all ports */ u64 ipath_last_tidfull; /* for ipath_qcheck() */ @@ -436,6 +428,7 @@ struct ipath_devdata { u64 ipath_lastibcstat; /* hwerrmask shadow */ ipath_err_t ipath_hwerrmask; + ipath_err_t ipath_errormask; /* errormask shadow */ /* interrupt config reg shadow */ u64 ipath_intconfig; /* kr_sendpiobufbase value */ diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index 73ed17d03188..bae4f56f7271 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -196,6 +196,45 @@ static void ipath_qcheck(struct ipath_devdata *dd) } } +static void ipath_chk_errormask(struct ipath_devdata *dd) +{ + static u32 fixed; + u32 ctrl; + unsigned long errormask; + unsigned long hwerrs; + + if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED)) + return; + + errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask); + + if (errormask == dd->ipath_errormask) + return; + fixed++; + + hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); + ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); + + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + dd->ipath_errormask); + + if ((hwerrs & dd->ipath_hwerrmask) || + (ctrl & INFINIPATH_C_FREEZEMODE)) { + /* force re-interrupt of pending events, just in case */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL); + ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL); + ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); + dev_info(&dd->pcidev->dev, + "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n", + fixed, errormask, (unsigned long)dd->ipath_errormask, + ctrl, hwerrs); + } else + ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n", + fixed, errormask, + (unsigned long)dd->ipath_errormask); +} + + /** * ipath_get_faststats - get word counters from chip before they overflow * @opaque - contains a pointer to the infinipath device ipath_devdata @@ -251,14 +290,13 @@ void ipath_get_faststats(unsigned long opaque) dd->ipath_lasterror = 0; if (dd->ipath_lasthwerror) dd->ipath_lasthwerror = 0; - if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) + if (dd->ipath_maskederrs && time_after(jiffies, dd->ipath_unmasktime)) { char ebuf[256]; int iserr; iserr = ipath_decode_err(ebuf, sizeof ebuf, - (dd->ipath_maskederrs & ~dd-> - ipath_ignorederrs)); - if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & + dd->ipath_maskederrs); + if (dd->ipath_maskederrs & ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS )) ipath_dev_err(dd, "Re-enabling masked errors " @@ -278,9 +316,12 @@ void ipath_get_faststats(unsigned long opaque) ipath_cdbg(ERRPKT, "Re-enabling packet" " problem interrupt (%s)\n", ebuf); } - dd->ipath_maskederrs = dd->ipath_ignorederrs; + + /* re-enable masked errors */ + dd->ipath_errormask |= dd->ipath_maskederrs; ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - ~dd->ipath_maskederrs); + dd->ipath_errormask); + dd->ipath_maskederrs = 0; } /* limit qfull messages to ~one per minute per port */ @@ -294,6 +335,7 @@ void ipath_get_faststats(unsigned long opaque) } } + ipath_chk_errormask(dd); done: mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5); } -- 2.34.1