powerpc/eeh: Hotplug improvement
author: Gavin Shan <shangw@linux.vnet.ibm.com>
Sun, 12 Jan 2014 06:13:45 +0000 (14:13 +0800)
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Wed, 15 Jan 2014 02:58:29 +0000 (13:58 +1100)
When an EEH error hits a specific PCI device before its driver is
loaded, we apply hotplug to recover from the error. During the plug
phase, the PCI device is probed and its driver is loaded. We then
wrongly call the driver's error handlers if the driver supports EEH
explicitly.

The patch fixes this by introducing the flag EEH_DEV_NO_HANDLER and
setting it before we remove the PCI device. In turn, we avoid wrongly
calling the error handlers of the PCI device after its driver is loaded.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
arch/powerpc/include/asm/eeh.h
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/eeh_driver.c

index 7f8adc848cd6fd45ac3bdc339c7c8e1bf1819b3b..8b4b8e4a5c32a2e819253f85b19e42d427e8017c 100644 (file)
@@ -90,7 +90,8 @@ struct eeh_pe {
 #define EEH_DEV_IRQ_DISABLED   (1 << 3)        /* Interrupt disabled   */
 #define EEH_DEV_DISCONNECTED   (1 << 4)        /* Removing from PE     */
 
-#define EEH_DEV_SYSFS          (1 << 8)        /* Sysfs created        */
+#define EEH_DEV_NO_HANDLER     (1 << 8)        /* No error handler     */
+#define EEH_DEV_SYSFS          (1 << 9)        /* Sysfs created        */
 
 struct eeh_dev {
        int mode;                       /* EEH mode                     */
index f4b7a227f18350f3f46c078e8101019e388dbfb6..148db72a8c4371e69f79009683eb8b3baf12c3d6 100644 (file)
@@ -921,6 +921,13 @@ void eeh_add_device_late(struct pci_dev *dev)
                eeh_sysfs_remove_device(edev->pdev);
                edev->mode &= ~EEH_DEV_SYSFS;
 
+               /*
+                * We definitely should have the PCI device removed
+                * though it wasn't correctly. So we needn't call
+                * into error handler afterwards.
+                */
+               edev->mode |= EEH_DEV_NO_HANDLER;
+
                edev->pdev = NULL;
                dev->dev.archdata.edev = NULL;
        }
@@ -1023,6 +1030,14 @@ void eeh_remove_device(struct pci_dev *dev)
        else
                edev->mode |= EEH_DEV_DISCONNECTED;
 
+       /*
+        * We're removing from the PCI subsystem, that means
+        * the PCI device driver can't support EEH or not
+        * well. So we rely on hotplug completely to do recovery
+        * for the specific PCI device.
+        */
+       edev->mode |= EEH_DEV_NO_HANDLER;
+
        eeh_addr_cache_rmv_dev(dev);
        eeh_sysfs_remove_device(dev);
        edev->mode &= ~EEH_DEV_SYSFS;
index 4ef59c33777f0a4bd6bc0115e102db031b8f5094..7db39203a07341030fb394ea22b201c6d3ed9557 100644 (file)
@@ -217,7 +217,8 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
        if (!driver) return NULL;
 
        if (!driver->err_handler ||
-           !driver->err_handler->mmio_enabled) {
+           !driver->err_handler->mmio_enabled ||
+           (edev->mode & EEH_DEV_NO_HANDLER)) {
                eeh_pcid_put(dev);
                return NULL;
        }
@@ -258,7 +259,8 @@ static void *eeh_report_reset(void *data, void *userdata)
        eeh_enable_irq(dev);
 
        if (!driver->err_handler ||
-           !driver->err_handler->slot_reset) {
+           !driver->err_handler->slot_reset ||
+           (edev->mode & EEH_DEV_NO_HANDLER)) {
                eeh_pcid_put(dev);
                return NULL;
        }
@@ -297,7 +299,9 @@ static void *eeh_report_resume(void *data, void *userdata)
        eeh_enable_irq(dev);
 
        if (!driver->err_handler ||
-           !driver->err_handler->resume) {
+           !driver->err_handler->resume ||
+           (edev->mode & EEH_DEV_NO_HANDLER)) {
+               edev->mode &= ~EEH_DEV_NO_HANDLER;
                eeh_pcid_put(dev);
                return NULL;
        }