md/raid5. Don't write to known bad block on doubtful devices.
authorNeilBrown <neilb@suse.de>
Thu, 28 Jul 2011 01:39:22 +0000 (11:39 +1000)
committerNeilBrown <neilb@suse.de>
Thu, 28 Jul 2011 01:39:22 +0000 (11:39 +1000)
If a device has seen write errors, don't write to any known
bad blocks on that device.

Signed-off-by: NeilBrown <neilb@suse.de>
drivers/md/raid5.c

index 9768a7d67148225ccafad99f473c1d4bc107729a..3fa3f20dc5f55e6a38cc25414e0d241c9c7f1b11 100644 (file)
@@ -526,6 +526,36 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                        atomic_inc(&rdev->nr_pending);
                rcu_read_unlock();
 
+               /* We have already checked bad blocks for reads.  Now
+                * need to check for writes.
+                */
+               while ((rw & WRITE) && rdev &&
+                      test_bit(WriteErrorSeen, &rdev->flags)) {
+                       sector_t first_bad;
+                       int bad_sectors;
+                       int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+                                             &first_bad, &bad_sectors);
+                       if (!bad)
+                               break;
+
+                       if (bad < 0) {
+                               set_bit(BlockedBadBlocks, &rdev->flags);
+                               if (!conf->mddev->external &&
+                                   conf->mddev->flags) {
+                                       /* It is very unlikely, but we might
+                                        * still need to write out the
+                                        * bad block log - better give it
+                                        * a chance*/
+                                       md_check_recovery(conf->mddev);
+                               }
+                               md_wait_for_blocked_rdev(rdev, conf->mddev);
+                       } else {
+                               /* Acknowledged bad block - skip the write */
+                               rdev_dec_pending(rdev, conf->mddev);
+                               rdev = NULL;
+                       }
+               }
+
                if (rdev) {
                        if (s->syncing || s->expanding || s->expanded)
                                md_sync_acct(rdev->bdev, STRIPE_SECTORS);
@@ -3317,7 +3347,6 @@ finish:
 
        ops_run_io(sh, &s);
 
-
        if (s.dec_preread_active) {
                /* We delay this until after ops_run_io so that if make_request
                 * is waiting on a flush, it won't continue until the writes