Merge tag 'md-3.9-fixes' of git://neil.brown.name/md
author    Linus Torvalds <torvalds@linux-foundation.org>
          Sat, 23 Mar 2013 22:49:49 +0000 (15:49 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Sat, 23 Mar 2013 22:49:49 +0000 (15:49 -0700)
Pull md fixes from NeilBrown:
 "A few bugfixes for md

   - recent regressions in raid5
   - recent regressions in dm-raid (the NULL-guard pattern is sketched below)
   - a few lingering instances of CONFIG_MULTICORE_RAID456

  Several tagged for -stable"
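
The dm-raid fixes below all apply one guard pattern: an md array that is
assembled through dm-raid has no struct gendisk or struct request_queue of
its own, so every blktrace hook in raid5.c has to check for NULL first.
A minimal sketch of the pattern; the raid5_trace_remap() helper is
hypothetical (the patch open-codes the check at each call site):

    /* Hypothetical helper, not part of the patch: mddev->gendisk and
     * mddev->queue are NULL when the array is driven by dm-raid, so any
     * blktrace call must be skipped in that configuration.
     */
    static inline void raid5_trace_remap(struct r5conf *conf, struct bio *bi,
                                         sector_t sector)
    {
            if (conf->mddev->gendisk)
                    trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), bi,
                                          disk_devt(conf->mddev->gendisk),
                                          sector);
    }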

* tag 'md-3.9-fixes' of git://neil.brown.name/md:
  md: remove CONFIG_MULTICORE_RAID456 entirely
  md/raid5: ensure sync and DISCARD don't happen at the same time.
  MD: Prevent sysfs operations on uninitialized kobjects
  MD RAID5: Avoid accessing gendisk or queue structs when not available
  md/raid5: schedule_construction should abort if nothing to do.
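
The sync-vs-DISCARD fix serializes the two paths with the existing
per-stripe spinlock and a new STRIPE_DISCARD state bit; condensed from the
handle_stripe() and make_discard_request() hunks below:

    /* handle_stripe(): start a sync only while no discard is in flight */
    if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
            spin_lock(&sh->stripe_lock);
            if (!test_bit(STRIPE_DISCARD, &sh->state) &&
                test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
                    set_bit(STRIPE_SYNCING, &sh->state);
                    clear_bit(STRIPE_INSYNC, &sh->state);
            }
            spin_unlock(&sh->stripe_lock);
    }

    /* make_discard_request(): back off and retry while a sync is running */
    set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
    if (test_bit(STRIPE_SYNCING, &sh->state)) {
            release_stripe(sh);
            schedule();
            goto again;
    }
    clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);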

drivers/md/raid5.c
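
The schedule_reconstruction() fix ("schedule_construction should abort if
nothing to do") moves the reconstruct_state and ops_request setup after the
loop that counts devices to lock, so the function can return before any
work is scheduled. In outline, for the non-expanding write path:

    /* after the locking loop: if no device was queued for I/O, this
     * was a false alarm and no reconstruct operation is scheduled */
    if (!s->locked)
            return;
    sh->reconstruct_state = reconstruct_state_drain_run;
    set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
    set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);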

diff --combined drivers/md/raid5.c
index 3ee2912889e7110274acabc28df3c6664abcb0f7,42a899728748a7caa564fe5fc7018824d6e0f3ca..24909eb13fec1b0bf22e40caa4ff7a76967f80e4
@@@ -184,6 -184,8 +184,6 @@@ static void return_io(struct bio *retur
                return_bi = bi->bi_next;
                bi->bi_next = NULL;
                bi->bi_size = 0;
 -              trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
 -                                       bi, 0);
                bio_endio(bi, 0);
                bi = return_bi;
        }
@@@ -363,9 -365,10 +363,9 @@@ static struct stripe_head *__find_strip
                                         short generation)
  {
        struct stripe_head *sh;
 -      struct hlist_node *hn;
  
        pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector);
 -      hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
 +      hlist_for_each_entry(sh, stripe_hash(conf, sector), hash)
                if (sh->sector == sector && sh->generation == generation)
                        return sh;
        pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector);
@@@ -671,9 -674,11 +671,11 @@@ static void ops_run_io(struct stripe_he
                        bi->bi_next = NULL;
                        if (rrdev)
                                set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
-                       trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
-                                             bi, disk_devt(conf->mddev->gendisk),
-                                             sh->dev[i].sector);
+                       if (conf->mddev->gendisk)
+                               trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
+                                                     bi, disk_devt(conf->mddev->gendisk),
+                                                     sh->dev[i].sector);
                        generic_make_request(bi);
                }
                if (rrdev) {
                        rbi->bi_io_vec[0].bv_offset = 0;
                        rbi->bi_size = STRIPE_SIZE;
                        rbi->bi_next = NULL;
-                       trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
-                                             rbi, disk_devt(conf->mddev->gendisk),
-                                             sh->dev[i].sector);
+                       if (conf->mddev->gendisk)
+                               trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
+                                                     rbi, disk_devt(conf->mddev->gendisk),
+                                                     sh->dev[i].sector);
                        generic_make_request(rbi);
                }
                if (!rdev && !rrdev) {
@@@ -2280,17 -2286,6 +2283,6 @@@ schedule_reconstruction(struct stripe_h
        int level = conf->level;
  
        if (rcw) {
-               /* if we are not expanding this is a proper write request, and
-                * there will be bios with new data to be drained into the
-                * stripe cache
-                */
-               if (!expand) {
-                       sh->reconstruct_state = reconstruct_state_drain_run;
-                       set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
-               } else
-                       sh->reconstruct_state = reconstruct_state_run;
-               set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
  
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
                                s->locked++;
                        }
                }
+               /* if we are not expanding this is a proper write request, and
+                * there will be bios with new data to be drained into the
+                * stripe cache
+                */
+               if (!expand) {
+                       if (!s->locked)
+                               /* False alarm, nothing to do */
+                               return;
+                       sh->reconstruct_state = reconstruct_state_drain_run;
+                       set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
+               } else
+                       sh->reconstruct_state = reconstruct_state_run;
+               set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
                if (s->locked + conf->max_degraded == disks)
                        if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
                                atomic_inc(&conf->pending_full_writes);
                BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
                        test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
  
-               sh->reconstruct_state = reconstruct_state_prexor_drain_run;
-               set_bit(STRIPE_OP_PREXOR, &s->ops_request);
-               set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
-               set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
                        if (i == pd_idx)
                                s->locked++;
                        }
                }
+               if (!s->locked)
+                       /* False alarm - nothing to do */
+                       return;
+               sh->reconstruct_state = reconstruct_state_prexor_drain_run;
+               set_bit(STRIPE_OP_PREXOR, &s->ops_request);
+               set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
+               set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
        }
  
        /* keep the parity disk(s) locked while asynchronous operations
@@@ -2564,6 -2576,8 +2573,8 @@@ handle_failed_sync(struct r5conf *conf
        int i;
  
        clear_bit(STRIPE_SYNCING, &sh->state);
+       if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
+               wake_up(&conf->wait_for_overlap);
        s->syncing = 0;
        s->replacing = 0;
        /* There is nothing more to do for sync/check/repair.
@@@ -2737,6 -2751,7 +2748,7 @@@ static void handle_stripe_clean_event(s
  {
        int i;
        struct r5dev *dev;
+       int discard_pending = 0;
  
        for (i = disks; i--; )
                if (sh->dev[i].written) {
                                                STRIPE_SECTORS,
                                         !test_bit(STRIPE_DEGRADED, &sh->state),
                                                0);
-                       }
-               } else if (test_bit(R5_Discard, &sh->dev[i].flags))
-                       clear_bit(R5_Discard, &sh->dev[i].flags);
+                       } else if (test_bit(R5_Discard, &dev->flags))
+                               discard_pending = 1;
+               }
+       if (!discard_pending &&
+           test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
+               clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
+               clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
+               if (sh->qd_idx >= 0) {
+                       clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
+                       clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags);
+               }
+               /* now that discard is done we can proceed with any sync */
+               clear_bit(STRIPE_DISCARD, &sh->state);
+               if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
+                       set_bit(STRIPE_HANDLE, &sh->state);
+       }
  
        if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
                if (atomic_dec_and_test(&conf->pending_full_writes))
@@@ -2826,8 -2855,10 +2852,10 @@@ static void handle_stripe_dirtying(stru
        set_bit(STRIPE_HANDLE, &sh->state);
        if (rmw < rcw && rmw > 0) {
                /* prefer read-modify-write, but need to get some data */
-               blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d",
-                                 (unsigned long long)sh->sector, rmw);
+               if (conf->mddev->queue)
+                       blk_add_trace_msg(conf->mddev->queue,
+                                         "raid5 rmw %llu %d",
+                                         (unsigned long long)sh->sector, rmw);
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
                        if ((dev->towrite || i == sh->pd_idx) &&
                                }
                        }
                }
-               if (rcw)
+               if (rcw && conf->mddev->queue)
                        blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
                                          (unsigned long long)sh->sector,
                                          rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
@@@ -3417,9 -3448,15 +3445,15 @@@ static void handle_stripe(struct stripe
                return;
        }
  
-       if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
-               set_bit(STRIPE_SYNCING, &sh->state);
-               clear_bit(STRIPE_INSYNC, &sh->state);
+       if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+               spin_lock(&sh->stripe_lock);
+               /* Cannot process 'sync' concurrently with 'discard' */
+               if (!test_bit(STRIPE_DISCARD, &sh->state) &&
+                   test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+                       set_bit(STRIPE_SYNCING, &sh->state);
+                       clear_bit(STRIPE_INSYNC, &sh->state);
+               }
+               spin_unlock(&sh->stripe_lock);
        }
        clear_bit(STRIPE_DELAYED, &sh->state);
  
            test_bit(STRIPE_INSYNC, &sh->state)) {
                md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
                clear_bit(STRIPE_SYNCING, &sh->state);
+               if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
+                       wake_up(&conf->wait_for_overlap);
        }
  
        /* If the failed drives are just a ReadError, then we might need
@@@ -3878,6 -3917,8 +3914,6 @@@ static void raid5_align_endio(struct bi
        rdev_dec_pending(rdev, conf->mddev);
  
        if (!error && uptodate) {
 -              trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
 -                                       raid_bi, 0);
                bio_endio(raid_bi, 0);
                if (atomic_dec_and_test(&conf->active_aligned_reads))
                        wake_up(&conf->wait_for_stripe);
@@@ -3982,9 -4023,10 +4018,10 @@@ static int chunk_aligned_read(struct md
                atomic_inc(&conf->active_aligned_reads);
                spin_unlock_irq(&conf->device_lock);
  
-               trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
-                                     align_bi, disk_devt(mddev->gendisk),
-                                     raid_bio->bi_sector);
+               if (mddev->gendisk)
+                       trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
+                                             align_bi, disk_devt(mddev->gendisk),
+                                             raid_bio->bi_sector);
                generic_make_request(align_bi);
                return 1;
        } else {
@@@ -4078,7 -4120,8 +4115,8 @@@ static void raid5_unplug(struct blk_plu
                }
                spin_unlock_irq(&conf->device_lock);
        }
-       trace_block_unplug(mddev->queue, cnt, !from_schedule);
+       if (mddev->queue)
+               trace_block_unplug(mddev->queue, cnt, !from_schedule);
        kfree(cb);
  }
  
@@@ -4141,6 -4184,13 +4179,13 @@@ static void make_discard_request(struc
                sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
                prepare_to_wait(&conf->wait_for_overlap, &w,
                                TASK_UNINTERRUPTIBLE);
+               set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
+               if (test_bit(STRIPE_SYNCING, &sh->state)) {
+                       release_stripe(sh);
+                       schedule();
+                       goto again;
+               }
+               clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
                spin_lock_irq(&sh->stripe_lock);
                for (d = 0; d < conf->raid_disks; d++) {
                        if (d == sh->pd_idx || d == sh->qd_idx)
                                goto again;
                        }
                }
+               set_bit(STRIPE_DISCARD, &sh->state);
                finish_wait(&conf->wait_for_overlap, &w);
                for (d = 0; d < conf->raid_disks; d++) {
                        if (d == sh->pd_idx || d == sh->qd_idx)
@@@ -4336,6 -4387,8 +4382,6 @@@ static void make_request(struct mddev *
                if ( rw == WRITE )
                        md_write_end(mddev);
  
 -              trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
 -                                       bi, 0);
                bio_endio(bi, 0);
        }
  }
@@@ -4712,8 -4765,11 +4758,8 @@@ static int  retry_aligned_read(struct r
                handled++;
        }
        remaining = raid5_dec_bi_active_stripes(raid_bio);
 -      if (remaining == 0) {
 -              trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
 -                                       raid_bio, 0);
 +      if (remaining == 0)
                bio_endio(raid_bio, 0);
 -      }
        if (atomic_dec_and_test(&conf->active_aligned_reads))
                wake_up(&conf->wait_for_stripe);
        return handled;