md/raid5: split wait_for_stripe and introduce wait_for_quiescent
author Yuanhan Liu <yuanhan.liu@linux.intel.com>
Fri, 8 May 2015 08:19:06 +0000 (18:19 +1000)
committer NeilBrown <neilb@suse.de>
Wed, 17 Jun 2015 00:00:21 +0000 (10:00 +1000)
I noticed heavy spin lock contention in get_active_stripe(). It arises
at the wake-up stage, where a bunch of processes try to re-acquire the
spin lock.

After giving this issue some thought, I found the lock contention could
be relieved (and even avoided) if we turn wait_for_stripe into a
separate waitqueue for each lock hash and make the wake-up exclusive:
waking up one process at a time avoids the lock contention naturally.

Before hacking on wait_for_stripe, I found it actually has two
usages: for the array to enter or leave the quiescent state, and also
to wait for an available stripe in each of the hash lists.

So this patch splits the first usage off into a separate wait_queue,
wait_for_quiescent, and the next patch will turn the second usage into
one waitqueue for each hash value, and make it exclusive, to relieve
the lock contention.

v2: wake_up(wait_for_quiescent) when (active_stripes == 0)
    Commit log refactor suggestion from Neil.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
drivers/md/raid5.c
drivers/md/raid5.h

index b6793d2e051f3b278405f236e6623980bcdf1d04..a9112b39afee834eea5afae388eb167ec07527f4 100644 (file)
@@ -374,6 +374,8 @@ static void release_inactive_stripe_list(struct r5conf *conf,
 
        if (do_wakeup) {
                wake_up(&conf->wait_for_stripe);
+               if (atomic_read(&conf->active_stripes) == 0)
+                       wake_up(&conf->wait_for_quiescent);
                if (conf->retry_read_aligned)
                        md_wakeup_thread(conf->mddev->thread);
        }
@@ -667,7 +669,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
        spin_lock_irq(conf->hash_locks + hash);
 
        do {
-               wait_event_lock_irq(conf->wait_for_stripe,
+               wait_event_lock_irq(conf->wait_for_quiescent,
                                    conf->quiesce == 0 || noquiesce,
                                    *(conf->hash_locks + hash));
                sh = __find_stripe(conf, sector, conf->generation - previous);
@@ -4760,7 +4762,7 @@ static void raid5_align_endio(struct bio *bi, int error)
                                         raid_bi, 0);
                bio_endio(raid_bi, 0);
                if (atomic_dec_and_test(&conf->active_aligned_reads))
-                       wake_up(&conf->wait_for_stripe);
+                       wake_up(&conf->wait_for_quiescent);
                return;
        }
 
@@ -4855,7 +4857,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
                align_bi->bi_iter.bi_sector += rdev->data_offset;
 
                spin_lock_irq(&conf->device_lock);
-               wait_event_lock_irq(conf->wait_for_stripe,
+               wait_event_lock_irq(conf->wait_for_quiescent,
                                    conf->quiesce == 0,
                                    conf->device_lock);
                atomic_inc(&conf->active_aligned_reads);
@@ -5699,7 +5701,7 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
                bio_endio(raid_bio, 0);
        }
        if (atomic_dec_and_test(&conf->active_aligned_reads))
-               wake_up(&conf->wait_for_stripe);
+               wake_up(&conf->wait_for_quiescent);
        return handled;
 }
 
@@ -6433,6 +6435,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
                goto abort;
        spin_lock_init(&conf->device_lock);
        seqcount_init(&conf->gen_lock);
+       init_waitqueue_head(&conf->wait_for_quiescent);
        init_waitqueue_head(&conf->wait_for_stripe);
        init_waitqueue_head(&conf->wait_for_overlap);
        INIT_LIST_HEAD(&conf->handle_list);
@@ -7466,7 +7469,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
                 * active stripes can drain
                 */
                conf->quiesce = 2;
-               wait_event_cmd(conf->wait_for_stripe,
+               wait_event_cmd(conf->wait_for_quiescent,
                                    atomic_read(&conf->active_stripes) == 0 &&
                                    atomic_read(&conf->active_aligned_reads) == 0,
                                    unlock_all_device_hash_locks_irq(conf),
@@ -7480,7 +7483,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
        case 0: /* re-enable writes */
                lock_all_device_hash_locks_irq(conf);
                conf->quiesce = 0;
-               wake_up(&conf->wait_for_stripe);
+               wake_up(&conf->wait_for_quiescent);
                wake_up(&conf->wait_for_overlap);
                unlock_all_device_hash_locks_irq(conf);
                break;
index 896d603ad0da964d2c45f22039d8b733f0bef26e..9b84b8820fc566c2c2029f55dba242014e8779b5 100644 (file)
@@ -511,6 +511,7 @@ struct r5conf {
        struct list_head        inactive_list[NR_STRIPE_HASH_LOCKS];
        atomic_t                empty_inactive_list_nr;
        struct llist_head       released_stripes;
+       wait_queue_head_t       wait_for_quiescent;
        wait_queue_head_t       wait_for_stripe;
        wait_queue_head_t       wait_for_overlap;
        unsigned long           cache_state;