md/raid1: make sequential read detection per disk based
author: Shaohua Li <shli@kernel.org>
Tue, 31 Jul 2012 00:03:53 +0000 (10:03 +1000)
committer: NeilBrown <neilb@suse.de>
Tue, 31 Jul 2012 00:03:53 +0000 (10:03 +1000)
Currently the sequential read detection is global (one state for the whole
array). It is natural to make it per-disk, which can improve detection of
multiple concurrent sequential reads. The next patch will make SSD read
balancing stop using the distance-based algorithm, and there this change helps
detect truly sequential reads on SSDs.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: NeilBrown <neilb@suse.de>
drivers/md/raid1.c
drivers/md/raid1.h

index d3d3568b4fb1169d98df1d5aaba6167f891ed5f4..fb96c0c2db40e1a52ed49e1ee21ac027060952b6 100644 (file)
@@ -497,9 +497,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
        const sector_t this_sector = r1_bio->sector;
        int sectors;
        int best_good_sectors;
-       int start_disk;
        int best_disk;
-       int i;
+       int disk;
        sector_t best_dist;
        struct md_rdev *rdev;
        int choose_first;
@@ -517,23 +516,16 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
        best_good_sectors = 0;
 
        if (conf->mddev->recovery_cp < MaxSector &&
-           (this_sector + sectors >= conf->next_resync)) {
+           (this_sector + sectors >= conf->next_resync))
                choose_first = 1;
-               start_disk = 0;
-       } else {
+       else
                choose_first = 0;
-               start_disk = conf->last_used;
-       }
 
-       for (i = 0 ; i < conf->raid_disks * 2 ; i++) {
+       for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
                sector_t dist;
                sector_t first_bad;
                int bad_sectors;
 
-               int disk = start_disk + i;
-               if (disk >= conf->raid_disks * 2)
-                       disk -= conf->raid_disks * 2;
-
                rdev = rcu_dereference(conf->mirrors[disk].rdev);
                if (r1_bio->bios[disk] == IO_BLOCKED
                    || rdev == NULL
@@ -594,7 +586,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
                dist = abs(this_sector - conf->mirrors[disk].head_position);
                if (choose_first
                    /* Don't change to another disk for sequential reads */
-                   || conf->next_seq_sect == this_sector
+                   || conf->mirrors[disk].next_seq_sect == this_sector
                    || dist == 0
                    /* If device is idle, use it */
                    || atomic_read(&rdev->nr_pending) == 0) {
@@ -620,8 +612,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
                        goto retry;
                }
                sectors = best_good_sectors;
-               conf->next_seq_sect = this_sector + sectors;
-               conf->last_used = best_disk;
+               conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
        }
        rcu_read_unlock();
        *max_sectors = sectors;
@@ -2599,7 +2590,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
        conf->recovery_disabled = mddev->recovery_disabled - 1;
 
        err = -EIO;
-       conf->last_used = -1;
        for (i = 0; i < conf->raid_disks * 2; i++) {
 
                disk = conf->mirrors + i;
@@ -2625,19 +2615,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
                        if (disk->rdev &&
                            (disk->rdev->saved_raid_disk < 0))
                                conf->fullsync = 1;
-               } else if (conf->last_used < 0)
-                       /*
-                        * The first working device is used as a
-                        * starting point to read balancing.
-                        */
-                       conf->last_used = i;
+               }
        }
 
-       if (conf->last_used < 0) {
-               printk(KERN_ERR "md/raid1:%s: no operational mirrors\n",
-                      mdname(mddev));
-               goto abort;
-       }
        err = -ENOMEM;
        conf->thread = md_register_thread(raid1d, mddev, "raid1");
        if (!conf->thread) {
@@ -2894,7 +2874,6 @@ static int raid1_reshape(struct mddev *mddev)
        conf->raid_disks = mddev->raid_disks = raid_disks;
        mddev->delta_disks = 0;
 
-       conf->last_used = 0; /* just make sure it is in-range */
        lower_barrier(conf);
 
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
index 4e3613daaea20c91d99c7cf3dabbc5222d770ede..3770b4a2766257fbe6d35f90ffed302b963c2eb9 100644 (file)
@@ -4,6 +4,11 @@
 struct raid1_info {
        struct md_rdev  *rdev;
        sector_t        head_position;
+
+       /* When choosing the best device for a read (read_balance())
+        * we try to keep sequential reads on the same device
+        */
+       sector_t        next_seq_sect;
 };
 
 /*
@@ -29,12 +34,6 @@ struct r1conf {
                                                 */
        int                     raid_disks;
 
-       /* When choose the best device for a read (read_balance())
-        * we try to keep sequential reads one the same device
-        * using 'last_used' and 'next_seq_sect'
-        */
-       int                     last_used;
-       sector_t                next_seq_sect;
        /* During resync, read_balancing is only allowed on the part
         * of the array that has been resynced.  'next_resync' tells us
         * where that is.