Merge remote-tracking branches 'asoc/fix/blackfin', 'asoc/fix/da9055', 'asoc/fix...
[firefly-linux-kernel-4.4.55.git] / drivers / md / raid5.c
index cbb15716a5db31cf39a08b6a8a9df45d0bc9e0dc..16f5c21963db5391ed25fd1e185ab8399f353e74 100644 (file)
@@ -133,7 +133,7 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
 static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
 {
        int sectors = bio_sectors(bio);
-       if (bio->bi_sector + sectors < sector + STRIPE_SECTORS)
+       if (bio->bi_iter.bi_sector + sectors < sector + STRIPE_SECTORS)
                return bio->bi_next;
        else
                return NULL;
@@ -225,7 +225,7 @@ static void return_io(struct bio *return_bi)
 
                return_bi = bi->bi_next;
                bi->bi_next = NULL;
-               bi->bi_size = 0;
+               bi->bi_iter.bi_size = 0;
                trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
                                         bi, 0);
                bio_endio(bi, 0);
@@ -675,8 +675,10 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                                         || !conf->inactive_blocked),
                                        *(conf->hash_locks + hash));
                                conf->inactive_blocked = 0;
-                       } else
+                       } else {
                                init_stripe(sh, sector, previous);
+                               atomic_inc(&sh->count);
+                       }
                } else {
                        spin_lock(&conf->device_lock);
                        if (atomic_read(&sh->count)) {
@@ -695,13 +697,11 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                                        sh->group = NULL;
                                }
                        }
+                       atomic_inc(&sh->count);
                        spin_unlock(&conf->device_lock);
                }
        } while (sh == NULL);
 
-       if (sh)
-               atomic_inc(&sh->count);
-
        spin_unlock_irq(conf->hash_locks + hash);
        return sh;
 }
@@ -852,10 +852,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                                bi->bi_rw, i);
                        atomic_inc(&sh->count);
                        if (use_new_offset(conf, sh))
-                               bi->bi_sector = (sh->sector
+                               bi->bi_iter.bi_sector = (sh->sector
                                                 + rdev->new_data_offset);
                        else
-                               bi->bi_sector = (sh->sector
+                               bi->bi_iter.bi_sector = (sh->sector
                                                 + rdev->data_offset);
                        if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
                                bi->bi_rw |= REQ_NOMERGE;
@@ -863,7 +863,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                        bi->bi_vcnt = 1;
                        bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
                        bi->bi_io_vec[0].bv_offset = 0;
-                       bi->bi_size = STRIPE_SIZE;
+                       bi->bi_iter.bi_size = STRIPE_SIZE;
                        /*
                         * If this is discard request, set bi_vcnt 0. We don't
                         * want to confuse SCSI because SCSI will replace payload
@@ -899,15 +899,15 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                                rbi->bi_rw, i);
                        atomic_inc(&sh->count);
                        if (use_new_offset(conf, sh))
-                               rbi->bi_sector = (sh->sector
+                               rbi->bi_iter.bi_sector = (sh->sector
                                                  + rrdev->new_data_offset);
                        else
-                               rbi->bi_sector = (sh->sector
+                               rbi->bi_iter.bi_sector = (sh->sector
                                                  + rrdev->data_offset);
                        rbi->bi_vcnt = 1;
                        rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
                        rbi->bi_io_vec[0].bv_offset = 0;
-                       rbi->bi_size = STRIPE_SIZE;
+                       rbi->bi_iter.bi_size = STRIPE_SIZE;
                        /*
                         * If this is discard request, set bi_vcnt 0. We don't
                         * want to confuse SCSI because SCSI will replace payload
@@ -935,24 +935,24 @@ static struct dma_async_tx_descriptor *
 async_copy_data(int frombio, struct bio *bio, struct page *page,
        sector_t sector, struct dma_async_tx_descriptor *tx)
 {
-       struct bio_vec *bvl;
+       struct bio_vec bvl;
+       struct bvec_iter iter;
        struct page *bio_page;
-       int i;
        int page_offset;
        struct async_submit_ctl submit;
        enum async_tx_flags flags = 0;
 
-       if (bio->bi_sector >= sector)
-               page_offset = (signed)(bio->bi_sector - sector) * 512;
+       if (bio->bi_iter.bi_sector >= sector)
+               page_offset = (signed)(bio->bi_iter.bi_sector - sector) * 512;
        else
-               page_offset = (signed)(sector - bio->bi_sector) * -512;
+               page_offset = (signed)(sector - bio->bi_iter.bi_sector) * -512;
 
        if (frombio)
                flags |= ASYNC_TX_FENCE;
        init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
 
-       bio_for_each_segment(bvl, bio, i) {
-               int len = bvl->bv_len;
+       bio_for_each_segment(bvl, bio, iter) {
+               int len = bvl.bv_len;
                int clen;
                int b_offset = 0;
 
@@ -968,8 +968,8 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
                        clen = len;
 
                if (clen > 0) {
-                       b_offset += bvl->bv_offset;
-                       bio_page = bvl->bv_page;
+                       b_offset += bvl.bv_offset;
+                       bio_page = bvl.bv_page;
                        if (frombio)
                                tx = async_memcpy(page, bio_page, page_offset,
                                                  b_offset, clen, &submit);
@@ -1012,7 +1012,7 @@ static void ops_complete_biofill(void *stripe_head_ref)
                        BUG_ON(!dev->read);
                        rbi = dev->read;
                        dev->read = NULL;
-                       while (rbi && rbi->bi_sector <
+                       while (rbi && rbi->bi_iter.bi_sector <
                                dev->sector + STRIPE_SECTORS) {
                                rbi2 = r5_next_bio(rbi, dev->sector);
                                if (!raid5_dec_bi_active_stripes(rbi)) {
@@ -1048,7 +1048,7 @@ static void ops_run_biofill(struct stripe_head *sh)
                        dev->read = rbi = dev->toread;
                        dev->toread = NULL;
                        spin_unlock_irq(&sh->stripe_lock);
-                       while (rbi && rbi->bi_sector <
+                       while (rbi && rbi->bi_iter.bi_sector <
                                dev->sector + STRIPE_SECTORS) {
                                tx = async_copy_data(0, rbi, dev->page,
                                        dev->sector, tx);
@@ -1390,7 +1390,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
                        wbi = dev->written = chosen;
                        spin_unlock_irq(&sh->stripe_lock);
 
-                       while (wbi && wbi->bi_sector <
+                       while (wbi && wbi->bi_iter.bi_sector <
                                dev->sector + STRIPE_SECTORS) {
                                if (wbi->bi_rw & REQ_FUA)
                                        set_bit(R5_WantFUA, &dev->flags);
@@ -2111,6 +2111,7 @@ static void raid5_end_write_request(struct bio *bi, int error)
                        set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
        } else {
                if (!uptodate) {
+                       set_bit(STRIPE_DEGRADED, &sh->state);
                        set_bit(WriteErrorSeen, &rdev->flags);
                        set_bit(R5_WriteError, &sh->dev[i].flags);
                        if (!test_and_set_bit(WantReplacement, &rdev->flags))
@@ -2614,7 +2615,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
        int firstwrite=0;
 
        pr_debug("adding bi b#%llu to stripe s#%llu\n",
-               (unsigned long long)bi->bi_sector,
+               (unsigned long long)bi->bi_iter.bi_sector,
                (unsigned long long)sh->sector);
 
        /*
@@ -2632,12 +2633,12 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
                        firstwrite = 1;
        } else
                bip = &sh->dev[dd_idx].toread;
-       while (*bip && (*bip)->bi_sector < bi->bi_sector) {
-               if (bio_end_sector(*bip) > bi->bi_sector)
+       while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) {
+               if (bio_end_sector(*bip) > bi->bi_iter.bi_sector)
                        goto overlap;
                bip = & (*bip)->bi_next;
        }
-       if (*bip && (*bip)->bi_sector < bio_end_sector(bi))
+       if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
                goto overlap;
 
        BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
@@ -2651,7 +2652,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
                sector_t sector = sh->dev[dd_idx].sector;
                for (bi=sh->dev[dd_idx].towrite;
                     sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
-                            bi && bi->bi_sector <= sector;
+                            bi && bi->bi_iter.bi_sector <= sector;
                     bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
                        if (bio_end_sector(bi) >= sector)
                                sector = bio_end_sector(bi);
@@ -2661,7 +2662,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
        }
 
        pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
-               (unsigned long long)(*bip)->bi_sector,
+               (unsigned long long)(*bip)->bi_iter.bi_sector,
                (unsigned long long)sh->sector, dd_idx);
        spin_unlock_irq(&sh->stripe_lock);
 
@@ -2736,7 +2737,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
                        wake_up(&conf->wait_for_overlap);
 
-               while (bi && bi->bi_sector <
+               while (bi && bi->bi_iter.bi_sector <
                        sh->dev[i].sector + STRIPE_SECTORS) {
                        struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
                        clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -2755,7 +2756,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                bi = sh->dev[i].written;
                sh->dev[i].written = NULL;
                if (bi) bitmap_end = 1;
-               while (bi && bi->bi_sector <
+               while (bi && bi->bi_iter.bi_sector <
                       sh->dev[i].sector + STRIPE_SECTORS) {
                        struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
                        clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -2779,7 +2780,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                        spin_unlock_irq(&sh->stripe_lock);
                        if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
                                wake_up(&conf->wait_for_overlap);
-                       while (bi && bi->bi_sector <
+                       while (bi && bi->bi_iter.bi_sector <
                               sh->dev[i].sector + STRIPE_SECTORS) {
                                struct bio *nextbi =
                                        r5_next_bio(bi, sh->dev[i].sector);
@@ -3003,7 +3004,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                                        clear_bit(R5_UPTODATE, &dev->flags);
                                wbi = dev->written;
                                dev->written = NULL;
-                               while (wbi && wbi->bi_sector <
+                               while (wbi && wbi->bi_iter.bi_sector <
                                        dev->sector + STRIPE_SECTORS) {
                                        wbi2 = r5_next_bio(wbi, dev->sector);
                                        if (!raid5_dec_bi_active_stripes(wbi)) {
@@ -4095,7 +4096,7 @@ static int raid5_mergeable_bvec(struct request_queue *q,
 
 static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
 {
-       sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+       sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
        unsigned int chunk_sectors = mddev->chunk_sectors;
        unsigned int bio_sectors = bio_sectors(bio);
 
@@ -4232,9 +4233,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
        /*
         *      compute position
         */
-       align_bi->bi_sector =  raid5_compute_sector(conf, raid_bio->bi_sector,
-                                                   0,
-                                                   &dd_idx, NULL);
+       align_bi->bi_iter.bi_sector =
+               raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector,
+                                    0, &dd_idx, NULL);
 
        end_sector = bio_end_sector(align_bi);
        rcu_read_lock();
@@ -4259,7 +4260,8 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
                align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
 
                if (!bio_fits_rdev(align_bi) ||
-                   is_badblock(rdev, align_bi->bi_sector, bio_sectors(align_bi),
+                   is_badblock(rdev, align_bi->bi_iter.bi_sector,
+                               bio_sectors(align_bi),
                                &first_bad, &bad_sectors)) {
                        /* too big in some way, or has a known bad block */
                        bio_put(align_bi);
@@ -4268,7 +4270,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
                }
 
                /* No reshape active, so we can trust rdev->data_offset */
-               align_bi->bi_sector += rdev->data_offset;
+               align_bi->bi_iter.bi_sector += rdev->data_offset;
 
                spin_lock_irq(&conf->device_lock);
                wait_event_lock_irq(conf->wait_for_stripe,
@@ -4280,7 +4282,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
                if (mddev->gendisk)
                        trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
                                              align_bi, disk_devt(mddev->gendisk),
-                                             raid_bio->bi_sector);
+                                             raid_bio->bi_iter.bi_sector);
                generic_make_request(align_bi);
                return 1;
        } else {
@@ -4463,8 +4465,8 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
                /* Skip discard while reshape is happening */
                return;
 
-       logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
-       last_sector = bi->bi_sector + (bi->bi_size>>9);
+       logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+       last_sector = bi->bi_iter.bi_sector + (bi->bi_iter.bi_size>>9);
 
        bi->bi_next = NULL;
        bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
@@ -4568,7 +4570,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                return;
        }
 
-       logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+       logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
        last_sector = bio_end_sector(bi);
        bi->bi_next = NULL;
        bi->bi_phys_segments = 1;       /* over-loaded to count active stripes */
@@ -5052,7 +5054,8 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
        int remaining;
        int handled = 0;
 
-       logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+       logical_sector = raid_bio->bi_iter.bi_sector &
+               ~((sector_t)STRIPE_SECTORS-1);
        sector = raid5_compute_sector(conf, logical_sector,
                                      0, &dd_idx, NULL);
        last_sector = bio_end_sector(raid_bio);
@@ -5511,23 +5514,43 @@ raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
        return sectors * (raid_disks - conf->max_degraded);
 }
 
+static void free_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu)
+{
+       safe_put_page(percpu->spare_page);
+       kfree(percpu->scribble);
+       percpu->spare_page = NULL;
+       percpu->scribble = NULL;
+}
+
+static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu)
+{
+       if (conf->level == 6 && !percpu->spare_page)
+               percpu->spare_page = alloc_page(GFP_KERNEL);
+       if (!percpu->scribble)
+               percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
+
+       if (!percpu->scribble || (conf->level == 6 && !percpu->spare_page)) {
+               free_scratch_buffer(conf, percpu);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
 static void raid5_free_percpu(struct r5conf *conf)
 {
-       struct raid5_percpu *percpu;
        unsigned long cpu;
 
        if (!conf->percpu)
                return;
 
-       get_online_cpus();
-       for_each_possible_cpu(cpu) {
-               percpu = per_cpu_ptr(conf->percpu, cpu);
-               safe_put_page(percpu->spare_page);
-               kfree(percpu->scribble);
-       }
 #ifdef CONFIG_HOTPLUG_CPU
        unregister_cpu_notifier(&conf->cpu_notify);
 #endif
+
+       get_online_cpus();
+       for_each_possible_cpu(cpu)
+               free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
        put_online_cpus();
 
        free_percpu(conf->percpu);
@@ -5554,15 +5577,7 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
-               if (conf->level == 6 && !percpu->spare_page)
-                       percpu->spare_page = alloc_page(GFP_KERNEL);
-               if (!percpu->scribble)
-                       percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
-
-               if (!percpu->scribble ||
-                   (conf->level == 6 && !percpu->spare_page)) {
-                       safe_put_page(percpu->spare_page);
-                       kfree(percpu->scribble);
+               if (alloc_scratch_buffer(conf, percpu)) {
                        pr_err("%s: failed memory allocation for cpu%ld\n",
                               __func__, cpu);
                        return notifier_from_errno(-ENOMEM);
@@ -5570,10 +5585,7 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
-               safe_put_page(percpu->spare_page);
-               kfree(percpu->scribble);
-               percpu->spare_page = NULL;
-               percpu->scribble = NULL;
+               free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
                break;
        default:
                break;
@@ -5585,40 +5597,29 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
 static int raid5_alloc_percpu(struct r5conf *conf)
 {
        unsigned long cpu;
-       struct page *spare_page;
-       struct raid5_percpu __percpu *allcpus;
-       void *scribble;
-       int err;
+       int err = 0;
 
-       allcpus = alloc_percpu(struct raid5_percpu);
-       if (!allcpus)
+       conf->percpu = alloc_percpu(struct raid5_percpu);
+       if (!conf->percpu)
                return -ENOMEM;
-       conf->percpu = allcpus;
+
+#ifdef CONFIG_HOTPLUG_CPU
+       conf->cpu_notify.notifier_call = raid456_cpu_notify;
+       conf->cpu_notify.priority = 0;
+       err = register_cpu_notifier(&conf->cpu_notify);
+       if (err)
+               return err;
+#endif
 
        get_online_cpus();
-       err = 0;
        for_each_present_cpu(cpu) {
-               if (conf->level == 6) {
-                       spare_page = alloc_page(GFP_KERNEL);
-                       if (!spare_page) {
-                               err = -ENOMEM;
-                               break;
-                       }
-                       per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
-               }
-               scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
-               if (!scribble) {
-                       err = -ENOMEM;
+               err = alloc_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
+               if (err) {
+                       pr_err("%s: failed memory allocation for cpu%ld\n",
+                              __func__, cpu);
                        break;
                }
-               per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
        }
-#ifdef CONFIG_HOTPLUG_CPU
-       conf->cpu_notify.notifier_call = raid456_cpu_notify;
-       conf->cpu_notify.priority = 0;
-       if (err == 0)
-               err = register_cpu_notifier(&conf->cpu_notify);
-#endif
        put_online_cpus();
 
        return err;
@@ -6100,6 +6101,7 @@ static int run(struct mddev *mddev)
                blk_queue_io_min(mddev->queue, chunk_size);
                blk_queue_io_opt(mddev->queue, chunk_size *
                                 (conf->raid_disks - conf->max_degraded));
+               mddev->queue->limits.raid_partial_stripes_expensive = 1;
                /*
                 * We can only discard a whole stripe. It doesn't make sense to
                 * discard data disk but write parity disk