Merge branch 'writeback-workqueue' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Jens Axboe <axboe@kernel.dk>
Tue, 2 Apr 2013 08:04:39 +0000 (10:04 +0200)
committer Jens Axboe <axboe@kernel.dk>
Tue, 2 Apr 2013 08:04:39 +0000 (10:04 +0200)
Tejun writes:

-----

This is the pull request for the earlier patchset[1] with the same
name.  It's only three patches (the first one was committed to the
workqueue tree), but the merge strategy is a bit involved due to the
dependencies.

* Because the conversion needs features from wq/for-3.10,
  block/for-3.10/core is based on rc3, and wq/for-3.10 has conflicts
  with rc3, I pulled mainline (rc5) into wq/for-3.10 to prevent those
  workqueue conflicts from flaring up in block tree.

* Resolving the issue that Jan and Dave raised about debugging
  requires arch-wide changes.  The patchset is being worked on[2] but
  it'll have to go through -mm after these changes show up in -next,
  and is not included in this pull request.

The three commits are located in the following git branch.

  git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq.git writeback-workqueue

Pulling it into block/for-3.10/core produces a conflict in
drivers/md/raid5.c between the following two commits.

  e3620a3ad5 ("MD RAID5: Avoid accessing gendisk or queue structs when not available")
  2f6db2a707 ("raid5: use bio_reset()")

The conflict is trivial - one removes an "if ()" conditional while the
other removes "rbi->bi_next = NULL" right above it.  We just need to
remove both.  The merged branch is available at

  git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq.git block-test-merge

so that you can use it for verification.  The test merge commit has a
proper merge description.

While these changes are a bit of a pain to route, they make the code
simpler and even show a minute but measurable performance gain[3], even
on a workload which isn't particularly favorable to showing the benefits
of this conversion.

-----

Fixed up the conflict.

Conflicts:
drivers/md/raid5.c

Signed-off-by: Jens Axboe <axboe@kernel.dk>
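
For reference, the shape of that raid5.c conflict, schematically.  This is
a placeholder illustration only -- "cond" and "body()" stand in for the
actual raid5.c code, which is not reproduced here:

  /* Placeholder sketch, not the real drivers/md/raid5.c contents. */
          rbi->bi_next = NULL;          /* deleted by 2f6db2a707 */
          if (cond)                     /* conditional deleted by e3620a3ad5 */
                  body();
  /* Resolution: keep both deletions, so neither line survives. */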
41 files changed:
block/blk-core.c
block/cfq-iosched.c
block/deadline-iosched.c
block/elevator.c
drivers/block/aoe/aoecmd.c
drivers/block/brd.c
drivers/block/floppy.c
drivers/block/pktcdvd.c
drivers/block/rbd.c
drivers/md/dm-crypt.c
drivers/md/dm-raid1.c
drivers/md/dm-stripe.c
drivers/md/dm-verity.c
drivers/md/faulty.c
drivers/md/linear.c
drivers/md/md.c
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/message/fusion/mptsas.c
drivers/s390/block/dcssblk.c
drivers/scsi/libsas/sas_expander.c
drivers/scsi/mpt2sas/mpt2sas_transport.c
fs/bio-integrity.c
fs/bio.c
fs/btrfs/extent_io.c
fs/btrfs/volumes.c
fs/buffer.c
fs/direct-io.c
fs/exofs/ore.c
fs/exofs/ore_raid.c
fs/gfs2/lops.c
fs/jfs/jfs_logmgr.c
fs/logfs/dev_bdev.c
include/linux/bio.h
include/linux/blk_types.h
include/linux/blkdev.h
include/trace/events/block.h
mm/bounce.c
mm/page_io.c

index 074b758efc42cf116d61f4755f0107f491890980..f224d1793ee5e5975f427cc9259e587970f19884 100644
@@ -30,6 +30,7 @@
 #include <linux/list_sort.h>
 #include <linux/delay.h>
 #include <linux/ratelimit.h>
+#include <linux/pm_runtime.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -158,20 +159,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
        else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
                error = -EIO;
 
-       if (unlikely(nbytes > bio->bi_size)) {
-               printk(KERN_ERR "%s: want %u bytes done, %u left\n",
-                      __func__, nbytes, bio->bi_size);
-               nbytes = bio->bi_size;
-       }
-
        if (unlikely(rq->cmd_flags & REQ_QUIET))
                set_bit(BIO_QUIET, &bio->bi_flags);
 
-       bio->bi_size -= nbytes;
-       bio->bi_sector += (nbytes >> 9);
-
-       if (bio_integrity(bio))
-               bio_integrity_advance(bio, nbytes);
+       bio_advance(bio, nbytes);
 
        /* don't actually finish bio if it's part of flush sequence */
        if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
@@ -1263,6 +1254,16 @@ void part_round_stats(int cpu, struct hd_struct *part)
 }
 EXPORT_SYMBOL_GPL(part_round_stats);
 
+#ifdef CONFIG_PM_RUNTIME
+static void blk_pm_put_request(struct request *rq)
+{
+       if (rq->q->dev && !(rq->cmd_flags & REQ_PM) && !--rq->q->nr_pending)
+               pm_runtime_mark_last_busy(rq->q->dev);
+}
+#else
+static inline void blk_pm_put_request(struct request *rq) {}
+#endif
+
 /*
  * queue lock must be held
  */
@@ -1273,6 +1274,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
        if (unlikely(--req->ref_count))
                return;
 
+       blk_pm_put_request(req);
+
        elv_completed_request(q, req);
 
        /* this is a bio leak */
@@ -1596,7 +1599,7 @@ static void handle_bad_sector(struct bio *bio)
        printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
                        bdevname(bio->bi_bdev, b),
                        bio->bi_rw,
-                       (unsigned long long)bio->bi_sector + bio_sectors(bio),
+                       (unsigned long long)bio_end_sector(bio),
                        (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
 
        set_bit(BIO_EOF, &bio->bi_flags);
@@ -2052,6 +2055,28 @@ static void blk_account_io_done(struct request *req)
        }
 }
 
+#ifdef CONFIG_PM_RUNTIME
+/*
+ * Don't process normal requests when the queue is suspended
+ * or in the process of suspending/resuming
+ */
+static struct request *blk_pm_peek_request(struct request_queue *q,
+                                          struct request *rq)
+{
+       if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
+           (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM))))
+               return NULL;
+       else
+               return rq;
+}
+#else
+static inline struct request *blk_pm_peek_request(struct request_queue *q,
+                                                 struct request *rq)
+{
+       return rq;
+}
+#endif
+
 /**
  * blk_peek_request - peek at the top of a request queue
  * @q: request queue to peek at
@@ -2074,6 +2099,11 @@ struct request *blk_peek_request(struct request_queue *q)
        int ret;
 
        while ((rq = __elv_next_request(q)) != NULL) {
+
+               rq = blk_pm_peek_request(q, rq);
+               if (!rq)
+                       break;
+
                if (!(rq->cmd_flags & REQ_STARTED)) {
                        /*
                         * This is the first time the device driver
@@ -2252,8 +2282,7 @@ EXPORT_SYMBOL(blk_fetch_request);
  **/
 bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 {
-       int total_bytes, bio_nbytes, next_idx = 0;
-       struct bio *bio;
+       int total_bytes;
 
        if (!req->bio)
                return false;
@@ -2299,56 +2328,21 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 
        blk_account_io_completion(req, nr_bytes);
 
-       total_bytes = bio_nbytes = 0;
-       while ((bio = req->bio) != NULL) {
-               int nbytes;
+       total_bytes = 0;
+       while (req->bio) {
+               struct bio *bio = req->bio;
+               unsigned bio_bytes = min(bio->bi_size, nr_bytes);
 
-               if (nr_bytes >= bio->bi_size) {
+               if (bio_bytes == bio->bi_size)
                        req->bio = bio->bi_next;
-                       nbytes = bio->bi_size;
-                       req_bio_endio(req, bio, nbytes, error);
-                       next_idx = 0;
-                       bio_nbytes = 0;
-               } else {
-                       int idx = bio->bi_idx + next_idx;
 
-                       if (unlikely(idx >= bio->bi_vcnt)) {
-                               blk_dump_rq_flags(req, "__end_that");
-                               printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
-                                      __func__, idx, bio->bi_vcnt);
-                               break;
-                       }
+               req_bio_endio(req, bio, bio_bytes, error);
 
-                       nbytes = bio_iovec_idx(bio, idx)->bv_len;
-                       BIO_BUG_ON(nbytes > bio->bi_size);
+               total_bytes += bio_bytes;
+               nr_bytes -= bio_bytes;
 
-                       /*
-                        * not a complete bvec done
-                        */
-                       if (unlikely(nbytes > nr_bytes)) {
-                               bio_nbytes += nr_bytes;
-                               total_bytes += nr_bytes;
-                               break;
-                       }
-
-                       /*
-                        * advance to the next vector
-                        */
-                       next_idx++;
-                       bio_nbytes += nbytes;
-               }
-
-               total_bytes += nbytes;
-               nr_bytes -= nbytes;
-
-               bio = req->bio;
-               if (bio) {
-                       /*
-                        * end more in this run, or just return 'not-done'
-                        */
-                       if (unlikely(nr_bytes <= 0))
-                               break;
-               }
+               if (!nr_bytes)
+                       break;
        }
 
        /*
@@ -2364,16 +2358,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
                return false;
        }
 
-       /*
-        * if the request wasn't completed, update state
-        */
-       if (bio_nbytes) {
-               req_bio_endio(req, bio, bio_nbytes, error);
-               bio->bi_idx += next_idx;
-               bio_iovec(bio)->bv_offset += nr_bytes;
-               bio_iovec(bio)->bv_len -= nr_bytes;
-       }
-
        req->__data_len -= total_bytes;
        req->buffer = bio_data(req->bio);
 
@@ -3045,6 +3029,149 @@ void blk_finish_plug(struct blk_plug *plug)
 }
 EXPORT_SYMBOL(blk_finish_plug);
 
+#ifdef CONFIG_PM_RUNTIME
+/**
+ * blk_pm_runtime_init - Block layer runtime PM initialization routine
+ * @q: the queue of the device
+ * @dev: the device the queue belongs to
+ *
+ * Description:
+ *    Initialize runtime-PM-related fields for @q and start auto suspend for
+ *    @dev. Drivers that want to take advantage of request-based runtime PM
+ *    should call this function after @dev has been initialized, its request
+ *    queue @q has been allocated, and runtime PM for it cannot happen yet
+ *    (either because it is disabled/forbidden or its usage_count > 0). In
+ *    most cases, the driver should call this function before any I/O occurs.
+ *
+ *    This function takes care of setting up autosuspend for the device; the
+ *    autosuspend delay is set to -1 to make runtime suspend impossible until
+ *    an updated value is set either by the user or by the driver. Drivers do
+ *    not need to touch other autosuspend settings.
+ *
+ *    Block layer runtime PM is request based, so it only works for drivers
+ *    that use requests as their I/O unit rather than using bios directly.
+ */
+void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
+{
+       q->dev = dev;
+       q->rpm_status = RPM_ACTIVE;
+       pm_runtime_set_autosuspend_delay(q->dev, -1);
+       pm_runtime_use_autosuspend(q->dev);
+}
+EXPORT_SYMBOL(blk_pm_runtime_init);
+
+/**
+ * blk_pre_runtime_suspend - Pre runtime suspend check
+ * @q: the queue of the device
+ *
+ * Description:
+ *    This function checks whether runtime suspend is allowed for the device
+ *    by examining whether there are any requests pending in the queue. If
+ *    there are requests pending, the device cannot be runtime suspended;
+ *    otherwise, the queue's status will be updated to SUSPENDING and the
+ *    driver can proceed to suspend the device.
+ *
+ *    In the not-allowed case, we mark last busy for the device so that the
+ *    runtime PM core will try to autosuspend it some time later.
+ *
+ *    This function should be called near the start of the device's
+ *    runtime_suspend callback.
+ *
+ * Return:
+ *    0        - OK to runtime suspend the device
+ *    -EBUSY   - Device should not be runtime suspended
+ */
+int blk_pre_runtime_suspend(struct request_queue *q)
+{
+       int ret = 0;
+
+       spin_lock_irq(q->queue_lock);
+       if (q->nr_pending) {
+               ret = -EBUSY;
+               pm_runtime_mark_last_busy(q->dev);
+       } else {
+               q->rpm_status = RPM_SUSPENDING;
+       }
+       spin_unlock_irq(q->queue_lock);
+       return ret;
+}
+EXPORT_SYMBOL(blk_pre_runtime_suspend);
+
+/**
+ * blk_post_runtime_suspend - Post runtime suspend processing
+ * @q: the queue of the device
+ * @err: return value of the device's runtime_suspend function
+ *
+ * Description:
+ *    Update the queue's runtime status according to the return value of the
+ *    device's runtime suspend function and mark last busy for the device so
+ *    that the PM core will try to autosuspend the device at a later time.
+ *
+ *    This function should be called near the end of the device's
+ *    runtime_suspend callback.
+ */
+void blk_post_runtime_suspend(struct request_queue *q, int err)
+{
+       spin_lock_irq(q->queue_lock);
+       if (!err) {
+               q->rpm_status = RPM_SUSPENDED;
+       } else {
+               q->rpm_status = RPM_ACTIVE;
+               pm_runtime_mark_last_busy(q->dev);
+       }
+       spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_post_runtime_suspend);
+
+/**
+ * blk_pre_runtime_resume - Pre runtime resume processing
+ * @q: the queue of the device
+ *
+ * Description:
+ *    Update the queue's runtime status to RESUMING in preparation for the
+ *    runtime resume of the device.
+ *
+ *    This function should be called near the start of the device's
+ *    runtime_resume callback.
+ */
+void blk_pre_runtime_resume(struct request_queue *q)
+{
+       spin_lock_irq(q->queue_lock);
+       q->rpm_status = RPM_RESUMING;
+       spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_pre_runtime_resume);
+
+/**
+ * blk_post_runtime_resume - Post runtime resume processing
+ * @q: the queue of the device
+ * @err: return value of the device's runtime_resume function
+ *
+ * Description:
+ *    Update the queue's runtime status according to the return value of the
+ *    device's runtime_resume function. If the device was successfully
+ *    resumed, process the requests that were queued while it was resuming,
+ *    then mark last busy and initiate autosuspend for it.
+ *
+ *    This function should be called near the end of the device's
+ *    runtime_resume callback.
+ */
+void blk_post_runtime_resume(struct request_queue *q, int err)
+{
+       spin_lock_irq(q->queue_lock);
+       if (!err) {
+               q->rpm_status = RPM_ACTIVE;
+               __blk_run_queue(q);
+               pm_runtime_mark_last_busy(q->dev);
+               pm_runtime_autosuspend(q->dev);
+       } else {
+               q->rpm_status = RPM_SUSPENDED;
+       }
+       spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_post_runtime_resume);
+#endif
+
 int __init blk_dev_init(void)
 {
        BUILD_BUG_ON(__REQ_NR_BITS > 8 *
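
Taken together, the hooks above give a request-based driver a complete
runtime PM skeleton.  A minimal sketch of how a driver might wire them up
follows; everything named mydrv_* and dev_to_mydrv_queue() is hypothetical,
only the blk_* calls come from this patch:

  /* Sketch only -- mydrv_* and dev_to_mydrv_queue() are made-up names. */
  static int mydrv_runtime_suspend(struct device *dev)
  {
          struct request_queue *q = dev_to_mydrv_queue(dev);
          int err;

          err = blk_pre_runtime_suspend(q);
          if (err)
                  return err;                  /* -EBUSY: requests pending */
          err = mydrv_quiesce_hw(dev);         /* hypothetical HW suspend */
          blk_post_runtime_suspend(q, err);
          return err;
  }

  static int mydrv_runtime_resume(struct device *dev)
  {
          struct request_queue *q = dev_to_mydrv_queue(dev);
          int err;

          blk_pre_runtime_resume(q);
          err = mydrv_wake_hw(dev);            /* hypothetical HW resume */
          /* on success this restarts the queue and re-arms autosuspend */
          blk_post_runtime_resume(q, err);
          return err;
  }

At probe time the driver would call blk_pm_runtime_init(q, dev) once both
exist, then set a real autosuspend delay (the init above deliberately leaves
it at -1, i.e. suspend disabled, until someone does).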
index 4f0ade74cfd04a1c48f22218a6c4369517efa88b..d5cd3131c57a36645bcf049e28ab9a46ab5ea559 100644
@@ -2270,11 +2270,8 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
                return NULL;
 
        cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
-       if (cfqq) {
-               sector_t sector = bio->bi_sector + bio_sectors(bio);
-
-               return elv_rb_find(&cfqq->sort_list, sector);
-       }
+       if (cfqq)
+               return elv_rb_find(&cfqq->sort_list, bio_end_sector(bio));
 
        return NULL;
 }
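
These bio_end_sector()/bio_sectors() conversions are mechanical.  Judging
from the open-coded expressions they replace (here, in blk-core.c above, and
in pktcdvd.c below), the accessors presumably reduce to the following; this
is an inference from the diff, not a quote of include/linux/bio.h:

  #define bio_sectors(bio)      ((bio)->bi_size >> 9)
  #define bio_end_sector(bio)   ((bio)->bi_sector + bio_sectors(bio))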
index 90037b5eb17fca86bc2212241a6d1916cf83960c..ba19a3afab7929cf3ff0857683ccaef40a007a77 100644
@@ -132,7 +132,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
         * check for front merge
         */
        if (dd->front_merges) {
-               sector_t sector = bio->bi_sector + bio_sectors(bio);
+               sector_t sector = bio_end_sector(bio);
 
                __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
                if (__rq) {
index a0ffdd943c98aa5e0f39f102b98f1bc4cf9777f5..eba5b04c29b135bdc6b84ac80690d380d59dd8a5 100644
@@ -34,6 +34,7 @@
 #include <linux/blktrace_api.h>
 #include <linux/hash.h>
 #include <linux/uaccess.h>
+#include <linux/pm_runtime.h>
 
 #include <trace/events/block.h>
 
@@ -536,6 +537,27 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
                e->type->ops.elevator_bio_merged_fn(q, rq, bio);
 }
 
+#ifdef CONFIG_PM_RUNTIME
+static void blk_pm_requeue_request(struct request *rq)
+{
+       if (rq->q->dev && !(rq->cmd_flags & REQ_PM))
+               rq->q->nr_pending--;
+}
+
+static void blk_pm_add_request(struct request_queue *q, struct request *rq)
+{
+       if (q->dev && !(rq->cmd_flags & REQ_PM) && q->nr_pending++ == 0 &&
+           (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
+               pm_request_resume(q->dev);
+}
+#else
+static inline void blk_pm_requeue_request(struct request *rq) {}
+static inline void blk_pm_add_request(struct request_queue *q,
+                                     struct request *rq)
+{
+}
+#endif
+
 void elv_requeue_request(struct request_queue *q, struct request *rq)
 {
        /*
@@ -550,6 +572,8 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
 
        rq->cmd_flags &= ~REQ_STARTED;
 
+       blk_pm_requeue_request(rq);
+
        __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
 }
 
@@ -572,6 +596,8 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 {
        trace_block_rq_insert(q, rq);
 
+       blk_pm_add_request(q, rq);
+
        rq->q = q;
 
        if (rq->cmd_flags & REQ_SOFTBARRIER) {
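
Together with blk_pm_put_request() in blk-core.c, these two helpers keep
q->nr_pending in step with the request lifecycle.  A sketch of the
accounting for a single non-REQ_PM request, as wired up by this patch:

  /*
   * __elv_add_request()   -> blk_pm_add_request():     nr_pending++; kicks
   *                          pm_request_resume() if suspended/suspending
   * elv_requeue_request() -> blk_pm_requeue_request(): nr_pending-- (the
   *                          re-insert that follows increments it again)
   * __blk_put_request()   -> blk_pm_put_request():     nr_pending--; marks
   *                          last busy when the count reaches zero
   */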
index 25ef5c014fcaa4255e0e3009991b6a9ac1ff18c7..af96ca171238e1f9ca6499ca9d3f3f5f5194d648 100644
@@ -927,7 +927,7 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio)
        buf->resid = bio->bi_size;
        buf->sector = bio->bi_sector;
        bio_pageinc(bio);
-       buf->bv = bv = &bio->bi_io_vec[bio->bi_idx];
+       buf->bv = bv = bio_iovec(bio);
        buf->bv_resid = bv->bv_len;
        WARN_ON(buf->bv_resid == 0);
 }
index 531ceb31d0ff86a4aec90301ea508254ea443457..f1a29f8e9d33dbe45474172a24f952f32ce4ba47 100644
@@ -334,8 +334,7 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
        int err = -EIO;
 
        sector = bio->bi_sector;
-       if (sector + (bio->bi_size >> SECTOR_SHIFT) >
-                                               get_capacity(bdev->bd_disk))
+       if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
                goto out;
 
        if (unlikely(bio->bi_rw & REQ_DISCARD)) {
index 2ddd64a9ffdee43429653806be3262a80c372240..83232639034eda69da39d8f84eb60e8f63339538 100644
@@ -3777,7 +3777,6 @@ static int __floppy_read_block_0(struct block_device *bdev)
        bio_vec.bv_len = size;
        bio_vec.bv_offset = 0;
        bio.bi_vcnt = 1;
-       bio.bi_idx = 0;
        bio.bi_size = size;
        bio.bi_bdev = bdev;
        bio.bi_sector = 0;
index 2e7de7a59bfca01c4a35edaa1824a7a38f90e52f..11190424536a95a8cf1ca3eb84a46fc396930e69 100644
@@ -901,7 +901,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
                        pd->iosched.successive_reads += bio->bi_size >> 10;
                else {
                        pd->iosched.successive_reads = 0;
-                       pd->iosched.last_write = bio->bi_sector + bio_sectors(bio);
+                       pd->iosched.last_write = bio_end_sector(bio);
                }
                if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) {
                        if (pd->read_speed == pd->write_speed) {
@@ -947,31 +947,6 @@ static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_que
        }
 }
 
-/*
- * Copy CD_FRAMESIZE bytes from src_bio into a destination page
- */
-static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, struct page *dst_page, int dst_offs)
-{
-       unsigned int copy_size = CD_FRAMESIZE;
-
-       while (copy_size > 0) {
-               struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg);
-               void *vfrom = kmap_atomic(src_bvl->bv_page) +
-                       src_bvl->bv_offset + offs;
-               void *vto = page_address(dst_page) + dst_offs;
-               int len = min_t(int, copy_size, src_bvl->bv_len - offs);
-
-               BUG_ON(len < 0);
-               memcpy(vto, vfrom, len);
-               kunmap_atomic(vfrom);
-
-               seg++;
-               offs = 0;
-               dst_offs += len;
-               copy_size -= len;
-       }
-}
-
 /*
  * Copy all data for this packet to pkt->pages[], so that
  * a) The number of required segments for the write bio is minimized, which
@@ -1181,16 +1156,15 @@ static int pkt_start_recovery(struct packet_data *pkt)
        new_sector = new_block * (CD_FRAMESIZE >> 9);
        pkt->sector = new_sector;
 
+       bio_reset(pkt->bio);
+       pkt->bio->bi_bdev = pd->bdev;
+       pkt->bio->bi_rw = REQ_WRITE;
        pkt->bio->bi_sector = new_sector;
-       pkt->bio->bi_next = NULL;
-       pkt->bio->bi_flags = 1 << BIO_UPTODATE;
-       pkt->bio->bi_idx = 0;
+       pkt->bio->bi_size = pkt->frames * CD_FRAMESIZE;
+       pkt->bio->bi_vcnt = pkt->frames;
 
-       BUG_ON(pkt->bio->bi_rw != REQ_WRITE);
-       BUG_ON(pkt->bio->bi_vcnt != pkt->frames);
-       BUG_ON(pkt->bio->bi_size != pkt->frames * CD_FRAMESIZE);
-       BUG_ON(pkt->bio->bi_end_io != pkt_end_io_packet_write);
-       BUG_ON(pkt->bio->bi_private != pkt);
+       pkt->bio->bi_end_io = pkt_end_io_packet_write;
+       pkt->bio->bi_private = pkt;
 
        drop_super(sb);
        return 1;
@@ -1325,55 +1299,35 @@ try_next_bio:
  */
 static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 {
-       struct bio *bio;
        int f;
-       int frames_write;
        struct bio_vec *bvec = pkt->w_bio->bi_io_vec;
 
+       bio_reset(pkt->w_bio);
+       pkt->w_bio->bi_sector = pkt->sector;
+       pkt->w_bio->bi_bdev = pd->bdev;
+       pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
+       pkt->w_bio->bi_private = pkt;
+
+       /* XXX: locking? */
        for (f = 0; f < pkt->frames; f++) {
                bvec[f].bv_page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE];
                bvec[f].bv_offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
+               if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
+                       BUG();
        }
+       VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt);
 
        /*
         * Fill-in bvec with data from orig_bios.
         */
-       frames_write = 0;
        spin_lock(&pkt->lock);
-       bio_list_for_each(bio, &pkt->orig_bios) {
-               int segment = bio->bi_idx;
-               int src_offs = 0;
-               int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
-               int num_frames = bio->bi_size / CD_FRAMESIZE;
-               BUG_ON(first_frame < 0);
-               BUG_ON(first_frame + num_frames > pkt->frames);
-               for (f = first_frame; f < first_frame + num_frames; f++) {
-                       struct bio_vec *src_bvl = bio_iovec_idx(bio, segment);
-
-                       while (src_offs >= src_bvl->bv_len) {
-                               src_offs -= src_bvl->bv_len;
-                               segment++;
-                               BUG_ON(segment >= bio->bi_vcnt);
-                               src_bvl = bio_iovec_idx(bio, segment);
-                       }
+       bio_copy_data(pkt->w_bio, pkt->orig_bios.head);
 
-                       if (src_bvl->bv_len - src_offs >= CD_FRAMESIZE) {
-                               bvec[f].bv_page = src_bvl->bv_page;
-                               bvec[f].bv_offset = src_bvl->bv_offset + src_offs;
-                       } else {
-                               pkt_copy_bio_data(bio, segment, src_offs,
-                                                 bvec[f].bv_page, bvec[f].bv_offset);
-                       }
-                       src_offs += CD_FRAMESIZE;
-                       frames_write++;
-               }
-       }
        pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
        spin_unlock(&pkt->lock);
 
        VPRINTK("pkt_start_write: Writing %d frames for zone %llx\n",
-               frames_write, (unsigned long long)pkt->sector);
-       BUG_ON(frames_write != pkt->write_size);
+               pkt->write_size, (unsigned long long)pkt->sector);
 
        if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) {
                pkt_make_local_copy(pkt, bvec);
@@ -1383,16 +1337,6 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
        }
 
        /* Start the write request */
-       bio_reset(pkt->w_bio);
-       pkt->w_bio->bi_sector = pkt->sector;
-       pkt->w_bio->bi_bdev = pd->bdev;
-       pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
-       pkt->w_bio->bi_private = pkt;
-       for (f = 0; f < pkt->frames; f++)
-               if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
-                       BUG();
-       VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt);
-
        atomic_set(&pkt->io_wait, 1);
        pkt->w_bio->bi_rw = WRITE;
        pkt_queue_bio(pd, pkt->w_bio);
@@ -2433,7 +2377,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
                cloned_bio->bi_bdev = pd->bdev;
                cloned_bio->bi_private = psd;
                cloned_bio->bi_end_io = pkt_end_io_read_cloned;
-               pd->stats.secs_r += bio->bi_size >> 9;
+               pd->stats.secs_r += bio_sectors(bio);
                pkt_queue_bio(pd, cloned_bio);
                return;
        }
@@ -2454,7 +2398,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
        zone = ZONE(bio->bi_sector, pd);
        VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n",
                (unsigned long long)bio->bi_sector,
-               (unsigned long long)(bio->bi_sector + bio_sectors(bio)));
+               (unsigned long long)bio_end_sector(bio));
 
        /* Check if we have to split the bio */
        {
@@ -2462,7 +2406,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
                sector_t last_zone;
                int first_sectors;
 
-               last_zone = ZONE(bio->bi_sector + bio_sectors(bio) - 1, pd);
+               last_zone = ZONE(bio_end_sector(bio) - 1, pd);
                if (last_zone != zone) {
                        BUG_ON(last_zone != zone + pd->settings.size);
                        first_sectors = last_zone - bio->bi_sector;
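
bio_copy_data() -- destination first, source second, per the call above --
subsumes the removed pkt_copy_bio_data() helper and its surrounding loop.  A
rough sketch of the per-segment copy it implies, inferred from the deleted
code rather than from fs/bio.c (the real helper must also follow bi_next,
since it is handed the whole orig_bios list here):

  /* Sketch of the presumed semantics, not the actual implementation. */
  static void bio_copy_data_sketch(struct bio *dst, struct bio *src)
  {
          struct bio_vec *sv = bio_iovec(src), *dv = bio_iovec(dst);
          unsigned soff = 0, doff = 0;

          while (sv < src->bi_io_vec + src->bi_vcnt &&
                 dv < dst->bi_io_vec + dst->bi_vcnt) {
                  unsigned len = min(sv->bv_len - soff, dv->bv_len - doff);
                  void *sp = kmap_atomic(sv->bv_page);
                  void *dp = kmap_atomic(dv->bv_page);

                  memcpy(dp + dv->bv_offset + doff,
                         sp + sv->bv_offset + soff, len);

                  kunmap_atomic(dp);
                  kunmap_atomic(sp);

                  soff += len;
                  doff += len;
                  if (soff == sv->bv_len) { sv++; soff = 0; }
                  if (doff == dv->bv_len) { dv++; doff = 0; }
          }
  }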
index f556f8a8b3f9b476949c6133f39778c49502e380..6b2b039c191fc5da53418371f290babdc5cd02c1 100644
@@ -952,7 +952,7 @@ static struct bio *bio_clone_range(struct bio *bio_src,
        /* Find first affected segment... */
 
        resid = offset;
-       __bio_for_each_segment(bv, bio_src, idx, 0) {
+       bio_for_each_segment(bv, bio_src, idx) {
                if (resid < bv->bv_len)
                        break;
                resid -= bv->bv_len;
index 13c15480d9401a79ea41fb44c603a007d97e2ea1..6d2d41ae9e322dbd53e787e5294f2d55551296eb 100644
@@ -858,8 +858,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
        unsigned int i;
        struct bio_vec *bv;
 
-       for (i = 0; i < clone->bi_vcnt; i++) {
-               bv = bio_iovec_idx(clone, i);
+       bio_for_each_segment_all(bv, clone, i) {
                BUG_ON(!bv->bv_page);
                mempool_free(bv->bv_page, cc->page_pool);
                bv->bv_page = NULL;
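
bio_for_each_segment_all() replaces the explicit index loop here.  Judging
from this conversion (and the "We trimmed the bio, so _all is legit" comment
in the raid1 hunk further down), it presumably walks every allocated bvec
from index 0, ignoring bi_idx -- along these lines, as an inference rather
than the actual bio.h definition:

  #define bio_for_each_segment_all(bvl, bio, i)                         \
          for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt;       \
               i++, bvl++)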
index d053098c6a917f1ac55fec457ab7c7f590ecb1eb..699b5be68d319263cce75e8d932deb0be25c7d00 100644
@@ -458,7 +458,7 @@ static void map_region(struct dm_io_region *io, struct mirror *m,
 {
        io->bdev = m->dev->bdev;
        io->sector = map_sector(m, bio);
-       io->count = bio->bi_size >> 9;
+       io->count = bio_sectors(bio);
 }
 
 static void hold_bio(struct mirror_set *ms, struct bio *bio)
index d8837d313f5434183439f057f463c99be100e0ea..ea5e878a30b93b1f974d449738fc46b55d5eaeba 100644
@@ -258,7 +258,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
        sector_t begin, end;
 
        stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin);
-       stripe_map_range_sector(sc, bio->bi_sector + bio_sectors(bio),
+       stripe_map_range_sector(sc, bio_end_sector(bio),
                                target_stripe, &end);
        if (begin < end) {
                bio->bi_bdev = sc->stripe[target_stripe].dev->bdev;
index a746f1d21c661bec7b809c9a688fddf44284581f..b948fd864d457e9ce857d1d04fd74bf8997e0aee 100644
@@ -501,7 +501,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
                return -EIO;
        }
 
-       if ((bio->bi_sector + bio_sectors(bio)) >>
+       if (bio_end_sector(bio) >>
            (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
                DMERR_LIMIT("io out of range");
                return -EIO;
@@ -519,7 +519,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
 
        bio->bi_end_io = verity_end_io;
        bio->bi_private = io;
-       io->io_vec_size = bio->bi_vcnt - bio->bi_idx;
+       io->io_vec_size = bio_segments(bio);
        if (io->io_vec_size < DM_VERITY_IO_VEC_INLINE)
                io->io_vec = io->io_vec_inline;
        else
index 5e7dc772f5deca223ea9142d073cc66785f54b27..3193aefe982b7b42badf4eba4adc36f89439d70c 100644
@@ -185,8 +185,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
                        return;
                }
 
-               if (check_sector(conf, bio->bi_sector, bio->bi_sector+(bio->bi_size>>9),
-                                WRITE))
+               if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), WRITE))
                        failit = 1;
                if (check_mode(conf, WritePersistent)) {
                        add_sector(conf, bio->bi_sector, WritePersistent);
@@ -196,8 +195,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
                        failit = 1;
        } else {
                /* read request */
-               if (check_sector(conf, bio->bi_sector, bio->bi_sector + (bio->bi_size>>9),
-                                READ))
+               if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), READ))
                        failit = 1;
                if (check_mode(conf, ReadTransient))
                        failit = 1;
index 21014836bdbf2286eba4fd02effbc4efcd9db625..f03fabd2b37bacf34a231a0bb034a6d8f2826e68 100644
@@ -317,8 +317,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
                bio_io_error(bio);
                return;
        }
-       if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
-                    tmp_dev->end_sector)) {
+       if (unlikely(bio_end_sector(bio) > tmp_dev->end_sector)) {
                /* This bio crosses a device boundary, so we have to
                 * split it.
                 */
index aeceedfc530b90b5da5b45e12ffdc9e4d665e16d..1d03ebde40b51885cd63950d695b19fe5b9214de 100644
@@ -194,21 +194,12 @@ void md_trim_bio(struct bio *bio, int offset, int size)
        if (offset == 0 && size == bio->bi_size)
                return;
 
-       bio->bi_sector += offset;
-       bio->bi_size = size;
-       offset <<= 9;
        clear_bit(BIO_SEG_VALID, &bio->bi_flags);
 
-       while (bio->bi_idx < bio->bi_vcnt &&
-              bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
-               /* remove this whole bio_vec */
-               offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
-               bio->bi_idx++;
-       }
-       if (bio->bi_idx < bio->bi_vcnt) {
-               bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
-               bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
-       }
+       bio_advance(bio, offset << 9);
+
+       bio->bi_size = size;
+
        /* avoid any complications with bi_idx being non-zero*/
        if (bio->bi_idx) {
                memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
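
bio_advance() absorbs both the open-coded sector/size advance deleted from
req_bio_endio() earlier in this diff and the bvec-trimming loop deleted
here.  Pieced together from those two deleted fragments, it presumably
behaves roughly as follows (a sketch, not the actual fs/bio.c code):

  /* Sketch assembled from the two deleted code paths above. */
  static void bio_advance_sketch(struct bio *bio, unsigned nbytes)
  {
          if (bio_integrity(bio))
                  bio_integrity_advance(bio, nbytes);

          bio->bi_sector += nbytes >> 9;
          bio->bi_size -= nbytes;

          while (nbytes) {
                  struct bio_vec *bv = bio_iovec(bio);
                  unsigned len = min(bv->bv_len, nbytes);

                  if (len == bv->bv_len) {
                          bio->bi_idx++;          /* whole bvec consumed */
                  } else {
                          bv->bv_offset += len;   /* trim partial bvec */
                          bv->bv_len -= len;
                  }
                  nbytes -= len;
          }
  }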
index 0505452de8d6ee2b3533c7930d62df544636b0c3..fcf65e512cf51a02413ae4439e72ce7ab6675159 100644
@@ -502,11 +502,11 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
 {
        if (likely(is_power_of_2(chunk_sects))) {
                return chunk_sects >= ((bio->bi_sector & (chunk_sects-1))
-                                       + (bio->bi_size >> 9));
+                                       + bio_sectors(bio));
        } else{
                sector_t sector = bio->bi_sector;
                return chunk_sects >= (sector_div(sector, chunk_sects)
-                                               + (bio->bi_size >> 9));
+                                               + bio_sectors(bio));
        }
 }
 
@@ -527,8 +527,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
                sector_t sector = bio->bi_sector;
                struct bio_pair *bp;
                /* Sanity check -- queue functions should prevent this happening */
-               if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
-                   bio->bi_idx != 0)
+               if (bio_segments(bio) > 1)
                        goto bad_map;
                /* This is a one page bio that upper layers
                 * refuse to split for us, so we need to split it.
@@ -567,7 +566,7 @@ bad_map:
        printk("md/raid0:%s: make_request bug: can't convert block across chunks"
               " or bigger than %dk %llu %d\n",
               mdname(mddev), chunk_sects / 2,
-              (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
+              (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
 
        bio_io_error(bio);
        return;
index fd86b372692db6d6bd5c6afdd498ae16be49f2c3..aeb4e3f74791b3f1d163aa19c8f3ed34cfa0f284 100644
@@ -92,7 +92,6 @@ static void r1bio_pool_free(void *r1_bio, void *data)
 static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 {
        struct pool_info *pi = data;
-       struct page *page;
        struct r1bio *r1_bio;
        struct bio *bio;
        int i, j;
@@ -122,14 +121,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
                j = 1;
        while(j--) {
                bio = r1_bio->bios[j];
-               for (i = 0; i < RESYNC_PAGES; i++) {
-                       page = alloc_page(gfp_flags);
-                       if (unlikely(!page))
-                               goto out_free_pages;
+               bio->bi_vcnt = RESYNC_PAGES;
 
-                       bio->bi_io_vec[i].bv_page = page;
-                       bio->bi_vcnt = i+1;
-               }
+               if (bio_alloc_pages(bio, gfp_flags))
+                       goto out_free_bio;
        }
        /* If not user-requests, copy the page pointers to all bios */
        if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
@@ -143,11 +138,6 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 
        return r1_bio;
 
-out_free_pages:
-       for (j=0 ; j < pi->raid_disks; j++)
-               for (i=0; i < r1_bio->bios[j]->bi_vcnt ; i++)
-                       put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
-       j = -1;
 out_free_bio:
        while (++j < pi->raid_disks)
                bio_put(r1_bio->bios[j]);
@@ -267,7 +257,7 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
                         (bio_data_dir(bio) == WRITE) ? "write" : "read",
                         (unsigned long long) bio->bi_sector,
                         (unsigned long long) bio->bi_sector +
-                        (bio->bi_size >> 9) - 1);
+                        bio_sectors(bio) - 1);
 
                call_bio_endio(r1_bio);
        }
@@ -458,7 +448,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
                                         " %llu-%llu\n",
                                         (unsigned long long) mbio->bi_sector,
                                         (unsigned long long) mbio->bi_sector +
-                                        (mbio->bi_size >> 9) - 1);
+                                        bio_sectors(mbio) - 1);
                                call_bio_endio(r1_bio);
                        }
                }
@@ -925,7 +915,7 @@ static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio)
        if (unlikely(!bvecs))
                return;
 
-       bio_for_each_segment(bvec, bio, i) {
+       bio_for_each_segment_all(bvec, bio, i) {
                bvecs[i] = *bvec;
                bvecs[i].bv_page = alloc_page(GFP_NOIO);
                if (unlikely(!bvecs[i].bv_page))
@@ -1018,7 +1008,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
        md_write_start(mddev, bio); /* wait on superblock update early */
 
        if (bio_data_dir(bio) == WRITE &&
-           bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo &&
+           bio_end_sector(bio) > mddev->suspend_lo &&
            bio->bi_sector < mddev->suspend_hi) {
                /* As the suspend_* range is controlled by
                 * userspace, we want an interruptible
@@ -1029,7 +1019,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
                        flush_signals(current);
                        prepare_to_wait(&conf->wait_barrier,
                                        &w, TASK_INTERRUPTIBLE);
-                       if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo ||
+                       if (bio_end_sector(bio) <= mddev->suspend_lo ||
                            bio->bi_sector >= mddev->suspend_hi)
                                break;
                        schedule();
@@ -1049,7 +1039,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
        r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
 
        r1_bio->master_bio = bio;
-       r1_bio->sectors = bio->bi_size >> 9;
+       r1_bio->sectors = bio_sectors(bio);
        r1_bio->state = 0;
        r1_bio->mddev = mddev;
        r1_bio->sector = bio->bi_sector;
@@ -1127,7 +1117,7 @@ read_again:
                        r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
 
                        r1_bio->master_bio = bio;
-                       r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+                       r1_bio->sectors = bio_sectors(bio) - sectors_handled;
                        r1_bio->state = 0;
                        r1_bio->mddev = mddev;
                        r1_bio->sector = bio->bi_sector + sectors_handled;
@@ -1284,14 +1274,10 @@ read_again:
                        struct bio_vec *bvec;
                        int j;
 
-                       /* Yes, I really want the '__' version so that
-                        * we clear any unused pointer in the io_vec, rather
-                        * than leave them unchanged.  This is important
-                        * because when we come to free the pages, we won't
-                        * know the original bi_idx, so we just free
-                        * them all
+                       /*
+                        * We trimmed the bio, so _all is legit
                         */
-                       __bio_for_each_segment(bvec, mbio, j, 0)
+                       bio_for_each_segment_all(bvec, mbio, j)
                                bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
                        if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
                                atomic_inc(&r1_bio->behind_remaining);
@@ -1329,14 +1315,14 @@ read_again:
        /* Mustn't call r1_bio_write_done before this next test,
         * as it could result in the bio being freed.
         */
-       if (sectors_handled < (bio->bi_size >> 9)) {
+       if (sectors_handled < bio_sectors(bio)) {
                r1_bio_write_done(r1_bio);
                /* We need another r1_bio.  It has already been counted
                 * in bio->bi_phys_segments
                 */
                r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
                r1_bio->master_bio = bio;
-               r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+               r1_bio->sectors = bio_sectors(bio) - sectors_handled;
                r1_bio->state = 0;
                r1_bio->mddev = mddev;
                r1_bio->sector = bio->bi_sector + sectors_handled;
@@ -1862,7 +1848,7 @@ static int process_checks(struct r1bio *r1_bio)
                struct bio *sbio = r1_bio->bios[i];
                int size;
 
-               if (r1_bio->bios[i]->bi_end_io != end_sync_read)
+               if (sbio->bi_end_io != end_sync_read)
                        continue;
 
                if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
@@ -1887,16 +1873,15 @@ static int process_checks(struct r1bio *r1_bio)
                        continue;
                }
                /* fixup the bio for reuse */
+               bio_reset(sbio);
                sbio->bi_vcnt = vcnt;
                sbio->bi_size = r1_bio->sectors << 9;
-               sbio->bi_idx = 0;
-               sbio->bi_phys_segments = 0;
-               sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
-               sbio->bi_flags |= 1 << BIO_UPTODATE;
-               sbio->bi_next = NULL;
                sbio->bi_sector = r1_bio->sector +
                        conf->mirrors[i].rdev->data_offset;
                sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+               sbio->bi_end_io = end_sync_read;
+               sbio->bi_private = r1_bio;
+
                size = sbio->bi_size;
                for (j = 0; j < vcnt ; j++) {
                        struct bio_vec *bi;
@@ -1907,10 +1892,9 @@ static int process_checks(struct r1bio *r1_bio)
                        else
                                bi->bv_len = size;
                        size -= PAGE_SIZE;
-                       memcpy(page_address(bi->bv_page),
-                              page_address(pbio->bi_io_vec[j].bv_page),
-                              PAGE_SIZE);
                }
+
+               bio_copy_data(sbio, pbio);
        }
        return 0;
 }
@@ -1947,7 +1931,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
                wbio->bi_rw = WRITE;
                wbio->bi_end_io = end_sync_write;
                atomic_inc(&r1_bio->remaining);
-               md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
+               md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));
 
                generic_make_request(wbio);
        }
@@ -2059,32 +2043,11 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
        }
 }
 
-static void bi_complete(struct bio *bio, int error)
-{
-       complete((struct completion *)bio->bi_private);
-}
-
-static int submit_bio_wait(int rw, struct bio *bio)
-{
-       struct completion event;
-       rw |= REQ_SYNC;
-
-       init_completion(&event);
-       bio->bi_private = &event;
-       bio->bi_end_io = bi_complete;
-       submit_bio(rw, bio);
-       wait_for_completion(&event);
-
-       return test_bit(BIO_UPTODATE, &bio->bi_flags);
-}
-
 static int narrow_write_error(struct r1bio *r1_bio, int i)
 {
        struct mddev *mddev = r1_bio->mddev;
        struct r1conf *conf = mddev->private;
        struct md_rdev *rdev = conf->mirrors[i].rdev;
-       int vcnt, idx;
-       struct bio_vec *vec;
 
        /* bio has the data to be written to device 'i' where
         * we just recently had a write error.
@@ -2112,30 +2075,32 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
                   & ~(sector_t)(block_sectors - 1))
                - sector;
 
-       if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-               vcnt = r1_bio->behind_page_count;
-               vec = r1_bio->behind_bvecs;
-               idx = 0;
-               while (vec[idx].bv_page == NULL)
-                       idx++;
-       } else {
-               vcnt = r1_bio->master_bio->bi_vcnt;
-               vec = r1_bio->master_bio->bi_io_vec;
-               idx = r1_bio->master_bio->bi_idx;
-       }
        while (sect_to_write) {
                struct bio *wbio;
                if (sectors > sect_to_write)
                        sectors = sect_to_write;
                /* Write at 'sector' for 'sectors'*/
 
-               wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
-               memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
-               wbio->bi_sector = r1_bio->sector;
+               if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+                       unsigned vcnt = r1_bio->behind_page_count;
+                       struct bio_vec *vec = r1_bio->behind_bvecs;
+
+                       while (!vec->bv_page) {
+                               vec++;
+                               vcnt--;
+                       }
+
+                       wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
+                       memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
+
+                       wbio->bi_vcnt = vcnt;
+               } else {
+                       wbio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
+               }
+
                wbio->bi_rw = WRITE;
-               wbio->bi_vcnt = vcnt;
+               wbio->bi_sector = r1_bio->sector;
                wbio->bi_size = r1_bio->sectors << 9;
-               wbio->bi_idx = idx;
 
                md_trim_bio(wbio, sector - r1_bio->sector, sectors);
                wbio->bi_sector += rdev->data_offset;
@@ -2284,8 +2249,7 @@ read_more:
                        r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
 
                        r1_bio->master_bio = mbio;
-                       r1_bio->sectors = (mbio->bi_size >> 9)
-                                         - sectors_handled;
+                       r1_bio->sectors = bio_sectors(mbio) - sectors_handled;
                        r1_bio->state = 0;
                        set_bit(R1BIO_ReadError, &r1_bio->state);
                        r1_bio->mddev = mddev;
@@ -2459,18 +2423,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
        for (i = 0; i < conf->raid_disks * 2; i++) {
                struct md_rdev *rdev;
                bio = r1_bio->bios[i];
-
-               /* take from bio_init */
-               bio->bi_next = NULL;
-               bio->bi_flags &= ~(BIO_POOL_MASK-1);
-               bio->bi_flags |= 1 << BIO_UPTODATE;
-               bio->bi_rw = READ;
-               bio->bi_vcnt = 0;
-               bio->bi_idx = 0;
-               bio->bi_phys_segments = 0;
-               bio->bi_size = 0;
-               bio->bi_end_io = NULL;
-               bio->bi_private = NULL;
+               bio_reset(bio);
 
                rdev = rcu_dereference(conf->mirrors[i].rdev);
                if (rdev == NULL ||
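
bio_reset() replaces the open-coded "take from bio_init" block deleted just
above.  Judging from that block, it presumably returns a bio to its freshly
initialized state while preserving the allocation-related flag bits; a
sketch built from the deleted lines, not the actual fs/bio.c code (callers
re-set bi_rw, bi_end_io and bi_private afterwards as needed):

  /* Sketch mirroring the deleted re-init sequence above. */
  static void bio_reset_sketch(struct bio *bio)
  {
          bio->bi_next = NULL;
          bio->bi_flags &= ~(BIO_POOL_MASK - 1);  /* keep pool bits */
          bio->bi_flags |= 1 << BIO_UPTODATE;
          bio->bi_rw = 0;                         /* READ */
          bio->bi_vcnt = 0;
          bio->bi_idx = 0;
          bio->bi_phys_segments = 0;
          bio->bi_size = 0;
          bio->bi_end_io = NULL;
          bio->bi_private = NULL;
  }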
index 77b562d18a90c4d27d9a5c43e82518e174ed84c7..e32e8b1042f8e57e5ee77f7162af708717f9e06c 100644
@@ -1169,14 +1169,13 @@ static void make_request(struct mddev *mddev, struct bio * bio)
        /* If this request crosses a chunk boundary, we need to
         * split it.  This will only happen for 1 PAGE (or less) requests.
         */
-       if (unlikely((bio->bi_sector & chunk_mask) + (bio->bi_size >> 9)
+       if (unlikely((bio->bi_sector & chunk_mask) + bio_sectors(bio)
                     > chunk_sects
                     && (conf->geo.near_copies < conf->geo.raid_disks
                         || conf->prev.near_copies < conf->prev.raid_disks))) {
                struct bio_pair *bp;
                /* Sanity check -- queue functions should prevent this happening */
-               if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
-                   bio->bi_idx != 0)
+               if (bio_segments(bio) > 1)
                        goto bad_map;
                /* This is a one page bio that upper layers
                 * refuse to split for us, so we need to split it.
@@ -1209,7 +1208,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
        bad_map:
                printk("md/raid10:%s: make_request bug: can't convert block across chunks"
                       " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
-                      (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
+                      (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
 
                bio_io_error(bio);
                return;
@@ -1224,7 +1223,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
         */
        wait_barrier(conf);
 
-       sectors = bio->bi_size >> 9;
+       sectors = bio_sectors(bio);
        while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
            bio->bi_sector < conf->reshape_progress &&
            bio->bi_sector + sectors > conf->reshape_progress) {
@@ -1326,8 +1325,7 @@ read_again:
                        r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
 
                        r10_bio->master_bio = bio;
-                       r10_bio->sectors = ((bio->bi_size >> 9)
-                                           - sectors_handled);
+                       r10_bio->sectors = bio_sectors(bio) - sectors_handled;
                        r10_bio->state = 0;
                        r10_bio->mddev = mddev;
                        r10_bio->sector = bio->bi_sector + sectors_handled;
@@ -1569,7 +1567,7 @@ retry_write:
         * after checking if we need to go around again.
         */
 
-       if (sectors_handled < (bio->bi_size >> 9)) {
+       if (sectors_handled < bio_sectors(bio)) {
                one_write_done(r10_bio);
                /* We need another r10_bio.  It has already been counted
                 * in bio->bi_phys_segments.
@@ -1577,7 +1575,7 @@ retry_write:
                r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
 
                r10_bio->master_bio = bio;
-               r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+               r10_bio->sectors = bio_sectors(bio) - sectors_handled;
 
                r10_bio->mddev = mddev;
                r10_bio->sector = bio->bi_sector + sectors_handled;
@@ -2079,13 +2077,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                 * First we need to fixup bv_offset, bv_len and
                 * bi_vecs, as the read request might have corrupted these
                 */
+               bio_reset(tbio);
+
                tbio->bi_vcnt = vcnt;
                tbio->bi_size = r10_bio->sectors << 9;
-               tbio->bi_idx = 0;
-               tbio->bi_phys_segments = 0;
-               tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
-               tbio->bi_flags |= 1 << BIO_UPTODATE;
-               tbio->bi_next = NULL;
                tbio->bi_rw = WRITE;
                tbio->bi_private = r10_bio;
                tbio->bi_sector = r10_bio->devs[i].addr;
@@ -2103,7 +2098,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                d = r10_bio->devs[i].devnum;
                atomic_inc(&conf->mirrors[d].rdev->nr_pending);
                atomic_inc(&r10_bio->remaining);
-               md_sync_acct(conf->mirrors[d].rdev->bdev, tbio->bi_size >> 9);
+               md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
 
                tbio->bi_sector += conf->mirrors[d].rdev->data_offset;
                tbio->bi_bdev = conf->mirrors[d].rdev->bdev;
@@ -2128,7 +2123,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                d = r10_bio->devs[i].devnum;
                atomic_inc(&r10_bio->remaining);
                md_sync_acct(conf->mirrors[d].replacement->bdev,
-                            tbio->bi_size >> 9);
+                            bio_sectors(tbio));
                generic_make_request(tbio);
        }
 
@@ -2254,13 +2249,13 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
        wbio2 = r10_bio->devs[1].repl_bio;
        if (wbio->bi_end_io) {
                atomic_inc(&conf->mirrors[d].rdev->nr_pending);
-               md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
+               md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
                generic_make_request(wbio);
        }
        if (wbio2 && wbio2->bi_end_io) {
                atomic_inc(&conf->mirrors[d].replacement->nr_pending);
                md_sync_acct(conf->mirrors[d].replacement->bdev,
-                            wbio2->bi_size >> 9);
+                            bio_sectors(wbio2));
                generic_make_request(wbio2);
        }
 }
@@ -2531,25 +2526,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
        }
 }
 
-static void bi_complete(struct bio *bio, int error)
-{
-       complete((struct completion *)bio->bi_private);
-}
-
-static int submit_bio_wait(int rw, struct bio *bio)
-{
-       struct completion event;
-       rw |= REQ_SYNC;
-
-       init_completion(&event);
-       bio->bi_private = &event;
-       bio->bi_end_io = bi_complete;
-       submit_bio(rw, bio);
-       wait_for_completion(&event);
-
-       return test_bit(BIO_UPTODATE, &bio->bi_flags);
-}
-
 static int narrow_write_error(struct r10bio *r10_bio, int i)
 {
        struct bio *bio = r10_bio->master_bio;
@@ -2690,8 +2666,7 @@ read_more:
                r10_bio = mempool_alloc(conf->r10bio_pool,
                                        GFP_NOIO);
                r10_bio->master_bio = mbio;
-               r10_bio->sectors = (mbio->bi_size >> 9)
-                       - sectors_handled;
+               r10_bio->sectors = bio_sectors(mbio) - sectors_handled;
                r10_bio->state = 0;
                set_bit(R10BIO_ReadError,
                        &r10_bio->state);
@@ -3112,6 +3087,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                                        }
                                }
                                bio = r10_bio->devs[0].bio;
+                               bio_reset(bio);
                                bio->bi_next = biolist;
                                biolist = bio;
                                bio->bi_private = r10_bio;
@@ -3136,6 +3112,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                                rdev = mirror->rdev;
                                if (!test_bit(In_sync, &rdev->flags)) {
                                        bio = r10_bio->devs[1].bio;
+                                       bio_reset(bio);
                                        bio->bi_next = biolist;
                                        biolist = bio;
                                        bio->bi_private = r10_bio;
@@ -3164,6 +3141,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                                if (rdev == NULL || bio == NULL ||
                                    test_bit(Faulty, &rdev->flags))
                                        break;
+                               bio_reset(bio);
                                bio->bi_next = biolist;
                                biolist = bio;
                                bio->bi_private = r10_bio;
@@ -3262,7 +3240,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                                r10_bio->devs[i].repl_bio->bi_end_io = NULL;
 
                        bio = r10_bio->devs[i].bio;
-                       bio->bi_end_io = NULL;
+                       bio_reset(bio);
                        clear_bit(BIO_UPTODATE, &bio->bi_flags);
                        if (conf->mirrors[d].rdev == NULL ||
                            test_bit(Faulty, &conf->mirrors[d].rdev->flags))
@@ -3299,6 +3277,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 
                        /* Need to set up for writing to the replacement */
                        bio = r10_bio->devs[i].repl_bio;
+                       bio_reset(bio);
                        clear_bit(BIO_UPTODATE, &bio->bi_flags);
 
                        sector = r10_bio->devs[i].addr;
@@ -3332,17 +3311,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                }
        }
 
-       for (bio = biolist; bio ; bio=bio->bi_next) {
-
-               bio->bi_flags &= ~(BIO_POOL_MASK - 1);
-               if (bio->bi_end_io)
-                       bio->bi_flags |= 1 << BIO_UPTODATE;
-               bio->bi_vcnt = 0;
-               bio->bi_idx = 0;
-               bio->bi_phys_segments = 0;
-               bio->bi_size = 0;
-       }
-
        nr_sectors = 0;
        if (sector_nr + max_sync < max_sector)
                max_sector = sector_nr + max_sync;
@@ -4389,7 +4357,6 @@ read_more:
        read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
        read_bio->bi_flags |= 1 << BIO_UPTODATE;
        read_bio->bi_vcnt = 0;
-       read_bio->bi_idx = 0;
        read_bio->bi_size = 0;
        r10_bio->master_bio = read_bio;
        r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
@@ -4413,17 +4380,14 @@ read_more:
                }
                if (!rdev2 || test_bit(Faulty, &rdev2->flags))
                        continue;
+
+               bio_reset(b);
                b->bi_bdev = rdev2->bdev;
                b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset;
                b->bi_private = r10_bio;
                b->bi_end_io = end_reshape_write;
                b->bi_rw = WRITE;
-               b->bi_flags &= ~(BIO_POOL_MASK - 1);
-               b->bi_flags |= 1 << BIO_UPTODATE;
                b->bi_next = blist;
-               b->bi_vcnt = 0;
-               b->bi_idx = 0;
-               b->bi_size = 0;
                blist = b;
        }
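
The hand-rolled re-initialization that the deleted loop above performed (clearing bi_flags, bi_vcnt, bi_idx and bi_size, then re-arming BIO_UPTODATE) is exactly what bio_reset() centralizes: everything below BIO_RESET_BITS returns to its freshly allocated state while the pool index is preserved. A minimal sketch of the conversion pattern each resync/reshape path now follows (the completion callback is assumed from the surrounding raid10 code):

	static void prep_recycled_bio(struct bio *bio, struct block_device *bdev,
				      sector_t sector, void *private)
	{
		bio_reset(bio);		/* clears flags/vcnt/idx/size, keeps BIO_POOL_IDX */
		bio->bi_bdev	= bdev;
		bio->bi_sector	= sector;
		bio->bi_private	= private;
		bio->bi_end_io	= end_sync_read;	/* assumed raid10 callback */
		bio->bi_rw	= READ;
	}
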
 
index 24909eb13fec1b0bf22e40caa4ff7a76967f80e4..2fefb9f2198e2269bafa26d62e60eb96dbb63aa0 100644 (file)
@@ -90,7 +90,7 @@ static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
  */
 static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
 {
-       int sectors = bio->bi_size >> 9;
+       int sectors = bio_sectors(bio);
        if (bio->bi_sector + sectors < sector + STRIPE_SECTORS)
                return bio->bi_next;
        else
@@ -567,14 +567,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                bi = &sh->dev[i].req;
                rbi = &sh->dev[i].rreq; /* For writing to replacement */
 
-               bi->bi_rw = rw;
-               rbi->bi_rw = rw;
-               if (rw & WRITE) {
-                       bi->bi_end_io = raid5_end_write_request;
-                       rbi->bi_end_io = raid5_end_write_request;
-               } else
-                       bi->bi_end_io = raid5_end_read_request;
-
                rcu_read_lock();
                rrdev = rcu_dereference(conf->disks[i].replacement);
                smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */
@@ -649,7 +641,14 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 
                        set_bit(STRIPE_IO_STARTED, &sh->state);
 
+                       bio_reset(bi);
                        bi->bi_bdev = rdev->bdev;
+                       bi->bi_rw = rw;
+                       bi->bi_end_io = (rw & WRITE)
+                               ? raid5_end_write_request
+                               : raid5_end_read_request;
+                       bi->bi_private = sh;
+
                        pr_debug("%s: for %llu schedule op %ld on disc %d\n",
                                __func__, (unsigned long long)sh->sector,
                                bi->bi_rw, i);
@@ -663,12 +662,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                        if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
                                bi->bi_rw |= REQ_FLUSH;
 
-                       bi->bi_flags = 1 << BIO_UPTODATE;
-                       bi->bi_idx = 0;
                        bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
                        bi->bi_io_vec[0].bv_offset = 0;
                        bi->bi_size = STRIPE_SIZE;
-                       bi->bi_next = NULL;
                        if (rrdev)
                                set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
 
@@ -685,7 +681,13 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 
                        set_bit(STRIPE_IO_STARTED, &sh->state);
 
+                       bio_reset(rbi);
                        rbi->bi_bdev = rrdev->bdev;
+                       rbi->bi_rw = rw;
+                       BUG_ON(!(rw & WRITE));
+                       rbi->bi_end_io = raid5_end_write_request;
+                       rbi->bi_private = sh;
+
                        pr_debug("%s: for %llu schedule op %ld on "
                                 "replacement disc %d\n",
                                __func__, (unsigned long long)sh->sector,
@@ -697,12 +699,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                        else
                                rbi->bi_sector = (sh->sector
                                                  + rrdev->data_offset);
-                       rbi->bi_flags = 1 << BIO_UPTODATE;
-                       rbi->bi_idx = 0;
                        rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
                        rbi->bi_io_vec[0].bv_offset = 0;
                        rbi->bi_size = STRIPE_SIZE;
-                       rbi->bi_next = NULL;
                        if (conf->mddev->gendisk)
                                trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
                                                      rbi, disk_devt(conf->mddev->gendisk),
@@ -2393,11 +2392,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
        } else
                bip = &sh->dev[dd_idx].toread;
        while (*bip && (*bip)->bi_sector < bi->bi_sector) {
-               if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector)
+               if (bio_end_sector(*bip) > bi->bi_sector)
                        goto overlap;
                bip = & (*bip)->bi_next;
        }
-       if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9))
+       if (*bip && (*bip)->bi_sector < bio_end_sector(bi))
                goto overlap;
 
        BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
@@ -2413,8 +2412,8 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
                     sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
                             bi && bi->bi_sector <= sector;
                     bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
-                       if (bi->bi_sector + (bi->bi_size>>9) >= sector)
-                               sector = bi->bi_sector + (bi->bi_size>>9);
+                       if (bio_end_sector(bi) >= sector)
+                               sector = bio_end_sector(bi);
                }
                if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
                        set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
@@ -3840,7 +3839,7 @@ static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
 {
        sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
        unsigned int chunk_sectors = mddev->chunk_sectors;
-       unsigned int bio_sectors = bio->bi_size >> 9;
+       unsigned int bio_sectors = bio_sectors(bio);
 
        if (mddev->new_chunk_sectors < mddev->chunk_sectors)
                chunk_sectors = mddev->new_chunk_sectors;
@@ -3930,7 +3929,7 @@ static int bio_fits_rdev(struct bio *bi)
 {
        struct request_queue *q = bdev_get_queue(bi->bi_bdev);
 
-       if ((bi->bi_size>>9) > queue_max_sectors(q))
+       if (bio_sectors(bi) > queue_max_sectors(q))
                return 0;
        blk_recount_segments(q, bi);
        if (bi->bi_phys_segments > queue_max_segments(q))
@@ -3977,7 +3976,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
                                                    0,
                                                    &dd_idx, NULL);
 
-       end_sector = align_bi->bi_sector + (align_bi->bi_size >> 9);
+       end_sector = bio_end_sector(align_bi);
        rcu_read_lock();
        rdev = rcu_dereference(conf->disks[dd_idx].replacement);
        if (!rdev || test_bit(Faulty, &rdev->flags) ||
@@ -4000,7 +3999,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
                align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
 
                if (!bio_fits_rdev(align_bi) ||
-                   is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
+                   is_badblock(rdev, align_bi->bi_sector, bio_sectors(align_bi),
                                &first_bad, &bad_sectors)) {
                        /* too big in some way, or has a known bad block */
                        bio_put(align_bi);
@@ -4262,7 +4261,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
        }
 
        logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
-       last_sector = bi->bi_sector + (bi->bi_size>>9);
+       last_sector = bio_end_sector(bi);
        bi->bi_next = NULL;
        bi->bi_phys_segments = 1;       /* over-loaded to count active stripes */
 
@@ -4725,7 +4724,7 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
        logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
        sector = raid5_compute_sector(conf, logical_sector,
                                      0, &dd_idx, NULL);
-       last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9);
+       last_sector = bio_end_sector(raid_bio);
 
        for (; logical_sector < last_sector;
             logical_sector += STRIPE_SECTORS,
index fa43c391c8ed1072b35200ba671623c933c1df80..2bb01546df0bcdf5063b09ccf55da5d17dbf838d 100644 (file)
@@ -2235,10 +2235,10 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
        }
 
        /* do we need to support multiple segments? */
-       if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) {
+       if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) {
                printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u %u, rsp %u %u\n",
-                   ioc->name, __func__, req->bio->bi_vcnt, blk_rq_bytes(req),
-                   rsp->bio->bi_vcnt, blk_rq_bytes(rsp));
+                   ioc->name, __func__, bio_segments(req->bio), blk_rq_bytes(req),
+                   bio_segments(rsp->bio), blk_rq_bytes(rsp));
                return -EINVAL;
        }
 
index b6ad0de07930c531f9a81f8314a1b6b974e9d6db..12d08b4529e9d3b1292ed317ee641bdfd7abddd5 100644 (file)
@@ -826,8 +826,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
        if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0)
                /* Request is not page-aligned. */
                goto fail;
-       if (((bio->bi_size >> 9) + bio->bi_sector)
-                       > get_capacity(bio->bi_bdev->bd_disk)) {
+       if (bio_end_sector(bio) > get_capacity(bio->bi_bdev->bd_disk)) {
                /* Request beyond end of DCSS segment. */
                goto fail;
        }
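
bio_end_sector(), added to bio.h later in this patch, is simply bi_sector + bio_sectors(bio); the conversions here and in the raid and btrfs hunks are mechanical but remove a recurring source of ">> 9" typos. A sketch of the capacity check above, spelled with the helper:

	static bool bio_within_capacity(struct bio *bio)
	{
		/* first sector past the request vs. device size, in 512-byte units */
		return bio_end_sector(bio) <= get_capacity(bio->bi_bdev->bd_disk);
	}
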
index aec2e0da50164ca688cc9e2ea95355f0aeaf4669..7af776737b40ea346fb7586bdfea1236fb8b92a0 100644 (file)
@@ -2151,10 +2151,10 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
        }
 
        /* do we need to support multiple segments? */
-       if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) {
+       if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) {
                printk("%s: multiple segments req %u %u, rsp %u %u\n",
-                      __func__, req->bio->bi_vcnt, blk_rq_bytes(req),
-                      rsp->bio->bi_vcnt, blk_rq_bytes(rsp));
+                      __func__, bio_segments(req->bio), blk_rq_bytes(req),
+                      bio_segments(rsp->bio), blk_rq_bytes(rsp));
                return -EINVAL;
        }
 
index 8c2ffbe6af0f1dbd67e724507101eabe5e0ceca6..193e7ae90c3bd754b7a50a01db5fda73b2822502 100644 (file)
@@ -1939,7 +1939,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
        ioc->transport_cmds.status = MPT2_CMD_PENDING;
 
        /* Check if the request is split across multiple segments */
-       if (req->bio->bi_vcnt > 1) {
+       if (bio_segments(req->bio) > 1) {
                u32 offset = 0;
 
                /* Allocate memory and copy the request */
@@ -1971,7 +1971,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 
        /* Check if the response needs to be populated across
         * multiple segments */
-       if (rsp->bio->bi_vcnt > 1) {
+       if (bio_segments(rsp->bio) > 1) {
                pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp),
                    &pci_dma_in);
                if (!pci_addr_in) {
@@ -2038,7 +2038,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
        sgl_flags = (MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
            MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC);
        sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
-       if (req->bio->bi_vcnt > 1) {
+       if (bio_segments(req->bio) > 1) {
                ioc->base_add_sg_single(psge, sgl_flags |
                    (blk_rq_bytes(req) - 4), pci_dma_out);
        } else {
@@ -2054,7 +2054,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
            MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER |
            MPI2_SGE_FLAGS_END_OF_LIST);
        sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
-       if (rsp->bio->bi_vcnt > 1) {
+       if (bio_segments(rsp->bio) > 1) {
                ioc->base_add_sg_single(psge, sgl_flags |
                    (blk_rq_bytes(rsp) + 4), pci_dma_in);
        } else {
@@ -2099,7 +2099,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
                    le16_to_cpu(mpi_reply->ResponseDataLength);
                /* check if the resp needs to be copied from the allocated
                 * pci mem */
-               if (rsp->bio->bi_vcnt > 1) {
+               if (bio_segments(rsp->bio) > 1) {
                        u32 offset = 0;
                        u32 bytes_to_copy =
                            le16_to_cpu(mpi_reply->ResponseDataLength);
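
All the SMP transport handlers (mptsas, libsas, mpt2sas) get the same substitution. bio_segments() is bi_vcnt - bi_idx (see the unchanged definition in the bio.h hunk below), i.e. only the segments still pending, so a bio that has already been partially advanced is classified correctly where a raw bi_vcnt test is not. A sketch of the distinction, with an illustrative helper name:

	static bool smp_buf_is_single_segment(struct bio *bio)
	{
		/*
		 * bi_vcnt == 2 with bi_idx == 1 means one pending segment;
		 * the old 'bi_vcnt > 1' test would wrongly take the slow path.
		 */
		return bio_segments(bio) <= 1;
	}
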
index a3f28f331b2bba7e6653da30dce92adba5d97140..8fb42916d8a29812e349fdbfa980ef1c34056ce4 100644 (file)
@@ -27,48 +27,11 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 
-struct integrity_slab {
-       struct kmem_cache *slab;
-       unsigned short nr_vecs;
-       char name[8];
-};
-
-#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) }
-struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = {
-       IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES),
-};
-#undef IS
+#define BIP_INLINE_VECS        4
 
+static struct kmem_cache *bip_slab;
 static struct workqueue_struct *kintegrityd_wq;
 
-static inline unsigned int vecs_to_idx(unsigned int nr)
-{
-       switch (nr) {
-       case 1:
-               return 0;
-       case 2 ... 4:
-               return 1;
-       case 5 ... 16:
-               return 2;
-       case 17 ... 64:
-               return 3;
-       case 65 ... 128:
-               return 4;
-       case 129 ... BIO_MAX_PAGES:
-               return 5;
-       default:
-               BUG();
-       }
-}
-
-static inline int use_bip_pool(unsigned int idx)
-{
-       if (idx == BIOVEC_MAX_IDX)
-               return 1;
-
-       return 0;
-}
-
 /**
  * bio_integrity_alloc - Allocate integrity payload and attach it to bio
  * @bio:       bio to attach integrity metadata to
@@ -84,37 +47,41 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
                                                  unsigned int nr_vecs)
 {
        struct bio_integrity_payload *bip;
-       unsigned int idx = vecs_to_idx(nr_vecs);
        struct bio_set *bs = bio->bi_pool;
-
-       if (!bs)
-               bs = fs_bio_set;
-
-       BUG_ON(bio == NULL);
-       bip = NULL;
-
-       /* Lower order allocations come straight from slab */
-       if (!use_bip_pool(idx))
-               bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask);
-
-       /* Use mempool if lower order alloc failed or max vecs were requested */
-       if (bip == NULL) {
-               idx = BIOVEC_MAX_IDX;  /* so we free the payload properly later */
+       unsigned long idx = BIO_POOL_NONE;
+       unsigned inline_vecs;
+
+       if (!bs) {
+               bip = kmalloc(sizeof(struct bio_integrity_payload) +
+                             sizeof(struct bio_vec) * nr_vecs, gfp_mask);
+               inline_vecs = nr_vecs;
+       } else {
                bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
-
-               if (unlikely(bip == NULL)) {
-                       printk(KERN_ERR "%s: could not alloc bip\n", __func__);
-                       return NULL;
-               }
+               inline_vecs = BIP_INLINE_VECS;
        }
 
+       if (unlikely(!bip))
+               return NULL;
+
        memset(bip, 0, sizeof(*bip));
 
+       if (nr_vecs > inline_vecs) {
+               bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx,
+                                         bs->bvec_integrity_pool);
+               if (!bip->bip_vec)
+                       goto err;
+       } else {
+               bip->bip_vec = bip->bip_inline_vecs;
+       }
+
        bip->bip_slab = idx;
        bip->bip_bio = bio;
        bio->bi_integrity = bip;
 
        return bip;
+err:
+       mempool_free(bip, bs->bio_integrity_pool);
+       return NULL;
 }
 EXPORT_SYMBOL(bio_integrity_alloc);
 
@@ -130,20 +97,18 @@ void bio_integrity_free(struct bio *bio)
        struct bio_integrity_payload *bip = bio->bi_integrity;
        struct bio_set *bs = bio->bi_pool;
 
-       if (!bs)
-               bs = fs_bio_set;
-
-       BUG_ON(bip == NULL);
-
-       /* A cloned bio doesn't own the integrity metadata */
-       if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY)
-           && bip->bip_buf != NULL)
+       if (bip->bip_owns_buf)
                kfree(bip->bip_buf);
 
-       if (use_bip_pool(bip->bip_slab))
+       if (bs) {
+               if (bip->bip_slab != BIO_POOL_NONE)
+                       bvec_free(bs->bvec_integrity_pool, bip->bip_vec,
+                                 bip->bip_slab);
+
                mempool_free(bip, bs->bio_integrity_pool);
-       else
-               kmem_cache_free(bip_slab[bip->bip_slab].slab, bip);
+       } else {
+               kfree(bip);
+       }
 
        bio->bi_integrity = NULL;
 }
@@ -419,6 +384,7 @@ int bio_integrity_prep(struct bio *bio)
                return -EIO;
        }
 
+       bip->bip_owns_buf = 1;
        bip->bip_buf = buf;
        bip->bip_size = len;
        bip->bip_sector = bio->bi_sector;
@@ -694,11 +660,11 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
        bp->bio1.bi_integrity = &bp->bip1;
        bp->bio2.bi_integrity = &bp->bip2;
 
-       bp->iv1 = bip->bip_vec[0];
-       bp->iv2 = bip->bip_vec[0];
+       bp->iv1 = bip->bip_vec[bip->bip_idx];
+       bp->iv2 = bip->bip_vec[bip->bip_idx];
 
-       bp->bip1.bip_vec[0] = bp->iv1;
-       bp->bip2.bip_vec[0] = bp->iv2;
+       bp->bip1.bip_vec = &bp->iv1;
+       bp->bip2.bip_vec = &bp->iv2;
 
        bp->iv1.bv_len = sectors * bi->tuple_size;
        bp->iv2.bv_offset += sectors * bi->tuple_size;
@@ -746,13 +712,14 @@ EXPORT_SYMBOL(bio_integrity_clone);
 
 int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
-       unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
-
        if (bs->bio_integrity_pool)
                return 0;
 
-       bs->bio_integrity_pool =
-               mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
+       bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab);
+
+       bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size);
+       if (!bs->bvec_integrity_pool)
+               return -1;
 
        if (!bs->bio_integrity_pool)
                return -1;
@@ -765,13 +732,14 @@ void bioset_integrity_free(struct bio_set *bs)
 {
        if (bs->bio_integrity_pool)
                mempool_destroy(bs->bio_integrity_pool);
+
+       if (bs->bvec_integrity_pool)
+                       mempool_destroy(bs->bvec_integrity_pool);
 }
 EXPORT_SYMBOL(bioset_integrity_free);
 
 void __init bio_integrity_init(void)
 {
-       unsigned int i;
-
        /*
         * kintegrityd won't block much but may burn a lot of CPU cycles.
         * Make it highpri CPU intensive wq with max concurrency of 1.
@@ -781,14 +749,10 @@ void __init bio_integrity_init(void)
        if (!kintegrityd_wq)
                panic("Failed to create kintegrityd\n");
 
-       for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) {
-               unsigned int size;
-
-               size = sizeof(struct bio_integrity_payload)
-                       + bip_slab[i].nr_vecs * sizeof(struct bio_vec);
-
-               bip_slab[i].slab =
-                       kmem_cache_create(bip_slab[i].name, size, 0,
-                                         SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
-       }
+       bip_slab = kmem_cache_create("bio_integrity_payload",
+                                    sizeof(struct bio_integrity_payload) +
+                                    sizeof(struct bio_vec) * BIP_INLINE_VECS,
+                                    0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+       if (!bip_slab)
+               panic("Failed to create slab\n");
 }
index bb5768f59b32e22fdcde250f0ae8b01eb6b69b48..9238a54b562c9bb0b672f4bd6359871e192c21d8 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -160,12 +160,12 @@ unsigned int bvec_nr_vecs(unsigned short idx)
        return bvec_slabs[idx].nr_vecs;
 }
 
-void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
+void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx)
 {
        BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
 
        if (idx == BIOVEC_MAX_IDX)
-               mempool_free(bv, bs->bvec_pool);
+               mempool_free(bv, pool);
        else {
                struct biovec_slab *bvs = bvec_slabs + idx;
 
@@ -173,8 +173,8 @@ void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
        }
 }
 
-struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
-                             struct bio_set *bs)
+struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
+                          mempool_t *pool)
 {
        struct bio_vec *bvl;
 
@@ -210,7 +210,7 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
         */
        if (*idx == BIOVEC_MAX_IDX) {
 fallback:
-               bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
+               bvl = mempool_alloc(pool, gfp_mask);
        } else {
                struct biovec_slab *bvs = bvec_slabs + *idx;
                gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
@@ -252,8 +252,8 @@ static void bio_free(struct bio *bio)
        __bio_free(bio);
 
        if (bs) {
-               if (bio_has_allocated_vec(bio))
-                       bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
+               if (bio_flagged(bio, BIO_OWNS_VEC))
+                       bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio));
 
                /*
                 * If we have front padding, adjust the bio pointer before freeing
@@ -297,6 +297,54 @@ void bio_reset(struct bio *bio)
 }
 EXPORT_SYMBOL(bio_reset);
 
+static void bio_alloc_rescue(struct work_struct *work)
+{
+       struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
+       struct bio *bio;
+
+       while (1) {
+               spin_lock(&bs->rescue_lock);
+               bio = bio_list_pop(&bs->rescue_list);
+               spin_unlock(&bs->rescue_lock);
+
+               if (!bio)
+                       break;
+
+               generic_make_request(bio);
+       }
+}
+
+static void punt_bios_to_rescuer(struct bio_set *bs)
+{
+       struct bio_list punt, nopunt;
+       struct bio *bio;
+
+       /*
+        * In order to guarantee forward progress we must punt only bios that
+        * were allocated from this bio_set; otherwise, if there was a bio on
+        * there for a stacking driver higher up in the stack, processing it
+        * could require allocating bios from this bio_set, and doing that from
+        * our own rescuer would be bad.
+        *
+        * Since bio lists are singly linked, pop them all instead of trying to
+        * remove from the middle of the list:
+        */
+
+       bio_list_init(&punt);
+       bio_list_init(&nopunt);
+
+       while ((bio = bio_list_pop(current->bio_list)))
+               bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+
+       *current->bio_list = nopunt;
+
+       spin_lock(&bs->rescue_lock);
+       bio_list_merge(&bs->rescue_list, &punt);
+       spin_unlock(&bs->rescue_lock);
+
+       queue_work(bs->rescue_workqueue, &bs->rescue_work);
+}
+
 /**
  * bio_alloc_bioset - allocate a bio for I/O
  * @gfp_mask:   the GFP_ mask given to the slab allocator
@@ -314,11 +362,27 @@ EXPORT_SYMBOL(bio_reset);
  *   previously allocated bio for IO before attempting to allocate a new one.
  *   Failure to do so can cause deadlocks under memory pressure.
  *
+ *   Note that when running under generic_make_request() (i.e. any block
+ *   driver), bios are not submitted until after you return - see the code in
+ *   generic_make_request() that converts recursion into iteration, to prevent
+ *   stack overflows.
+ *
+ *   This would normally mean allocating multiple bios under
+ *   generic_make_request() would be susceptible to deadlocks, but we have
+ *   deadlock avoidance code that resubmits any blocked bios from a rescuer
+ *   thread.
+ *
+ *   However, we do not guarantee forward progress for allocations from other
+ *   mempools. Doing multiple allocations from the same mempool under
+ *   generic_make_request() should be avoided - instead, use bio_set's front_pad
+ *   for per-bio allocations.
+ *
  *   RETURNS:
  *   Pointer to new bio on success, NULL on failure.
  */
 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
+       gfp_t saved_gfp = gfp_mask;
        unsigned front_pad;
        unsigned inline_vecs;
        unsigned long idx = BIO_POOL_NONE;
@@ -336,7 +400,37 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
                front_pad = 0;
                inline_vecs = nr_iovecs;
        } else {
+               /*
+                * generic_make_request() converts recursion to iteration; this
+                * means if we're running beneath it, any bios we allocate and
+                * submit will not be submitted (and thus freed) until after we
+                * return.
+                *
+                * This exposes us to a potential deadlock if we allocate
+                * multiple bios from the same bio_set while running
+                * underneath generic_make_request(). If we were to allocate
+                * multiple bios (say a stacking block driver that was splitting
+                * bios), we would deadlock if we exhausted the mempool's
+                * reserve.
+                *
+                * We solve this, and guarantee forward progress, with a rescuer
+                * workqueue per bio_set. If we go to allocate and there are
+                * bios on current->bio_list, we first try the allocation
+                * without __GFP_WAIT; if that fails, we punt those bios we
+                * would be blocking to the rescuer workqueue before we retry
+                * with the original gfp_flags.
+                */
+
+               if (current->bio_list && !bio_list_empty(current->bio_list))
+                       gfp_mask &= ~__GFP_WAIT;
+
                p = mempool_alloc(bs->bio_pool, gfp_mask);
+               if (!p && gfp_mask != saved_gfp) {
+                       punt_bios_to_rescuer(bs);
+                       gfp_mask = saved_gfp;
+                       p = mempool_alloc(bs->bio_pool, gfp_mask);
+               }
+
                front_pad = bs->front_pad;
                inline_vecs = BIO_INLINE_VECS;
        }
@@ -348,9 +442,17 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
        bio_init(bio);
 
        if (nr_iovecs > inline_vecs) {
-               bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
+               bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
+               if (!bvl && gfp_mask != saved_gfp) {
+                       punt_bios_to_rescuer(bs);
+                       gfp_mask = saved_gfp;
+                       bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
+               }
+
                if (unlikely(!bvl))
                        goto err_free;
+
+               bio->bi_flags |= 1 << BIO_OWNS_VEC;
        } else if (nr_iovecs) {
                bvl = bio->bi_inline_vecs;
        }
@@ -652,6 +754,181 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
 }
 EXPORT_SYMBOL(bio_add_page);
 
+struct submit_bio_ret {
+       struct completion event;
+       int error;
+};
+
+static void submit_bio_wait_endio(struct bio *bio, int error)
+{
+       struct submit_bio_ret *ret = bio->bi_private;
+
+       ret->error = error;
+       complete(&ret->event);
+}
+
+/**
+ * submit_bio_wait - submit a bio, and wait until it completes
+ * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
+ * @bio: The &struct bio which describes the I/O
+ *
+ * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
+ * bio_endio() on failure.
+ */
+int submit_bio_wait(int rw, struct bio *bio)
+{
+       struct submit_bio_ret ret;
+
+       rw |= REQ_SYNC;
+       init_completion(&ret.event);
+       bio->bi_private = &ret;
+       bio->bi_end_io = submit_bio_wait_endio;
+       submit_bio(rw, bio);
+       wait_for_completion(&ret.event);
+
+       return ret.error;
+}
+EXPORT_SYMBOL(submit_bio_wait);
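
This is the raid10 helper removed at the top of this patch, promoted to a generic exported API. Note that the return convention flips: the private copy returned the BIO_UPTODATE bit (non-zero on success), while this version returns 0 on success or the error passed to bio_endio(). A hypothetical caller, with the device, sector and page supplied by its own context:

	static int read_page_sync(struct block_device *bdev, sector_t sector,
				  struct page *page)
	{
		struct bio *bio = bio_alloc(GFP_NOIO, 1);
		int ret;

		bio->bi_bdev = bdev;
		bio->bi_sector = sector;
		bio_add_page(bio, page, PAGE_SIZE, 0);

		ret = submit_bio_wait(READ, bio);	/* 0 or -Exx from bio_endio() */
		bio_put(bio);
		return ret;
	}
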
+
+/**
+ * bio_advance - increment/complete a bio by some number of bytes
+ * @bio:       bio to advance
+ * @bytes:     number of bytes to complete
+ *
+ * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
+ * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
+ * be updated on the last bvec as well.
+ *
+ * @bio will then represent the remaining, uncompleted portion of the io.
+ */
+void bio_advance(struct bio *bio, unsigned bytes)
+{
+       if (bio_integrity(bio))
+               bio_integrity_advance(bio, bytes);
+
+       bio->bi_sector += bytes >> 9;
+       bio->bi_size -= bytes;
+
+       if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK)
+               return;
+
+       while (bytes) {
+               if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
+                       WARN_ONCE(1, "bio idx %d >= vcnt %d\n",
+                                 bio->bi_idx, bio->bi_vcnt);
+                       break;
+               }
+
+               if (bytes >= bio_iovec(bio)->bv_len) {
+                       bytes -= bio_iovec(bio)->bv_len;
+                       bio->bi_idx++;
+               } else {
+                       bio_iovec(bio)->bv_len -= bytes;
+                       bio_iovec(bio)->bv_offset += bytes;
+                       bytes = 0;
+               }
+       }
+}
+EXPORT_SYMBOL(bio_advance);
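
A sketch of the intended caller: a driver that has finished 'done' bytes of a bio and wants to push only the remainder back down the stack (the function name is illustrative):

	static void resubmit_remainder(struct bio *bio, unsigned done)
	{
		bio_advance(bio, done);	/* bumps bi_sector, shrinks bi_size, moves bi_idx */
		if (bio->bi_size)
			generic_make_request(bio);
		else
			bio_endio(bio, 0);
	}
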
+
+/**
+ * bio_alloc_pages - allocates a single page for each bvec in a bio
+ * @bio: bio to allocate pages for
+ * @gfp_mask: flags for allocation
+ *
+ * Allocates pages up to @bio->bi_vcnt.
+ *
+ * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are
+ * freed.
+ */
+int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
+{
+       int i;
+       struct bio_vec *bv;
+
+       bio_for_each_segment_all(bv, bio, i) {
+               bv->bv_page = alloc_page(gfp_mask);
+               if (!bv->bv_page) {
+                       while (--bv >= bio->bi_io_vec)
+                               __free_page(bv->bv_page);
+                       return -ENOMEM;
+               }
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(bio_alloc_pages);
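
Only bv_page is filled in; the caller still sizes the vecs and bi_size itself. A hypothetical constructor for a bio backed by fresh pages:

	static struct bio *alloc_backed_bio(struct bio_set *bs, unsigned nr_pages)
	{
		struct bio *bio = bio_alloc_bioset(GFP_NOIO, nr_pages, bs);
		struct bio_vec *bv;
		int i;

		if (!bio)
			return NULL;

		bio->bi_vcnt = nr_pages;	/* bio_alloc_pages() fills up to bi_vcnt */
		if (bio_alloc_pages(bio, GFP_NOIO)) {
			bio_put(bio);
			return NULL;
		}

		bio_for_each_segment_all(bv, bio, i) {	/* we own this bio */
			bv->bv_len = PAGE_SIZE;
			bv->bv_offset = 0;
		}
		bio->bi_size = nr_pages * PAGE_SIZE;
		return bio;
	}
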
+
+/**
+ * bio_copy_data - copy contents of data buffers from one bio chain to another
+ * @src: source bio list
+ * @dst: destination bio list
+ *
+ * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
+ * @src and @dst as linked lists of bios.
+ *
+ * Stops when it reaches the end of either @src or @dst - that is, copies
+ * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
+ */
+void bio_copy_data(struct bio *dst, struct bio *src)
+{
+       struct bio_vec *src_bv, *dst_bv;
+       unsigned src_offset, dst_offset, bytes;
+       void *src_p, *dst_p;
+
+       src_bv = bio_iovec(src);
+       dst_bv = bio_iovec(dst);
+
+       src_offset = src_bv->bv_offset;
+       dst_offset = dst_bv->bv_offset;
+
+       while (1) {
+               if (src_offset == src_bv->bv_offset + src_bv->bv_len) {
+                       src_bv++;
+                       if (src_bv == bio_iovec_idx(src, src->bi_vcnt)) {
+                               src = src->bi_next;
+                               if (!src)
+                                       break;
+
+                               src_bv = bio_iovec(src);
+                       }
+
+                       src_offset = src_bv->bv_offset;
+               }
+
+               if (dst_offset == dst_bv->bv_offset + dst_bv->bv_len) {
+                       dst_bv++;
+                       if (dst_bv == bio_iovec_idx(dst, dst->bi_vcnt)) {
+                               dst = dst->bi_next;
+                               if (!dst)
+                                       break;
+
+                               dst_bv = bio_iovec(dst);
+                       }
+
+                       dst_offset = dst_bv->bv_offset;
+               }
+
+               bytes = min(dst_bv->bv_offset + dst_bv->bv_len - dst_offset,
+                           src_bv->bv_offset + src_bv->bv_len - src_offset);
+
+               src_p = kmap_atomic(src_bv->bv_page);
+               dst_p = kmap_atomic(dst_bv->bv_page);
+
+               memcpy(dst_p + dst_offset,
+                      src_p + src_offset,
+                      bytes);
+
+               kunmap_atomic(dst_p);
+               kunmap_atomic(src_p);
+
+               src_offset += bytes;
+               dst_offset += bytes;
+       }
+}
+EXPORT_SYMBOL(bio_copy_data);
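
For example, a bounce-style read completion that copies the freshly read data back into the original bio (a sketch; how 'bounce' and 'orig' are paired is assumed to come from the caller's completion context):

	static void bounce_read_done(struct bio *bounce, struct bio *orig, int err)
	{
		if (!err)
			bio_copy_data(orig, bounce);	/* (dst, src) */
		bio_put(bounce);
		bio_endio(orig, err);
	}
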
+
 struct bio_map_data {
        struct bio_vec *iovecs;
        struct sg_iovec *sgvecs;
@@ -714,7 +991,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
        int iov_idx = 0;
        unsigned int iov_off = 0;
 
-       __bio_for_each_segment(bvec, bio, i, 0) {
+       bio_for_each_segment_all(bvec, bio, i) {
                char *bv_addr = page_address(bvec->bv_page);
                unsigned int bv_len = iovecs[i].bv_len;
 
@@ -896,7 +1173,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
        return bio;
 cleanup:
        if (!map_data)
-               bio_for_each_segment(bvec, bio, i)
+               bio_for_each_segment_all(bvec, bio, i)
                        __free_page(bvec->bv_page);
 
        bio_put(bio);
@@ -1110,7 +1387,7 @@ static void __bio_unmap_user(struct bio *bio)
        /*
         * make sure we dirty pages we wrote to
         */
-       __bio_for_each_segment(bvec, bio, i, 0) {
+       bio_for_each_segment_all(bvec, bio, i) {
                if (bio_data_dir(bio) == READ)
                        set_page_dirty_lock(bvec->bv_page);
 
@@ -1216,7 +1493,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
        int i;
        char *p = bmd->sgvecs[0].iov_base;
 
-       __bio_for_each_segment(bvec, bio, i, 0) {
+       bio_for_each_segment_all(bvec, bio, i) {
                char *addr = page_address(bvec->bv_page);
                int len = bmd->iovecs[i].bv_len;
 
@@ -1256,7 +1533,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
        if (!reading) {
                void *p = data;
 
-               bio_for_each_segment(bvec, bio, i) {
+               bio_for_each_segment_all(bvec, bio, i) {
                        char *addr = page_address(bvec->bv_page);
 
                        memcpy(addr, p, bvec->bv_len);
@@ -1301,11 +1578,11 @@ EXPORT_SYMBOL(bio_copy_kern);
  */
 void bio_set_pages_dirty(struct bio *bio)
 {
-       struct bio_vec *bvec = bio->bi_io_vec;
+       struct bio_vec *bvec;
        int i;
 
-       for (i = 0; i < bio->bi_vcnt; i++) {
-               struct page *page = bvec[i].bv_page;
+       bio_for_each_segment_all(bvec, bio, i) {
+               struct page *page = bvec->bv_page;
 
                if (page && !PageCompound(page))
                        set_page_dirty_lock(page);
@@ -1314,11 +1591,11 @@ void bio_set_pages_dirty(struct bio *bio)
 
 static void bio_release_pages(struct bio *bio)
 {
-       struct bio_vec *bvec = bio->bi_io_vec;
+       struct bio_vec *bvec;
        int i;
 
-       for (i = 0; i < bio->bi_vcnt; i++) {
-               struct page *page = bvec[i].bv_page;
+       bio_for_each_segment_all(bvec, bio, i) {
+               struct page *page = bvec->bv_page;
 
                if (page)
                        put_page(page);
@@ -1367,16 +1644,16 @@ static void bio_dirty_fn(struct work_struct *work)
 
 void bio_check_pages_dirty(struct bio *bio)
 {
-       struct bio_vec *bvec = bio->bi_io_vec;
+       struct bio_vec *bvec;
        int nr_clean_pages = 0;
        int i;
 
-       for (i = 0; i < bio->bi_vcnt; i++) {
-               struct page *page = bvec[i].bv_page;
+       bio_for_each_segment_all(bvec, bio, i) {
+               struct page *page = bvec->bv_page;
 
                if (PageDirty(page) || PageCompound(page)) {
                        page_cache_release(page);
-                       bvec[i].bv_page = NULL;
+                       bvec->bv_page = NULL;
                } else {
                        nr_clean_pages++;
                }
@@ -1479,8 +1756,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
        trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
                                bi->bi_sector + first_sectors);
 
-       BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0);
-       BUG_ON(bi->bi_idx != 0);
+       BUG_ON(bio_segments(bi) > 1);
        atomic_set(&bp->cnt, 3);
        bp->error = 0;
        bp->bio1 = *bi;
@@ -1490,8 +1766,8 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
        bp->bio1.bi_size = first_sectors << 9;
 
        if (bi->bi_vcnt != 0) {
-               bp->bv1 = bi->bi_io_vec[0];
-               bp->bv2 = bi->bi_io_vec[0];
+               bp->bv1 = *bio_iovec(bi);
+               bp->bv2 = *bio_iovec(bi);
 
                if (bio_is_rw(bi)) {
                        bp->bv2.bv_offset += first_sectors << 9;
@@ -1543,7 +1819,7 @@ sector_t bio_sector_offset(struct bio *bio, unsigned short index,
        if (index >= bio->bi_idx)
                index = bio->bi_vcnt - 1;
 
-       __bio_for_each_segment(bv, bio, i, 0) {
+       bio_for_each_segment_all(bv, bio, i) {
                if (i == index) {
                        if (offset > bv->bv_offset)
                                sectors += (offset - bv->bv_offset) / sector_sz;
@@ -1561,29 +1837,25 @@ EXPORT_SYMBOL(bio_sector_offset);
  * create memory pools for biovec's in a bio_set.
  * use the global biovec slabs created for general use.
  */
-static int biovec_create_pools(struct bio_set *bs, int pool_entries)
+mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries)
 {
        struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
 
-       bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
-       if (!bs->bvec_pool)
-               return -ENOMEM;
-
-       return 0;
-}
-
-static void biovec_free_pools(struct bio_set *bs)
-{
-       mempool_destroy(bs->bvec_pool);
+       return mempool_create_slab_pool(pool_entries, bp->slab);
 }
 
 void bioset_free(struct bio_set *bs)
 {
+       if (bs->rescue_workqueue)
+               destroy_workqueue(bs->rescue_workqueue);
+
        if (bs->bio_pool)
                mempool_destroy(bs->bio_pool);
 
+       if (bs->bvec_pool)
+               mempool_destroy(bs->bvec_pool);
+
        bioset_integrity_free(bs);
-       biovec_free_pools(bs);
        bio_put_slab(bs);
 
        kfree(bs);
@@ -1614,6 +1886,10 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 
        bs->front_pad = front_pad;
 
+       spin_lock_init(&bs->rescue_lock);
+       bio_list_init(&bs->rescue_list);
+       INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
+
        bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
        if (!bs->bio_slab) {
                kfree(bs);
@@ -1624,9 +1900,15 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
        if (!bs->bio_pool)
                goto bad;
 
-       if (!biovec_create_pools(bs, pool_size))
-               return bs;
+       bs->bvec_pool = biovec_create_pool(bs, pool_size);
+       if (!bs->bvec_pool)
+               goto bad;
+
+       bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
+       if (!bs->rescue_workqueue)
+               goto bad;
 
+       return bs;
 bad:
        bioset_free(bs);
        return NULL;
index cdee391fc7bfd57c596204a8474142e7afe60335..73f2bfe3ac9302091608beae85b4aecf28622240 100644 (file)
@@ -2560,8 +2560,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
                if (old_compressed)
                        contig = bio->bi_sector == sector;
                else
-                       contig = bio->bi_sector + (bio->bi_size >> 9) ==
-                               sector;
+                       contig = bio_end_sector(bio) == sector;
 
                if (prev_bio_flags != bio_flags || !contig ||
                    merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
index 2854c824ab6443d04eac19cf0b540f79c257cfa0..6789772265707bdd02d97f91dc92e8b34a2fd6bb 100644 (file)
@@ -5177,7 +5177,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
        }
 
        prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
-       if ((bio->bi_size >> 9) > max_sectors)
+       if (bio_sectors(bio) > max_sectors)
                return 0;
 
        if (!q->merge_bvec_fn)
index b4dcb34c9635ae61b747bb9d4816477a49a8d90a..ecd3792ae0e9f886429cace691d68d8284662741 100644 (file)
@@ -2979,7 +2979,6 @@ int submit_bh(int rw, struct buffer_head * bh)
        bio->bi_io_vec[0].bv_offset = bh_offset(bh);
 
        bio->bi_vcnt = 1;
-       bio->bi_idx = 0;
        bio->bi_size = bh->b_size;
 
        bio->bi_end_io = end_bio_bh_io_sync;
index f853263cf74f8abf29016db5e44eaa9e0de44abc..38484b08a39ac93ef024a72b9b2833b84e6c44f2 100644 (file)
@@ -441,8 +441,8 @@ static struct bio *dio_await_one(struct dio *dio)
 static int dio_bio_complete(struct dio *dio, struct bio *bio)
 {
        const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-       struct bio_vec *bvec = bio->bi_io_vec;
-       int page_no;
+       struct bio_vec *bvec;
+       unsigned i;
 
        if (!uptodate)
                dio->io_error = -EIO;
@@ -450,8 +450,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
        if (dio->is_async && dio->rw == READ) {
                bio_check_pages_dirty(bio);     /* transfers ownership */
        } else {
-               for (page_no = 0; page_no < bio->bi_vcnt; page_no++) {
-                       struct page *page = bvec[page_no].bv_page;
+               bio_for_each_segment_all(bvec, bio, i) {
+                       struct page *page = bvec->bv_page;
 
                        if (dio->rw == READ && !PageCompound(page))
                                set_page_dirty_lock(page);
index f936cb50dc0d524250dae6d6ac5382db5c1f25b7..b744228886043d522fa15a32ff98cc80d29db3c9 100644 (file)
@@ -401,7 +401,7 @@ static void _clear_bio(struct bio *bio)
        struct bio_vec *bv;
        unsigned i;
 
-       __bio_for_each_segment(bv, bio, i, 0) {
+       bio_for_each_segment_all(bv, bio, i) {
                unsigned this_count = bv->bv_len;
 
                if (likely(PAGE_SIZE == this_count))
index b963f38ac298e82404befa6265c171aca4584aba..7682b970d0f1352cd019c8076ea55cc470ecd523 100644 (file)
@@ -432,7 +432,7 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
                if (!bio)
                        continue;
 
-               __bio_for_each_segment(bv, bio, i, 0) {
+               bio_for_each_segment_all(bv, bio, i) {
                        struct page *page = bv->bv_page;
 
                        SetPageUptodate(page);
index a5055977a214a1c99bff6e7f099a9fb4825e4f5f..5c37ef982390eb361684ba796776edbb64938c97 100644 (file)
@@ -300,7 +300,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
        u64 nblk;
 
        if (bio) {
-               nblk = bio->bi_sector + bio_sectors(bio);
+               nblk = bio_end_sector(bio);
                nblk >>= sdp->sd_fsb2bb_shift;
                if (blkno == nblk)
                        return bio;
index 2eb952c41a69da5f23314e6d3a9a3012692b3b76..8ae5e350da430132ca37c94ff8e22d6053b76877 100644 (file)
@@ -2004,7 +2004,6 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
        bio->bi_io_vec[0].bv_offset = bp->l_offset;
 
        bio->bi_vcnt = 1;
-       bio->bi_idx = 0;
        bio->bi_size = LOGPSIZE;
 
        bio->bi_end_io = lbmIODone;
@@ -2145,7 +2144,6 @@ static void lbmStartIO(struct lbuf * bp)
        bio->bi_io_vec[0].bv_offset = bp->l_offset;
 
        bio->bi_vcnt = 1;
-       bio->bi_idx = 0;
        bio->bi_size = LOGPSIZE;
 
        bio->bi_end_io = lbmIODone;
index e784a217b50067919ad3ebffe559b3552b58a9bc..550475ca6a0e0ec35c82d90b10372f2e4434fe90 100644 (file)
@@ -32,7 +32,6 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
        bio_vec.bv_len = PAGE_SIZE;
        bio_vec.bv_offset = 0;
        bio.bi_vcnt = 1;
-       bio.bi_idx = 0;
        bio.bi_size = PAGE_SIZE;
        bio.bi_bdev = bdev;
        bio.bi_sector = page->index * (PAGE_SIZE >> 9);
@@ -108,7 +107,6 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
                if (i >= max_pages) {
                        /* Block layer cannot split bios :( */
                        bio->bi_vcnt = i;
-                       bio->bi_idx = 0;
                        bio->bi_size = i * PAGE_SIZE;
                        bio->bi_bdev = super->s_bdev;
                        bio->bi_sector = ofs >> 9;
@@ -136,7 +134,6 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
                unlock_page(page);
        }
        bio->bi_vcnt = nr_pages;
-       bio->bi_idx = 0;
        bio->bi_size = nr_pages * PAGE_SIZE;
        bio->bi_bdev = super->s_bdev;
        bio->bi_sector = ofs >> 9;
@@ -202,7 +199,6 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
                if (i >= max_pages) {
                        /* Block layer cannot split bios :( */
                        bio->bi_vcnt = i;
-                       bio->bi_idx = 0;
                        bio->bi_size = i * PAGE_SIZE;
                        bio->bi_bdev = super->s_bdev;
                        bio->bi_sector = ofs >> 9;
@@ -224,7 +220,6 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
                bio->bi_io_vec[i].bv_offset = 0;
        }
        bio->bi_vcnt = nr_pages;
-       bio->bi_idx = 0;
        bio->bi_size = nr_pages * PAGE_SIZE;
        bio->bi_bdev = super->s_bdev;
        bio->bi_sector = ofs >> 9;
index 820e7aaad4fdbbf432b188b083662b5015bd3905..ef24466d8f82516a76029577df9c8f1ec530cf20 100644 (file)
@@ -67,6 +67,7 @@
 #define bio_offset(bio)                bio_iovec((bio))->bv_offset
 #define bio_segments(bio)      ((bio)->bi_vcnt - (bio)->bi_idx)
 #define bio_sectors(bio)       ((bio)->bi_size >> 9)
+#define bio_end_sector(bio)    ((bio)->bi_sector + bio_sectors((bio)))
 
 static inline unsigned int bio_cur_bytes(struct bio *bio)
 {
@@ -84,11 +85,6 @@ static inline void *bio_data(struct bio *bio)
        return NULL;
 }
 
-static inline int bio_has_allocated_vec(struct bio *bio)
-{
-       return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs;
-}
-
 /*
  * will die
  */
@@ -136,16 +132,27 @@ static inline int bio_has_allocated_vec(struct bio *bio)
 #define bio_io_error(bio) bio_endio((bio), -EIO)
 
 /*
- * drivers should not use the __ version unless they _really_ want to
- * run through the entire bio and not just pending pieces
+ * drivers should not use the __ version unless they _really_ know what
+ * they're doing
  */
 #define __bio_for_each_segment(bvl, bio, i, start_idx)                 \
        for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx);  \
             i < (bio)->bi_vcnt;                                        \
             bvl++, i++)
 
+/*
+ * drivers should _never_ use the all version - the bio may have been split
+ * before it got to the driver and the driver won't own all of it
+ */
+#define bio_for_each_segment_all(bvl, bio, i)                          \
+       for (i = 0;                                                     \
+            bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt;       \
+            i++)
+
 #define bio_for_each_segment(bvl, bio, i)                              \
-       __bio_for_each_segment(bvl, bio, i, (bio)->bi_idx)
+       for (i = (bio)->bi_idx;                                         \
+            bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt;       \
+            i++)
 
 /*
  * get a reference to a bio, so it won't disappear. the intended use is
@@ -180,9 +187,12 @@ struct bio_integrity_payload {
        unsigned short          bip_slab;       /* slab the bip came from */
        unsigned short          bip_vcnt;       /* # of integrity bio_vecs */
        unsigned short          bip_idx;        /* current bip_vec index */
+       unsigned                bip_owns_buf:1; /* should free bip_buf */
 
        struct work_struct      bip_work;       /* I/O completion */
-       struct bio_vec          bip_vec[0];     /* embedded bvec array */
+
+       struct bio_vec          *bip_vec;
+       struct bio_vec          bip_inline_vecs[0];/* embedded bvec array */
 };
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
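
The practical rule after this change: bio_for_each_segment() starts at bi_idx, so a driver sees only the pending portion of a possibly advanced or split bio, while bio_for_each_segment_all() walks [0, bi_vcnt) and is reserved for the bio's owner (the allocation, bounce and free paths converted in this patch). For instance, an owner releasing every page it attached:

	static void put_all_pages(struct bio *bio)
	{
		struct bio_vec *bvec;
		int i;

		bio_for_each_segment_all(bvec, bio, i)	/* owner-only traversal */
			put_page(bvec->bv_page);
	}
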
 
@@ -211,6 +221,7 @@ extern void bio_pair_release(struct bio_pair *dbio);
 
 extern struct bio_set *bioset_create(unsigned int, unsigned int);
 extern void bioset_free(struct bio_set *);
+extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries);
 
 extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
 extern void bio_put(struct bio *);
@@ -245,6 +256,9 @@ extern void bio_endio(struct bio *, int);
 struct request_queue;
 extern int bio_phys_segments(struct request_queue *, struct bio *);
 
+extern int submit_bio_wait(int rw, struct bio *bio);
+extern void bio_advance(struct bio *, unsigned);
+
 extern void bio_init(struct bio *);
 extern void bio_reset(struct bio *);
 
@@ -279,6 +293,9 @@ static inline void bio_flush_dcache_pages(struct bio *bi)
 }
 #endif
 
+extern void bio_copy_data(struct bio *dst, struct bio *src);
+extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
+
 extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
                                 unsigned long, unsigned int, int, gfp_t);
 extern struct bio *bio_copy_user_iov(struct request_queue *,
@@ -286,8 +303,8 @@ extern struct bio *bio_copy_user_iov(struct request_queue *,
                                     int, int, gfp_t);
 extern int bio_uncopy_user(struct bio *);
 void zero_fill_bio(struct bio *bio);
-extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
-extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
+extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
+extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
 extern unsigned int bvec_nr_vecs(unsigned short idx);
 
 #ifdef CONFIG_BLK_CGROUP
@@ -298,39 +315,6 @@ static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
 static inline void bio_disassociate_task(struct bio *bio) { }
 #endif /* CONFIG_BLK_CGROUP */
 
-/*
- * bio_set is used to allow other portions of the IO system to
- * allocate their own private memory pools for bio and iovec structures.
- * These memory pools in turn all allocate from the bio_slab
- * and the bvec_slabs[].
- */
-#define BIO_POOL_SIZE 2
-#define BIOVEC_NR_POOLS 6
-#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
-
-struct bio_set {
-       struct kmem_cache *bio_slab;
-       unsigned int front_pad;
-
-       mempool_t *bio_pool;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-       mempool_t *bio_integrity_pool;
-#endif
-       mempool_t *bvec_pool;
-};
-
-struct biovec_slab {
-       int nr_vecs;
-       char *name;
-       struct kmem_cache *slab;
-};
-
-/*
- * a small number of entries is fine, not going to be performance critical.
- * basically we just need to survive
- */
-#define BIO_SPLIT_ENTRIES 2
-
 #ifdef CONFIG_HIGHMEM
 /*
  * remember never ever reenable interrupts between a bvec_kmap_irq and
@@ -527,6 +511,49 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
        return bio;
 }
 
+/*
+ * bio_set is used to allow other portions of the IO system to
+ * allocate their own private memory pools for bio and iovec structures.
+ * These memory pools in turn all allocate from the bio_slab
+ * and the bvec_slabs[].
+ */
+#define BIO_POOL_SIZE 2
+#define BIOVEC_NR_POOLS 6
+#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
+
+struct bio_set {
+       struct kmem_cache *bio_slab;
+       unsigned int front_pad;
+
+       mempool_t *bio_pool;
+       mempool_t *bvec_pool;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+       mempool_t *bio_integrity_pool;
+       mempool_t *bvec_integrity_pool;
+#endif
+
+       /*
+        * Deadlock avoidance for stacking block drivers: see comments in
+        * bio_alloc_bioset() for details
+        */
+       spinlock_t              rescue_lock;
+       struct bio_list         rescue_list;
+       struct work_struct      rescue_work;
+       struct workqueue_struct *rescue_workqueue;
+};
+
+struct biovec_slab {
+       int nr_vecs;
+       char *name;
+       struct kmem_cache *slab;
+};
+
+/*
+ * a small number of entries is fine, not going to be performance critical.
+ * basically we just need to survive
+ */
+#define BIO_SPLIT_ENTRIES 2
+
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 
 #define bip_vec_idx(bip, idx)  (&(bip->bip_vec[(idx)]))
index cdf11191e6450fa680b7a8b71352c3642633b9fb..e8de67053cd4b15264c599644e1dc012273604fd 100644 (file)
@@ -117,6 +117,7 @@ struct bio {
  * BIO_POOL_IDX()
  */
 #define BIO_RESET_BITS 12
+#define BIO_OWNS_VEC   12      /* bio_free() should free bvec */
 
 #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
 
@@ -175,6 +176,7 @@ enum rq_flag_bits {
        __REQ_IO_STAT,          /* account I/O stat */
        __REQ_MIXED_MERGE,      /* merge of different types, fail separately */
        __REQ_KERNEL,           /* direct IO to kernel pages */
+       __REQ_PM,               /* runtime pm request */
        __REQ_NR_BITS,          /* stops here */
 };
 
@@ -197,6 +199,8 @@ enum rq_flag_bits {
         REQ_SECURE)
 #define REQ_CLONE_MASK         REQ_COMMON_MASK
 
+#define BIO_NO_ADVANCE_ITER_MASK       (REQ_DISCARD|REQ_WRITE_SAME)
+
 /* This mask is used for both bio and request merge checking */
 #define REQ_NOMERGE_FLAGS \
        (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
@@ -223,5 +227,6 @@ enum rq_flag_bits {
 #define REQ_MIXED_MERGE                (1 << __REQ_MIXED_MERGE)
 #define REQ_SECURE             (1 << __REQ_SECURE)
 #define REQ_KERNEL             (1 << __REQ_KERNEL)
+#define REQ_PM                 (1 << __REQ_PM)
 
 #endif /* __LINUX_BLK_TYPES_H */
index 78feda9bbae2632b2c0e61493e31b909af55f522..89d89c7162aab6e40f66cb8325d89abafbb0886a 100644 (file)
@@ -361,6 +361,12 @@ struct request_queue {
         */
        struct kobject kobj;
 
+#ifdef CONFIG_PM_RUNTIME
+       struct device           *dev;
+       int                     rpm_status;
+       unsigned int            nr_pending;
+#endif
+
        /*
         * queue settings
         */
@@ -960,6 +966,27 @@ struct request_queue *blk_alloc_queue(gfp_t);
 struct request_queue *blk_alloc_queue_node(gfp_t, int);
 extern void blk_put_queue(struct request_queue *);
 
+/*
+ * block layer runtime pm functions
+ */
+#ifdef CONFIG_PM_RUNTIME
+extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev);
+extern int blk_pre_runtime_suspend(struct request_queue *q);
+extern void blk_post_runtime_suspend(struct request_queue *q, int err);
+extern void blk_pre_runtime_resume(struct request_queue *q);
+extern void blk_post_runtime_resume(struct request_queue *q, int err);
+#else
+static inline void blk_pm_runtime_init(struct request_queue *q,
+       struct device *dev) {}
+static inline int blk_pre_runtime_suspend(struct request_queue *q)
+{
+       return -ENOSYS;
+}
+static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {}
+static inline void blk_pre_runtime_resume(struct request_queue *q) {}
+static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
+#endif
+
 /*
  * blk_plug permits building a queue of related requests by holding the I/O
  * fragments for a short period. This allows merging of sequential requests
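
The intended shape of a driver's runtime-PM callbacks around these
helpers, as I read the API: blk_pm_runtime_init() is called once at probe
to hand the queue its struct device, then the pre/post pairs bracket the
hardware transition.  my_dev_to_queue(), my_hw_suspend() and
my_hw_resume() below are placeholders, not functions from this series:

	static int my_runtime_suspend(struct device *dev)
	{
		struct request_queue *q = my_dev_to_queue(dev);
		int err;

		err = blk_pre_runtime_suspend(q);
		if (err)
			return err;	/* queue still busy, stay active */

		err = my_hw_suspend(dev);
		blk_post_runtime_suspend(q, err);
		return err;
	}

	static int my_runtime_resume(struct device *dev)
	{
		struct request_queue *q = my_dev_to_queue(dev);
		int err;

		blk_pre_runtime_resume(q);
		err = my_hw_resume(dev);
		blk_post_runtime_resume(q, err);
		return err;
	}
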
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 9961726523d01251957b2f8c661422c31d1ac78c..5a28843725dfa96a70468b09cdc960f8abaf8697 100644 (file)
@@ -244,7 +244,7 @@ TRACE_EVENT(block_bio_bounce,
                __entry->dev            = bio->bi_bdev ?
                                          bio->bi_bdev->bd_dev : 0;
                __entry->sector         = bio->bi_sector;
-               __entry->nr_sector      = bio->bi_size >> 9;
+               __entry->nr_sector      = bio_sectors(bio);
                blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
                memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
        ),
@@ -281,7 +281,7 @@ TRACE_EVENT(block_bio_complete,
                __entry->dev            = bio->bi_bdev ?
                                          bio->bi_bdev->bd_dev : 0;
                __entry->sector         = bio->bi_sector;
-               __entry->nr_sector      = bio->bi_size >> 9;
+               __entry->nr_sector      = bio_sectors(bio);
                __entry->error          = error;
                blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
        ),
@@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(block_bio_merge,
        TP_fast_assign(
                __entry->dev            = bio->bi_bdev->bd_dev;
                __entry->sector         = bio->bi_sector;
-               __entry->nr_sector      = bio->bi_size >> 9;
+               __entry->nr_sector      = bio_sectors(bio);
                blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
                memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
        ),
@@ -376,7 +376,7 @@ TRACE_EVENT(block_bio_queue,
        TP_fast_assign(
                __entry->dev            = bio->bi_bdev->bd_dev;
                __entry->sector         = bio->bi_sector;
-               __entry->nr_sector      = bio->bi_size >> 9;
+               __entry->nr_sector      = bio_sectors(bio);
                blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
                memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
        ),
@@ -404,7 +404,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
        TP_fast_assign(
                __entry->dev            = bio ? bio->bi_bdev->bd_dev : 0;
                __entry->sector         = bio ? bio->bi_sector : 0;
-               __entry->nr_sector      = bio ? bio->bi_size >> 9 : 0;
+               __entry->nr_sector      = bio ? bio_sectors(bio) : 0;
                blk_fill_rwbs(__entry->rwbs,
                              bio ? bio->bi_rw : 0, __entry->nr_sector);
                memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
@@ -580,7 +580,7 @@ TRACE_EVENT(block_bio_remap,
        TP_fast_assign(
                __entry->dev            = bio->bi_bdev->bd_dev;
                __entry->sector         = bio->bi_sector;
-               __entry->nr_sector      = bio->bi_size >> 9;
+               __entry->nr_sector      = bio_sectors(bio);
                __entry->old_dev        = dev;
                __entry->old_sector     = from;
                blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
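
All of the bi_size >> 9 conversions above are mechanical; in the tree this
is based on, bio_sectors() is still the same shift wrapped in a macro, so
the tracepoints are unchanged in behaviour:

	#define bio_sectors(bio)	((bio)->bi_size >> 9)

The win is that the sector math is no longer open-coded, so later iterator
rework only has to touch the macro, not every tracepoint.
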
diff --git a/mm/bounce.c b/mm/bounce.c
index 5f89017686025f9201d62486bde9dbf078dc4423..f5326b24d65d88c1051ee9547b0a66373372940c 100644 (file)
@@ -101,7 +101,7 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
        struct bio_vec *tovec, *fromvec;
        int i;
 
-       __bio_for_each_segment(tovec, to, i, 0) {
+       bio_for_each_segment(tovec, to, i) {
                fromvec = from->bi_io_vec + i;
 
                /*
@@ -134,7 +134,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
        /*
         * free up bounce indirect pages used
         */
-       __bio_for_each_segment(bvec, bio, i, 0) {
+       bio_for_each_segment_all(bvec, bio, i) {
                org_vec = bio_orig->bi_io_vec + i;
                if (bvec->bv_page == org_vec->bv_page)
                        continue;
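
The bounce_end_io() change is the interesting one: the completion path
must walk the bio from vector 0 regardless of how far bi_idx advanced
during I/O, which is what the old __bio_for_each_segment(..., 0) did and
what the new helper makes explicit.  Roughly, paraphrasing the 3.10-era
definition rather than quoting this series:

	#define bio_for_each_segment_all(bvl, bio, i)		\
		for (i = 0, bvl = (bio)->bi_io_vec;		\
		     i < (bio)->bi_vcnt;			\
		     i++, bvl++)

Unlike bio_for_each_segment(), it ignores bi_idx and visits every
allocated vector, which is only legal for code that owns the bio, as the
bounce code owns its clone here.
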
@@ -218,78 +218,43 @@ static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
 static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
                               mempool_t *pool, int force)
 {
-       struct page *page;
-       struct bio *bio = NULL;
-       int i, rw = bio_data_dir(*bio_orig);
+       struct bio *bio;
+       int rw = bio_data_dir(*bio_orig);
        struct bio_vec *to, *from;
+       unsigned i;
 
-       bio_for_each_segment(from, *bio_orig, i) {
-               page = from->bv_page;
+       bio_for_each_segment(from, *bio_orig, i)
+               if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q))
+                       goto bounce;
 
-               /*
-                * is destination page below bounce pfn?
-                */
-               if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
-                       continue;
+       return;
+bounce:
+       bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set);
 
-               /*
-                * irk, bounce it
-                */
-               if (!bio) {
-                       unsigned int cnt = (*bio_orig)->bi_vcnt;
-
-                       bio = bio_alloc(GFP_NOIO, cnt);
-                       memset(bio->bi_io_vec, 0, cnt * sizeof(struct bio_vec));
-               }
-                       
+       bio_for_each_segment_all(to, bio, i) {
+               struct page *page = to->bv_page;
 
-               to = bio->bi_io_vec + i;
+               if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
+                       continue;
 
-               to->bv_page = mempool_alloc(pool, q->bounce_gfp);
-               to->bv_len = from->bv_len;
-               to->bv_offset = from->bv_offset;
                inc_zone_page_state(to->bv_page, NR_BOUNCE);
+               to->bv_page = mempool_alloc(pool, q->bounce_gfp);
 
                if (rw == WRITE) {
                        char *vto, *vfrom;
 
-                       flush_dcache_page(from->bv_page);
+                       flush_dcache_page(page);
+
                        vto = page_address(to->bv_page) + to->bv_offset;
-                       vfrom = kmap(from->bv_page) + from->bv_offset;
+                       vfrom = kmap_atomic(page) + to->bv_offset;
                        memcpy(vto, vfrom, to->bv_len);
-                       kunmap(from->bv_page);
+                       kunmap_atomic(vfrom);
                }
        }
 
-       /*
-        * no pages bounced
-        */
-       if (!bio)
-               return;
-
        trace_block_bio_bounce(q, *bio_orig);
 
-       /*
-        * at least one page was bounced, fill in possible non-highmem
-        * pages
-        */
-       __bio_for_each_segment(from, *bio_orig, i, 0) {
-               to = bio_iovec_idx(bio, i);
-               if (!to->bv_page) {
-                       to->bv_page = from->bv_page;
-                       to->bv_len = from->bv_len;
-                       to->bv_offset = from->bv_offset;
-               }
-       }
-
-       bio->bi_bdev = (*bio_orig)->bi_bdev;
        bio->bi_flags |= (1 << BIO_BOUNCED);
-       bio->bi_sector = (*bio_orig)->bi_sector;
-       bio->bi_rw = (*bio_orig)->bi_rw;
-
-       bio->bi_vcnt = (*bio_orig)->bi_vcnt;
-       bio->bi_idx = (*bio_orig)->bi_idx;
-       bio->bi_size = (*bio_orig)->bi_size;
 
        if (pool == page_pool) {
                bio->bi_end_io = bounce_end_io_write;
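
One behavioural detail in the rewritten WRITE path: the copy source moves
from kmap() to kmap_atomic(), which is cheaper but forbids sleeping
between map and unmap; the plain memcpy in between is fine.  The pattern
in isolation, with copy_from_page() as an illustrative helper rather than
anything from this patch:

	static void copy_from_page(char *dst, struct page *page,
				   unsigned int offset, unsigned int len)
	{
		char *src = kmap_atomic(page);	/* disables preemption */

		memcpy(dst, src + offset, len);
		kunmap_atomic(src);
	}
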
diff --git a/mm/page_io.c b/mm/page_io.c
index 78eee32ee4860b17155413bb37c211bbd8b9b60a..8d3c0c088105e2e6127eb1d27fcd2a88dd0193c3 100644 (file)
@@ -35,7 +35,6 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
                bio->bi_io_vec[0].bv_len = PAGE_SIZE;
                bio->bi_io_vec[0].bv_offset = 0;
                bio->bi_vcnt = 1;
-               bio->bi_idx = 0;
                bio->bi_size = PAGE_SIZE;
                bio->bi_end_io = end_io;
        }