Merge branch 'for-4.2/core' of git://git.kernel.dk/linux-block

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 25 Jun 2015 21:29:53 +0000 (14:29 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 25 Jun 2015 21:29:53 +0000 (14:29 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 25 Jun 2015 21:29:53 +0000 (14:29 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 25 Jun 2015 21:29:53 +0000 (14:29 -0700)
diff --git a/MAINTAINERS b/MAINTAINERS

index 798dc538529ce47c282f9a7c0f2bd77aa4cb6bbc..d42970b10a22b093a0da02020bbe83e13ce1fd09 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2075,6 +2075,7 @@ M:        Jens Axboe <axboe@kernel.dk>
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
  S:     Maintained
  F:     block/
+F:     kernel/trace/blktrace.c
  
  BLOCK2MTD DRIVER
  M:     Joern Engel <joern@lazybastard.org>
diff --git a/block/bio-integrity.c b/block/bio-integrity.c

index 5cbd5d9ea61dd52969d9c55313dbf703f3c4d24f..0436c21db7f23b9c5fddf5fce48bc4b21541cd9f 100644 (file)
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -361,7 +361,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
  
         /* Restore original bio completion handler */
         bio->bi_end_io = bip->bip_end_io;
-       bio_endio_nodec(bio, error);
+       bio_endio(bio, error);
  }
  
  /**
@@ -388,7 +388,7 @@ void bio_integrity_endio(struct bio *bio, int error)
          */
         if (error) {
                 bio->bi_end_io = bip->bip_end_io;
-               bio_endio_nodec(bio, error);
+               bio_endio(bio, error);
  
                 return;
         }
diff --git a/block/bio.c b/block/bio.c

index f66a4eae16ee4a96c9469c7a9311de3437a923c5..259197d97de1a6e5705457f58dc7727665ab1a93 100644 (file)
--- a/block/bio.c
+++ b/block/bio.c
@@ -270,8 +270,8 @@ void bio_init(struct bio *bio)
  {
         memset(bio, 0, sizeof(*bio));
         bio->bi_flags = 1 << BIO_UPTODATE;
-       atomic_set(&bio->bi_remaining, 1);
-       atomic_set(&bio->bi_cnt, 1);
+       atomic_set(&bio->__bi_remaining, 1);
+       atomic_set(&bio->__bi_cnt, 1);
  }
  EXPORT_SYMBOL(bio_init);
  
@@ -292,8 +292,8 @@ void bio_reset(struct bio *bio)
         __bio_free(bio);
  
         memset(bio, 0, BIO_RESET_BYTES);
-       bio->bi_flags = flags|(1 << BIO_UPTODATE);
-       atomic_set(&bio->bi_remaining, 1);
+       bio->bi_flags = flags | (1 << BIO_UPTODATE);
+       atomic_set(&bio->__bi_remaining, 1);
  }
  EXPORT_SYMBOL(bio_reset);
  
@@ -303,6 +303,17 @@ static void bio_chain_endio(struct bio *bio, int error)
         bio_put(bio);
  }
  
+/*
+ * Increment chain count for the bio. Make sure the CHAIN flag update
+ * is visible before the raised count.
+ */
+static inline void bio_inc_remaining(struct bio *bio)
+{
+       bio->bi_flags |= (1 << BIO_CHAIN);
+       smp_mb__before_atomic();
+       atomic_inc(&bio->__bi_remaining);
+}
+
  /**
   * bio_chain - chain bio completions
   * @bio: the target bio
@@ -320,7 +331,7 @@ void bio_chain(struct bio *bio, struct bio *parent)
  
         bio->bi_private = parent;
         bio->bi_end_io  = bio_chain_endio;
-       atomic_inc(&parent->bi_remaining);
+       bio_inc_remaining(parent);
  }
  EXPORT_SYMBOL(bio_chain);
  
@@ -524,13 +535,17 @@ EXPORT_SYMBOL(zero_fill_bio);
   **/
  void bio_put(struct bio *bio)
  {
-       BIO_BUG_ON(!atomic_read(&bio->bi_cnt));
-
-       /*
-        * last put frees it
-        */
-       if (atomic_dec_and_test(&bio->bi_cnt))
+       if (!bio_flagged(bio, BIO_REFFED))
                 bio_free(bio);
+       else {
+               BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
+
+               /*
+                * last put frees it
+                */
+               if (atomic_dec_and_test(&bio->__bi_cnt))
+                       bio_free(bio);
+       }
  }
  EXPORT_SYMBOL(bio_put);
  
@@ -1741,6 +1756,25 @@ void bio_flush_dcache_pages(struct bio *bi)
  EXPORT_SYMBOL(bio_flush_dcache_pages);
  #endif
  
+static inline bool bio_remaining_done(struct bio *bio)
+{
+       /*
+        * If we're not chaining, then ->__bi_remaining is always 1 and
+        * we always end io on the first invocation.
+        */
+       if (!bio_flagged(bio, BIO_CHAIN))
+               return true;
+
+       BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
+
+       if (atomic_dec_and_test(&bio->__bi_remaining)) {
+               clear_bit(BIO_CHAIN, &bio->bi_flags);
+               return true;
+       }
+
+       return false;
+}
+
  /**
   * bio_endio - end I/O on a bio
   * @bio:       bio
@@ -1758,15 +1792,13 @@ EXPORT_SYMBOL(bio_flush_dcache_pages);
  void bio_endio(struct bio *bio, int error)
  {
         while (bio) {
-               BUG_ON(atomic_read(&bio->bi_remaining) <= 0);
-
                 if (error)
                         clear_bit(BIO_UPTODATE, &bio->bi_flags);
                 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
                         error = -EIO;
  
-               if (!atomic_dec_and_test(&bio->bi_remaining))
-                       return;
+               if (unlikely(!bio_remaining_done(bio)))
+                       break;
  
                 /*
                  * Need to have a real endio function for chained bios,
@@ -1789,21 +1821,6 @@ void bio_endio(struct bio *bio, int error)
  }
  EXPORT_SYMBOL(bio_endio);
  
-/**
- * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining
- * @bio:       bio
- * @error:     error, if any
- *
- * For code that has saved and restored bi_end_io; thing hard before using this
- * function, probably you should've cloned the entire bio.
- **/
-void bio_endio_nodec(struct bio *bio, int error)
-{
-       atomic_inc(&bio->bi_remaining);
-       bio_endio(bio, error);
-}
-EXPORT_SYMBOL(bio_endio_nodec);
-
  /**
   * bio_split - split a bio
   * @bio:       bio to split
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c

index 0ac817b750dbc7a15f0c69c21ed184d5882e1696..6e43fa355e7127e8e2b10ff33eee5c0ab43ccf90 100644 (file)
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -9,6 +9,10 @@
   *
   * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
   *                   Nauman Rafique <nauman@google.com>
+ *
+ * For policy-specific per-blkcg data:
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *                    Arianna Avanzini <avanzini.arianna@gmail.com>
   */
  #include <linux/ioprio.h>
  #include <linux/kdev_t.h>
@@ -26,8 +30,7 @@
  
  static DEFINE_MUTEX(blkcg_pol_mutex);
  
-struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT,
-                           .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, };
+struct blkcg blkcg_root;
  EXPORT_SYMBOL_GPL(blkcg_root);
  
  static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
@@ -823,6 +826,8 @@ static struct cgroup_subsys_state *
  blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
  {
         struct blkcg *blkcg;
+       struct cgroup_subsys_state *ret;
+       int i;
  
         if (!parent_css) {
                 blkcg = &blkcg_root;
@@ -830,17 +835,49 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
         }
  
         blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
-       if (!blkcg)
-               return ERR_PTR(-ENOMEM);
+       if (!blkcg) {
+               ret = ERR_PTR(-ENOMEM);
+               goto free_blkcg;
+       }
+
+       for (i = 0; i < BLKCG_MAX_POLS ; i++) {
+               struct blkcg_policy *pol = blkcg_policy[i];
+               struct blkcg_policy_data *cpd;
+
+               /*
+                * If the policy hasn't been attached yet, wait for it
+                * to be attached before doing anything else. Otherwise,
+                * check if the policy requires any specific per-cgroup
+                * data: if it does, allocate and initialize it.
+                */
+               if (!pol || !pol->cpd_size)
+                       continue;
+
+               BUG_ON(blkcg->pd[i]);
+               cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
+               if (!cpd) {
+                       ret = ERR_PTR(-ENOMEM);
+                       goto free_pd_blkcg;
+               }
+               blkcg->pd[i] = cpd;
+               cpd->plid = i;
+               pol->cpd_init_fn(blkcg);
+       }
  
-       blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
-       blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
  done:
         spin_lock_init(&blkcg->lock);
         INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
         INIT_HLIST_HEAD(&blkcg->blkg_list);
  
         return &blkcg->css;
+
+free_pd_blkcg:
+       for (i--; i >= 0; i--)
+               kfree(blkcg->pd[i]);
+
+free_blkcg:
+       kfree(blkcg);
+       return ret;
  }
  
  /**
@@ -958,8 +995,10 @@ int blkcg_activate_policy(struct request_queue *q,
                           const struct blkcg_policy *pol)
  {
         LIST_HEAD(pds);
+       LIST_HEAD(cpds);
         struct blkcg_gq *blkg, *new_blkg;
-       struct blkg_policy_data *pd, *n;
+       struct blkg_policy_data *pd, *nd;
+       struct blkcg_policy_data *cpd, *cnd;
         int cnt = 0, ret;
         bool preloaded;
  
@@ -1003,7 +1042,10 @@ int blkcg_activate_policy(struct request_queue *q,
  
         spin_unlock_irq(q->queue_lock);
  
-       /* allocate policy_data for all existing blkgs */
+       /*
+        * Allocate per-blkg and per-blkcg policy data
+        * for all existing blkgs.
+        */
         while (cnt--) {
                 pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
                 if (!pd) {
@@ -1011,26 +1053,50 @@ int blkcg_activate_policy(struct request_queue *q,
                         goto out_free;
                 }
                 list_add_tail(&pd->alloc_node, &pds);
+
+               if (!pol->cpd_size)
+                       continue;
+               cpd = kzalloc_node(pol->cpd_size, GFP_KERNEL, q->node);
+               if (!cpd) {
+                       ret = -ENOMEM;
+                       goto out_free;
+               }
+               list_add_tail(&cpd->alloc_node, &cpds);
         }
  
         /*
-        * Install the allocated pds.  With @q bypassing, no new blkg
+        * Install the allocated pds and cpds. With @q bypassing, no new blkg
          * should have been created while the queue lock was dropped.
          */
         spin_lock_irq(q->queue_lock);
  
         list_for_each_entry(blkg, &q->blkg_list, q_node) {
-               if (WARN_ON(list_empty(&pds))) {
+               if (WARN_ON(list_empty(&pds)) ||
+                   WARN_ON(pol->cpd_size && list_empty(&cpds))) {
                         /* umm... this shouldn't happen, just abort */
                         ret = -ENOMEM;
                         goto out_unlock;
                 }
+               cpd = list_first_entry(&cpds, struct blkcg_policy_data,
+                                      alloc_node);
+               list_del_init(&cpd->alloc_node);
                 pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
                 list_del_init(&pd->alloc_node);
  
                 /* grab blkcg lock too while installing @pd on @blkg */
                 spin_lock(&blkg->blkcg->lock);
  
+               if (!pol->cpd_size)
+                       goto no_cpd;
+               if (!blkg->blkcg->pd[pol->plid]) {
+                       /* Per-policy per-blkcg data */
+                       blkg->blkcg->pd[pol->plid] = cpd;
+                       cpd->plid = pol->plid;
+                       pol->cpd_init_fn(blkg->blkcg);
+               } else { /* must free it as it has already been extracted */
+                       kfree(cpd);
+               }
+no_cpd:
                 blkg->pd[pol->plid] = pd;
                 pd->blkg = blkg;
                 pd->plid = pol->plid;
@@ -1045,8 +1111,10 @@ out_unlock:
         spin_unlock_irq(q->queue_lock);
  out_free:
         blk_queue_bypass_end(q);
-       list_for_each_entry_safe(pd, n, &pds, alloc_node)
+       list_for_each_entry_safe(pd, nd, &pds, alloc_node)
                 kfree(pd);
+       list_for_each_entry_safe(cpd, cnd, &cpds, alloc_node)
+               kfree(cpd);
         return ret;
  }
  EXPORT_SYMBOL_GPL(blkcg_activate_policy);
@@ -1087,6 +1155,8 @@ void blkcg_deactivate_policy(struct request_queue *q,
  
                 kfree(blkg->pd[pol->plid]);
                 blkg->pd[pol->plid] = NULL;
+               kfree(blkg->blkcg->pd[pol->plid]);
+               blkg->blkcg->pd[pol->plid] = NULL;
  
                 spin_unlock(&blkg->blkcg->lock);
         }
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h

index c567865b5f1df6baea2cdabf89663994a67e8ade..74296a78bba16896b5eb8c65a7e618163b766f9f 100644 (file)
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -23,11 +23,6 @@
  /* Max limits for throttle policy */
  #define THROTL_IOPS_MAX                UINT_MAX
  
-/* CFQ specific, out here for blkcg->cfq_weight */
-#define CFQ_WEIGHT_MIN         10
-#define CFQ_WEIGHT_MAX         1000
-#define CFQ_WEIGHT_DEFAULT     500
-
  #ifdef CONFIG_BLK_CGROUP
  
  enum blkg_rwstat_type {
@@ -50,9 +45,7 @@ struct blkcg {
         struct blkcg_gq                 *blkg_hint;
         struct hlist_head               blkg_list;
  
-       /* TODO: per-policy storage in blkcg */
-       unsigned int                    cfq_weight;     /* belongs to cfq */
-       unsigned int                    cfq_leaf_weight;
+       struct blkcg_policy_data        *pd[BLKCG_MAX_POLS];
  };
  
  struct blkg_stat {
@@ -87,6 +80,24 @@ struct blkg_policy_data {
         struct list_head                alloc_node;
  };
  
+/*
+ * Policies that need to keep per-blkcg data which is independent
+ * from any request_queue associated to it must specify its size
+ * with the cpd_size field of the blkcg_policy structure and
+ * embed a blkcg_policy_data in it. blkcg core allocates
+ * policy-specific per-blkcg structures lazily the first time
+ * they are actually needed, so it handles them together with
+ * blkgs. cpd_init_fn() is invoked to let each policy initialize
+ * its per-blkcg data.
+ */
+struct blkcg_policy_data {
+       /* the policy id this per-policy data belongs to */
+       int                             plid;
+
+       /* used during policy activation */
+       struct list_head                alloc_node;
+};
+
  /* association between a blk cgroup and a request queue */
  struct blkcg_gq {
         /* Pointer to the associated request_queue */
@@ -112,6 +123,7 @@ struct blkcg_gq {
         struct rcu_head                 rcu_head;
  };
  
+typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg);
  typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg);
  typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg);
  typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg);
@@ -122,10 +134,13 @@ struct blkcg_policy {
         int                             plid;
         /* policy specific private data size */
         size_t                          pd_size;
+       /* policy specific per-blkcg data size */
+       size_t                          cpd_size;
         /* cgroup files for the policy */
         struct cftype                   *cftypes;
  
         /* operations */
+       blkcg_pol_init_cpd_fn           *cpd_init_fn;
         blkcg_pol_init_pd_fn            *pd_init_fn;
         blkcg_pol_online_pd_fn          *pd_online_fn;
         blkcg_pol_offline_pd_fn         *pd_offline_fn;
@@ -218,6 +233,12 @@ static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
         return blkg ? blkg->pd[pol->plid] : NULL;
  }
  
+static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
+                                                    struct blkcg_policy *pol)
+{
+       return blkcg ? blkcg->pd[pol->plid] : NULL;
+}
+
  /**
   * pdata_to_blkg - get blkg associated with policy private data
   * @pd: policy private data of interest
@@ -564,6 +585,9 @@ struct blkcg;
  struct blkg_policy_data {
  };
  
+struct blkcg_policy_data {
+};
+
  struct blkcg_gq {
  };
  
diff --git a/block/blk-core.c b/block/blk-core.c

index 03b5f8d77f37b4cbad3a12f3a98f9c3ea63a50e7..f6ab750060fe019f97d0ccfbca367b9e6cd3b426 100644 (file)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -117,7 +117,7 @@ EXPORT_SYMBOL(blk_rq_init);
  static void req_bio_endio(struct request *rq, struct bio *bio,
                           unsigned int nbytes, int error)
  {
-       if (error)
+       if (error && !(rq->cmd_flags & REQ_CLONE))
                 clear_bit(BIO_UPTODATE, &bio->bi_flags);
         else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
                 error = -EIO;
@@ -128,7 +128,8 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
         bio_advance(bio, nbytes);
  
         /* don't actually finish bio if it's part of flush sequence */
-       if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+       if (bio->bi_iter.bi_size == 0 &&
+           !(rq->cmd_flags & (REQ_FLUSH_SEQ|REQ_CLONE)))
                 bio_endio(bio, error);
  }
  
@@ -285,6 +286,7 @@ inline void __blk_run_queue_uncond(struct request_queue *q)
         q->request_fn(q);
         q->request_fn_active--;
  }
+EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
  
  /**
   * __blk_run_queue - run a single device queue
@@ -1525,7 +1527,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
   * Caller must ensure !blk_queue_nomerges(q) beforehand.
   */
  bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
-                           unsigned int *request_count)
+                           unsigned int *request_count,
+                           struct request **same_queue_rq)
  {
         struct blk_plug *plug;
         struct request *rq;
@@ -1545,8 +1548,16 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
         list_for_each_entry_reverse(rq, plug_list, queuelist) {
                 int el_ret;
  
-               if (rq->q == q)
+               if (rq->q == q) {
                         (*request_count)++;
+                       /*
+                        * Only the blk-mq multiple hardware queue case checks
+                        * for an rq in the same queue; there should be only one
+                        * such rq in a queue.
+                        **/
+                       if (same_queue_rq)
+                               *same_queue_rq = rq;
+               }
  
                 if (rq->q != q || !blk_rq_merge_ok(rq, bio))
                         continue;
@@ -1611,7 +1622,7 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio)
          * any locks.
          */
         if (!blk_queue_nomerges(q) &&
-           blk_attempt_plug_merge(q, bio, &request_count))
+           blk_attempt_plug_merge(q, bio, &request_count, NULL))
                 return;
  
         spin_lock_irq(q->queue_lock);
@@ -1718,8 +1729,6 @@ static void handle_bad_sector(struct bio *bio)
                         bio->bi_rw,
                         (unsigned long long)bio_end_sector(bio),
                         (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
-
-       set_bit(BIO_EOF, &bio->bi_flags);
  }
  
  #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -2904,95 +2913,22 @@ int blk_lld_busy(struct request_queue *q)
  }
  EXPORT_SYMBOL_GPL(blk_lld_busy);
  
-/**
- * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
- * @rq: the clone request to be cleaned up
- *
- * Description:
- *     Free all bios in @rq for a cloned request.
- */
-void blk_rq_unprep_clone(struct request *rq)
-{
-       struct bio *bio;
-
-       while ((bio = rq->bio) != NULL) {
-               rq->bio = bio->bi_next;
-
-               bio_put(bio);
-       }
-}
-EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
-
-/*
- * Copy attributes of the original request to the clone request.
- * The actual data parts (e.g. ->cmd, ->sense) are not copied.
- */
-static void __blk_rq_prep_clone(struct request *dst, struct request *src)
+void blk_rq_prep_clone(struct request *dst, struct request *src)
  {
         dst->cpu = src->cpu;
-       dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
+       dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK);
+       dst->cmd_flags |= REQ_NOMERGE | REQ_CLONE;
         dst->cmd_type = src->cmd_type;
         dst->__sector = blk_rq_pos(src);
         dst->__data_len = blk_rq_bytes(src);
         dst->nr_phys_segments = src->nr_phys_segments;
         dst->ioprio = src->ioprio;
         dst->extra_len = src->extra_len;
-}
-
-/**
- * blk_rq_prep_clone - Helper function to setup clone request
- * @rq: the request to be setup
- * @rq_src: original request to be cloned
- * @bs: bio_set that bios for clone are allocated from
- * @gfp_mask: memory allocation mask for bio
- * @bio_ctr: setup function to be called for each clone bio.
- *           Returns %0 for success, non %0 for failure.
- * @data: private data to be passed to @bio_ctr
- *
- * Description:
- *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
- *     The actual data parts of @rq_src (e.g. ->cmd, ->sense)
- *     are not copied, and copying such parts is the caller's responsibility.
- *     Also, pages which the original bios are pointing to are not copied
- *     and the cloned bios just point same pages.
- *     So cloned bios must be completed before original bios, which means
- *     the caller must complete @rq before @rq_src.
- */
-int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
-                     struct bio_set *bs, gfp_t gfp_mask,
-                     int (*bio_ctr)(struct bio *, struct bio *, void *),
-                     void *data)
-{
-       struct bio *bio, *bio_src;
-
-       if (!bs)
-               bs = fs_bio_set;
-
-       __rq_for_each_bio(bio_src, rq_src) {
-               bio = bio_clone_fast(bio_src, gfp_mask, bs);
-               if (!bio)
-                       goto free_and_out;
-
-               if (bio_ctr && bio_ctr(bio, bio_src, data))
-                       goto free_and_out;
-
-               if (rq->bio) {
-                       rq->biotail->bi_next = bio;
-                       rq->biotail = bio;
-               } else
-                       rq->bio = rq->biotail = bio;
-       }
-
-       __blk_rq_prep_clone(rq, rq_src);
-
-       return 0;
-
-free_and_out:
-       if (bio)
-               bio_put(bio);
-       blk_rq_unprep_clone(rq);
-
-       return -ENOMEM;
+       dst->bio = src->bio;
+       dst->biotail = src->biotail;
+       dst->cmd = src->cmd;
+       dst->cmd_len = src->cmd_len;
+       dst->sense = src->sense;
  }
  EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
  
@@ -3034,21 +2970,20 @@ void blk_start_plug(struct blk_plug *plug)
  {
         struct task_struct *tsk = current;
  
+       /*
+        * If this is a nested plug, don't actually assign it.
+        */
+       if (tsk->plug)
+               return;
+
         INIT_LIST_HEAD(&plug->list);
         INIT_LIST_HEAD(&plug->mq_list);
         INIT_LIST_HEAD(&plug->cb_list);
-
         /*
-        * If this is a nested plug, don't actually assign it. It will be
-        * flushed on its own.
+        * Store ordering should not be needed here, since a potential
+        * preempt will imply a full memory barrier
          */
-       if (!tsk->plug) {
-               /*
-                * Store ordering should not be needed here, since a potential
-                * preempt will imply a full memory barrier
-                */
-               tsk->plug = plug;
-       }
+       tsk->plug = plug;
  }
  EXPORT_SYMBOL(blk_start_plug);
  
@@ -3195,10 +3130,11 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  
  void blk_finish_plug(struct blk_plug *plug)
  {
+       if (plug != current->plug)
+               return;
         blk_flush_plug_list(plug, false);
  
-       if (plug == current->plug)
-               current->plug = NULL;
+       current->plug = NULL;
  }
  EXPORT_SYMBOL(blk_finish_plug);
  
diff --git a/block/blk-exec.c b/block/blk-exec.c

index 9924725fa50dcac5563d2654a24df0d85cfada7a..3fec8a29d0fae16f3c0398ff292a75dbce38cf77 100644 (file)
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -53,7 +53,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
                            rq_end_io_fn *done)
  {
         int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
-       bool is_pm_resume;
  
         WARN_ON(irqs_disabled());
         WARN_ON(rq->cmd_type == REQ_TYPE_FS);
@@ -70,12 +69,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
                 return;
         }
  
-       /*
-        * need to check this before __blk_run_queue(), because rq can
-        * be freed before that returns.
-        */
-       is_pm_resume = rq->cmd_type == REQ_TYPE_PM_RESUME;
-
         spin_lock_irq(q->queue_lock);
  
         if (unlikely(blk_queue_dying(q))) {
@@ -88,9 +81,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
  
         __elv_add_request(q, rq, where);
         __blk_run_queue(q);
-       /* the queue is stopped so it won't be run */
-       if (is_pm_resume)
-               __blk_run_queue_uncond(q);
         spin_unlock_irq(q->queue_lock);
  }
  EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
diff --git a/block/blk-merge.c b/block/blk-merge.c

index fd3fee81c23ce2f1cdc73d2bf4e76188c90358cb..30a0d9f890170b7540546557b950713b8e372780 100644 (file)
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -589,7 +589,8 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
             !blk_write_same_mergeable(rq->bio, bio))
                 return false;
  
-       if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) {
+       /* Only check gaps if the bio carries data */
+       if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) && bio_has_data(bio)) {
                 struct bio_vec *bprev;
  
                 bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1];
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c

index be3290cc0644efc9e3feec6fd891c7afe1a15775..9b6e28830b823866ec63ccd2f6b138c88be562b5 100644 (file)
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -438,6 +438,39 @@ static void bt_for_each(struct blk_mq_hw_ctx *hctx,
         }
  }
  
+static void bt_tags_for_each(struct blk_mq_tags *tags,
+               struct blk_mq_bitmap_tags *bt, unsigned int off,
+               busy_tag_iter_fn *fn, void *data, bool reserved)
+{
+       struct request *rq;
+       int bit, i;
+
+       if (!tags->rqs)
+               return;
+       for (i = 0; i < bt->map_nr; i++) {
+               struct blk_align_bitmap *bm = &bt->map[i];
+
+               for (bit = find_first_bit(&bm->word, bm->depth);
+                    bit < bm->depth;
+                    bit = find_next_bit(&bm->word, bm->depth, bit + 1)) {
+                       rq = blk_mq_tag_to_rq(tags, off + bit);
+                       fn(rq, data, reserved);
+               }
+
+               off += (1 << bt->bits_per_word);
+       }
+}
+
+void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
+               void *priv)
+{
+       if (tags->nr_reserved_tags)
+               bt_tags_for_each(tags, &tags->breserved_tags, 0, fn, priv, true);
+       bt_tags_for_each(tags, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv,
+                       false);
+}
+EXPORT_SYMBOL(blk_mq_all_tag_busy_iter);
+
  void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
                 void *priv)
  {
@@ -580,6 +613,11 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
         if (!tags)
                 return NULL;
  
+       if (!zalloc_cpumask_var(&tags->cpumask, GFP_KERNEL)) {
+               kfree(tags);
+               return NULL;
+       }
+
         tags->nr_tags = total_tags;
         tags->nr_reserved_tags = reserved_tags;
  
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h

index 90767b370308daab9bdd7efe3903dd72af2a6dad..75893a34237d2eb2b4d07bd21a70cc35948c172a 100644 (file)
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -44,6 +44,7 @@ struct blk_mq_tags {
         struct list_head page_list;
  
         int alloc_policy;
+       cpumask_var_t cpumask;
  };
  
  
diff --git a/block/blk-mq.c b/block/blk-mq.c

index 594eea04266e6d05f7256255552a1c4c72c664f3..f53779692c772a1cc06ec341f9fab307b2ceef91 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -89,7 +89,8 @@ static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
                         return -EBUSY;
  
                 ret = wait_event_interruptible(q->mq_freeze_wq,
-                               !q->mq_freeze_depth || blk_queue_dying(q));
+                               !atomic_read(&q->mq_freeze_depth) ||
+                               blk_queue_dying(q));
                 if (blk_queue_dying(q))
                         return -ENODEV;
                 if (ret)
@@ -112,13 +113,10 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)
  
  void blk_mq_freeze_queue_start(struct request_queue *q)
  {
-       bool freeze;
+       int freeze_depth;
  
-       spin_lock_irq(q->queue_lock);
-       freeze = !q->mq_freeze_depth++;
-       spin_unlock_irq(q->queue_lock);
-
-       if (freeze) {
+       freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
+       if (freeze_depth == 1) {
                 percpu_ref_kill(&q->mq_usage_counter);
                 blk_mq_run_hw_queues(q, false);
         }
@@ -143,13 +141,11 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
  
  void blk_mq_unfreeze_queue(struct request_queue *q)
  {
-       bool wake;
+       int freeze_depth;
  
-       spin_lock_irq(q->queue_lock);
-       wake = !--q->mq_freeze_depth;
-       WARN_ON_ONCE(q->mq_freeze_depth < 0);
-       spin_unlock_irq(q->queue_lock);
-       if (wake) {
+       freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
+       WARN_ON_ONCE(freeze_depth < 0);
+       if (!freeze_depth) {
                 percpu_ref_reinit(&q->mq_usage_counter);
                 wake_up_all(&q->mq_freeze_wq);
         }
@@ -1237,6 +1233,38 @@ static struct request *blk_mq_map_request(struct request_queue *q,
         return rq;
  }
  
+static int blk_mq_direct_issue_request(struct request *rq)
+{
+       int ret;
+       struct request_queue *q = rq->q;
+       struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q,
+                       rq->mq_ctx->cpu);
+       struct blk_mq_queue_data bd = {
+               .rq = rq,
+               .list = NULL,
+               .last = 1
+       };
+
+       /*
+        * For OK queue, we are done. For error, kill it. Any other
+        * error (busy), just add it to our list as we previously
+        * would have done
+        */
+       ret = q->mq_ops->queue_rq(hctx, &bd);
+       if (ret == BLK_MQ_RQ_QUEUE_OK)
+               return 0;
+       else {
+               __blk_mq_requeue_request(rq);
+
+               if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
+                       rq->errors = -EIO;
+                       blk_mq_end_request(rq, rq->errors);
+                       return 0;
+               }
+               return -1;
+       }
+}
+
  /*
   * Multiple hardware queue variant. This will not use per-process plugs,
   * but will attempt to bypass the hctx queueing if we can go straight to
@@ -1248,6 +1276,9 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
         const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
         struct blk_map_ctx data;
         struct request *rq;
+       unsigned int request_count = 0;
+       struct blk_plug *plug;
+       struct request *same_queue_rq = NULL;
  
         blk_queue_bounce(q, &bio);
  
@@ -1256,6 +1287,10 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
                 return;
         }
  
+       if (!is_flush_fua && !blk_queue_nomerges(q) &&
+           blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
+               return;
+
         rq = blk_mq_map_request(q, bio, &data);
         if (unlikely(!rq))
                 return;
@@ -1266,38 +1301,42 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
                 goto run_queue;
         }
  
+       plug = current->plug;
         /*
          * If the driver supports defer issued based on 'last', then
          * queue it up like normal since we can potentially save some
          * CPU this way.
          */
-       if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
-               struct blk_mq_queue_data bd = {
-                       .rq = rq,
-                       .list = NULL,
-                       .last = 1
-               };
-               int ret;
+       if (((plug && !blk_queue_nomerges(q)) || is_sync) &&
+           !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
+               struct request *old_rq = NULL;
  
                 blk_mq_bio_to_request(rq, bio);
  
                 /*
-                * For OK queue, we are done. For error, kill it. Any other
-                * error (busy), just add it to our list as we previously
-                * would have done
+                * We do limited plugging. If bio can be merged, do merge.
+                * Otherwise the existing request in the plug list will be
+                * issued. So the plug list will have one request at most
                  */
-               ret = q->mq_ops->queue_rq(data.hctx, &bd);
-               if (ret == BLK_MQ_RQ_QUEUE_OK)
-                       goto done;
-               else {
-                       __blk_mq_requeue_request(rq);
-
-                       if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
-                               rq->errors = -EIO;
-                               blk_mq_end_request(rq, rq->errors);
-                               goto done;
+               if (plug) {
+                       /*
+                        * The plug list might get flushed before this. If that
+                        * happens, same_queue_rq is invalid and plug list is empty
+                        */
+                       if (same_queue_rq && !list_empty(&plug->mq_list)) {
+                               old_rq = same_queue_rq;
+                               list_del_init(&old_rq->queuelist);
                         }
-               }
+                       list_add_tail(&rq->queuelist, &plug->mq_list);
+               } else /* is_sync */
+                       old_rq = rq;
+               blk_mq_put_ctx(data.ctx);
+               if (!old_rq)
+                       return;
+               if (!blk_mq_direct_issue_request(old_rq))
+                       return;
+               blk_mq_insert_request(old_rq, false, true, true);
+               return;
         }
  
         if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1310,7 +1349,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
  run_queue:
                 blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
         }
-done:
         blk_mq_put_ctx(data.ctx);
  }
  
@@ -1322,16 +1360,11 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
  {
         const int is_sync = rw_is_sync(bio->bi_rw);
         const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
-       unsigned int use_plug, request_count = 0;
+       struct blk_plug *plug;
+       unsigned int request_count = 0;
         struct blk_map_ctx data;
         struct request *rq;
  
-       /*
-        * If we have multiple hardware queues, just go directly to
-        * one of those for sync IO.
-        */
-       use_plug = !is_flush_fua && !is_sync;
-
         blk_queue_bounce(q, &bio);
  
         if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
@@ -1339,8 +1372,8 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
                 return;
         }
  
-       if (use_plug && !blk_queue_nomerges(q) &&
-           blk_attempt_plug_merge(q, bio, &request_count))
+       if (!is_flush_fua && !blk_queue_nomerges(q) &&
+           blk_attempt_plug_merge(q, bio, &request_count, NULL))
                 return;
  
         rq = blk_mq_map_request(q, bio, &data);
@@ -1358,21 +1391,18 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
          * utilize that to temporarily store requests until the task is
          * either done or scheduled away.
          */
-       if (use_plug) {
-               struct blk_plug *plug = current->plug;
-
-               if (plug) {
-                       blk_mq_bio_to_request(rq, bio);
-                       if (list_empty(&plug->mq_list))
-                               trace_block_plug(q);
-                       else if (request_count >= BLK_MAX_REQUEST_COUNT) {
-                               blk_flush_plug_list(plug, false);
-                               trace_block_plug(q);
-                       }
-                       list_add_tail(&rq->queuelist, &plug->mq_list);
-                       blk_mq_put_ctx(data.ctx);
-                       return;
+       plug = current->plug;
+       if (plug) {
+               blk_mq_bio_to_request(rq, bio);
+               if (list_empty(&plug->mq_list))
+                       trace_block_plug(q);
+               else if (request_count >= BLK_MAX_REQUEST_COUNT) {
+                       blk_flush_plug_list(plug, false);
+                       trace_block_plug(q);
                 }
+               list_add_tail(&rq->queuelist, &plug->mq_list);
+               blk_mq_put_ctx(data.ctx);
+               return;
         }
  
         if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1508,7 +1538,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
                         i++;
                 }
         }
-
         return tags;
  
  fail:
@@ -1792,6 +1821,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
  
                 hctx = q->mq_ops->map_queue(q, i);
                 cpumask_set_cpu(i, hctx->cpumask);
+               cpumask_set_cpu(i, hctx->tags->cpumask);
                 ctx->index_hw = hctx->nr_ctx;
                 hctx->ctxs[hctx->nr_ctx++] = ctx;
         }
@@ -2056,7 +2086,7 @@ void blk_mq_free_queue(struct request_queue *q)
  /* Basically redo blk_mq_init_queue with queue frozen */
  static void blk_mq_queue_reinit(struct request_queue *q)
  {
-       WARN_ON_ONCE(!q->mq_freeze_depth);
+       WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
  
         blk_mq_sysfs_unregister(q);
  
@@ -2173,6 +2203,12 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
         return 0;
  }
  
+struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags)
+{
+       return tags->cpumask;
+}
+EXPORT_SYMBOL_GPL(blk_mq_tags_cpumask);
+
  /*
   * Alloc a tag set to be associated with one or more request queues.
   * May fail with EINVAL for various error conditions. May adjust the
@@ -2234,8 +2270,10 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
         int i;
  
         for (i = 0; i < set->nr_hw_queues; i++) {
-               if (set->tags[i])
+               if (set->tags[i]) {
                         blk_mq_free_rq_map(set, set->tags[i], i);
+                       free_cpumask_var(set->tags[i]->cpumask);
+               }
         }
  
         kfree(set->tags);
diff --git a/block/blk.h b/block/blk.h

index 43b036185712c25f66d04a3c9e49565297c94ccd..026d9594142bdedf1d5a0b919390bb54b3f857c1 100644 (file)
--- a/block/blk.h
+++ b/block/blk.h
@@ -78,7 +78,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
  bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
                             struct bio *bio);
  bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
-                           unsigned int *request_count);
+                           unsigned int *request_count,
+                           struct request **same_queue_rq);
  
  void blk_account_io_start(struct request *req, bool new_io);
  void blk_account_io_completion(struct request *req, unsigned int bytes);
@@ -193,8 +194,6 @@ int blk_try_merge(struct request *rq, struct bio *bio);
  
  void blk_queue_congestion_threshold(struct request_queue *q);
  
-void __blk_run_queue_uncond(struct request_queue *q);
-
  int blk_dev_init(void);
  
  
diff --git a/block/bounce.c b/block/bounce.c

index ed9dd80671204bdebc4005544097fb05b6c90c62..3ab0bce1c947ef9be81f09139aa73d9bd4b76ff5 100644 (file)
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -128,9 +128,6 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
         struct bio_vec *bvec, *org_vec;
         int i;
  
-       if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
-               set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
-
         /*
          * free up bounce indirect pages used
          */
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c

index 5da8e6e9ab4bfd72c76d5afa94a823ee0032f45b..d8ad45ccd8fa784a60dac66d91158eb0c9065b27 100644 (file)
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -67,6 +67,11 @@ static struct kmem_cache *cfq_pool;
  #define sample_valid(samples)  ((samples) > 80)
  #define rb_entry_cfqg(node)    rb_entry((node), struct cfq_group, rb_node)
  
+/* blkio-related constants */
+#define CFQ_WEIGHT_MIN          10
+#define CFQ_WEIGHT_MAX          1000
+#define CFQ_WEIGHT_DEFAULT      500
+
  struct cfq_ttime {
         unsigned long last_end_request;
  
@@ -212,6 +217,15 @@ struct cfqg_stats {
  #endif /* CONFIG_CFQ_GROUP_IOSCHED */
  };
  
+/* Per-cgroup data */
+struct cfq_group_data {
+       /* must be the first member */
+       struct blkcg_policy_data pd;
+
+       unsigned int weight;
+       unsigned int leaf_weight;
+};
+
  /* This is per cgroup per device grouping structure */
  struct cfq_group {
         /* must be the first member */
@@ -446,16 +460,6 @@ CFQ_CFQQ_FNS(deep);
  CFQ_CFQQ_FNS(wait_busy);
  #undef CFQ_CFQQ_FNS
  
-static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd)
-{
-       return pd ? container_of(pd, struct cfq_group, pd) : NULL;
-}
-
-static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
-{
-       return pd_to_blkg(&cfqg->pd);
-}
-
  #if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
  
  /* cfqg stats flags */
@@ -600,6 +604,22 @@ static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { }
  
  #ifdef CONFIG_CFQ_GROUP_IOSCHED
  
+static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd)
+{
+       return pd ? container_of(pd, struct cfq_group, pd) : NULL;
+}
+
+static struct cfq_group_data
+*cpd_to_cfqgd(struct blkcg_policy_data *cpd)
+{
+       return cpd ? container_of(cpd, struct cfq_group_data, pd) : NULL;
+}
+
+static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
+{
+       return pd_to_blkg(&cfqg->pd);
+}
+
  static struct blkcg_policy blkcg_policy_cfq;
  
  static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
@@ -607,6 +627,11 @@ static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
         return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq));
  }
  
+static struct cfq_group_data *blkcg_to_cfqgd(struct blkcg *blkcg)
+{
+       return cpd_to_cfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_cfq));
+}
+
  static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg)
  {
         struct blkcg_gq *pblkg = cfqg_to_blkg(cfqg)->parent;
@@ -1544,13 +1569,28 @@ static void cfqg_stats_init(struct cfqg_stats *stats)
  #endif
  }
  
+static void cfq_cpd_init(const struct blkcg *blkcg)
+{
+       struct cfq_group_data *cgd =
+               cpd_to_cfqgd(blkcg->pd[blkcg_policy_cfq.plid]);
+
+       if (blkcg == &blkcg_root) {
+               cgd->weight = 2 * CFQ_WEIGHT_DEFAULT;
+               cgd->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT;
+       } else {
+               cgd->weight = CFQ_WEIGHT_DEFAULT;
+               cgd->leaf_weight = CFQ_WEIGHT_DEFAULT;
+       }
+}
+
  static void cfq_pd_init(struct blkcg_gq *blkg)
  {
         struct cfq_group *cfqg = blkg_to_cfqg(blkg);
+       struct cfq_group_data *cgd = blkcg_to_cfqgd(blkg->blkcg);
  
         cfq_init_cfqg_base(cfqg);
-       cfqg->weight = blkg->blkcg->cfq_weight;
-       cfqg->leaf_weight = blkg->blkcg->cfq_leaf_weight;
+       cfqg->weight = cgd->weight;
+       cfqg->leaf_weight = cgd->leaf_weight;
         cfqg_stats_init(&cfqg->stats);
         cfqg_stats_init(&cfqg->dead_stats);
  }
@@ -1673,13 +1713,27 @@ static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v)
  
  static int cfq_print_weight(struct seq_file *sf, void *v)
  {
-       seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_weight);
+       struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+       struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
+       unsigned int val = 0;
+
+       if (cgd)
+               val = cgd->weight;
+
+       seq_printf(sf, "%u\n", val);
         return 0;
  }
  
  static int cfq_print_leaf_weight(struct seq_file *sf, void *v)
  {
-       seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_leaf_weight);
+       struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+       struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
+       unsigned int val = 0;
+
+       if (cgd)
+               val = cgd->leaf_weight;
+
+       seq_printf(sf, "%u\n", val);
         return 0;
  }
  
@@ -1690,6 +1744,7 @@ static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
         struct blkcg *blkcg = css_to_blkcg(of_css(of));
         struct blkg_conf_ctx ctx;
         struct cfq_group *cfqg;
+       struct cfq_group_data *cfqgd;
         int ret;
  
         ret = blkg_conf_prep(blkcg, &blkcg_policy_cfq, buf, &ctx);
@@ -1698,17 +1753,22 @@ static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
  
         ret = -EINVAL;
         cfqg = blkg_to_cfqg(ctx.blkg);
+       cfqgd = blkcg_to_cfqgd(blkcg);
+       if (!cfqg || !cfqgd)
+               goto err;
+
         if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) {
                 if (!is_leaf_weight) {
                         cfqg->dev_weight = ctx.v;
-                       cfqg->new_weight = ctx.v ?: blkcg->cfq_weight;
+                       cfqg->new_weight = ctx.v ?: cfqgd->weight;
                 } else {
                         cfqg->dev_leaf_weight = ctx.v;
-                       cfqg->new_leaf_weight = ctx.v ?: blkcg->cfq_leaf_weight;
+                       cfqg->new_leaf_weight = ctx.v ?: cfqgd->leaf_weight;
                 }
                 ret = 0;
         }
  
+err:
         blkg_conf_finish(&ctx);
         return ret ?: nbytes;
  }
@@ -1730,16 +1790,23 @@ static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
  {
         struct blkcg *blkcg = css_to_blkcg(css);
         struct blkcg_gq *blkg;
+       struct cfq_group_data *cfqgd;
+       int ret = 0;
  
         if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX)
                 return -EINVAL;
  
         spin_lock_irq(&blkcg->lock);
+       cfqgd = blkcg_to_cfqgd(blkcg);
+       if (!cfqgd) {
+               ret = -EINVAL;
+               goto out;
+       }
  
         if (!is_leaf_weight)
-               blkcg->cfq_weight = val;
+               cfqgd->weight = val;
         else
-               blkcg->cfq_leaf_weight = val;
+               cfqgd->leaf_weight = val;
  
         hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
                 struct cfq_group *cfqg = blkg_to_cfqg(blkg);
@@ -1749,15 +1816,16 @@ static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
  
                 if (!is_leaf_weight) {
                         if (!cfqg->dev_weight)
-                               cfqg->new_weight = blkcg->cfq_weight;
+                               cfqg->new_weight = cfqgd->weight;
                 } else {
                         if (!cfqg->dev_leaf_weight)
-                               cfqg->new_leaf_weight = blkcg->cfq_leaf_weight;
+                               cfqg->new_leaf_weight = cfqgd->leaf_weight;
                 }
         }
  
+out:
         spin_unlock_irq(&blkcg->lock);
-       return 0;
+       return ret;
  }
  
  static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -4477,6 +4545,18 @@ out_free:
         return ret;
  }
  
+static void cfq_registered_queue(struct request_queue *q)
+{
+       struct elevator_queue *e = q->elevator;
+       struct cfq_data *cfqd = e->elevator_data;
+
+       /*
+        * Default to IOPS mode with no idling for SSDs
+        */
+       if (blk_queue_nonrot(q))
+               cfqd->cfq_slice_idle = 0;
+}
+
  /*
   * sysfs parts below -->
   */
@@ -4592,6 +4672,7 @@ static struct elevator_type iosched_cfq = {
                 .elevator_may_queue_fn =        cfq_may_queue,
                 .elevator_init_fn =             cfq_init_queue,
                 .elevator_exit_fn =             cfq_exit_queue,
+               .elevator_registered_fn =       cfq_registered_queue,
         },
         .icq_size       =       sizeof(struct cfq_io_cq),
         .icq_align      =       __alignof__(struct cfq_io_cq),
@@ -4603,8 +4684,10 @@ static struct elevator_type iosched_cfq = {
  #ifdef CONFIG_CFQ_GROUP_IOSCHED
  static struct blkcg_policy blkcg_policy_cfq = {
         .pd_size                = sizeof(struct cfq_group),
+       .cpd_size               = sizeof(struct cfq_group_data),
         .cftypes                = cfq_blkcg_files,
  
+       .cpd_init_fn            = cfq_cpd_init,
         .pd_init_fn             = cfq_pd_init,
         .pd_offline_fn          = cfq_pd_offline,
         .pd_reset_stats_fn      = cfq_pd_reset_stats,
diff --git a/block/elevator.c b/block/elevator.c

index 8985038f398ce503261dc4a29390a63c9f7b5b44..942579d04128b5484f2d3e53bf38b4994ef852ee 100644 (file)
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -806,6 +806,8 @@ int elv_register_queue(struct request_queue *q)
                 }
                 kobject_uevent(&e->kobj, KOBJ_ADD);
                 e->registered = 1;
+               if (e->type->ops.elevator_registered_fn)
+                       e->type->ops.elevator_registered_fn(q);
         }
         return error;
  }
diff --git a/block/ioctl.c b/block/ioctl.c

index 7d8befde2aca7a3c007310ea59776a55f3e1bbbe..8061eba42887a9c1163a8d2271517b8b061ce554 100644 (file)
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -150,21 +150,48 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
         }
  }
  
-static int blkdev_reread_part(struct block_device *bdev)
+/*
+ * This is an exported API for the block driver, and will not
+ * acquire bd_mutex. This API should be used when the
+ * caller already holds bd_mutex.
+ */
+int __blkdev_reread_part(struct block_device *bdev)
  {
         struct gendisk *disk = bdev->bd_disk;
-       int res;
  
         if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
                 return -EINVAL;
         if (!capable(CAP_SYS_ADMIN))
                 return -EACCES;
-       if (!mutex_trylock(&bdev->bd_mutex))
-               return -EBUSY;
-       res = rescan_partitions(disk, bdev);
+
+       lockdep_assert_held(&bdev->bd_mutex);
+
+       return rescan_partitions(disk, bdev);
+}
+EXPORT_SYMBOL(__blkdev_reread_part);
+
+/*
+ * This is an exported API for the block driver, and will
+ * acquire bd_mutex itself. If bd_mutex is already held
+ * in the current context, call __blkdev_reread_part() instead.
+ *
+ * Make sure the locks held in the current context aren't required
+ * in the open()/close() handlers or the I/O path, to avoid ABBA deadlock:
+ * - bd_mutex is held before calling block driver's open/close
+ *   handler
+ * - reading partition table may submit I/O to the block device
+ */
+int blkdev_reread_part(struct block_device *bdev)
+{
+       int res;
+
+       mutex_lock(&bdev->bd_mutex);
+       res = __blkdev_reread_part(bdev);
         mutex_unlock(&bdev->bd_mutex);
+
         return res;
  }
+EXPORT_SYMBOL(blkdev_reread_part);
  
  static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
                              uint64_t len, int secure)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c

index 39e5f7fae3efb76ab888ec19b2b8a8fdb4b258b4..83a7ba4a3eeca86d297d5c694e249005e9bd1199 100644 (file)
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -230,29 +230,40 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
         int result, flags;
         struct nbd_request request;
         unsigned long size = blk_rq_bytes(req);
+       u32 type;
+
+       if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+               type = NBD_CMD_DISC;
+       else if (req->cmd_flags & REQ_DISCARD)
+               type = NBD_CMD_TRIM;
+       else if (req->cmd_flags & REQ_FLUSH)
+               type = NBD_CMD_FLUSH;
+       else if (rq_data_dir(req) == WRITE)
+               type = NBD_CMD_WRITE;
+       else
+               type = NBD_CMD_READ;
  
         memset(&request, 0, sizeof(request));
         request.magic = htonl(NBD_REQUEST_MAGIC);
-       request.type = htonl(nbd_cmd(req));
-
-       if (nbd_cmd(req) != NBD_CMD_FLUSH && nbd_cmd(req) != NBD_CMD_DISC) {
+       request.type = htonl(type);
+       if (type != NBD_CMD_FLUSH && type != NBD_CMD_DISC) {
                 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
                 request.len = htonl(size);
         }
         memcpy(request.handle, &req, sizeof(req));
  
         dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
-               req, nbdcmd_to_ascii(nbd_cmd(req)),
+               req, nbdcmd_to_ascii(type),
                 (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
         result = sock_xmit(nbd, 1, &request, sizeof(request),
-                       (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
+                       (type == NBD_CMD_WRITE) ? MSG_MORE : 0);
         if (result <= 0) {
                 dev_err(disk_to_dev(nbd->disk),
                         "Send control failed (result %d)\n", result);
                 return -EIO;
         }
  
-       if (nbd_cmd(req) == NBD_CMD_WRITE) {
+       if (type == NBD_CMD_WRITE) {
                 struct req_iterator iter;
                 struct bio_vec bvec;
                 /*
@@ -352,7 +363,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
         }
  
         dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
-       if (nbd_cmd(req) == NBD_CMD_READ) {
+       if (rq_data_dir(req) != WRITE) {
                 struct req_iterator iter;
                 struct bio_vec bvec;
  
@@ -452,23 +463,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
         if (req->cmd_type != REQ_TYPE_FS)
                 goto error_out;
  
-       nbd_cmd(req) = NBD_CMD_READ;
-       if (rq_data_dir(req) == WRITE) {
-               if ((req->cmd_flags & REQ_DISCARD)) {
-                       WARN_ON(!(nbd->flags & NBD_FLAG_SEND_TRIM));
-                       nbd_cmd(req) = NBD_CMD_TRIM;
-               } else
-                       nbd_cmd(req) = NBD_CMD_WRITE;
-               if (nbd->flags & NBD_FLAG_READ_ONLY) {
-                       dev_err(disk_to_dev(nbd->disk),
-                               "Write on read-only\n");
-                       goto error_out;
-               }
-       }
-
-       if (req->cmd_flags & REQ_FLUSH) {
-               BUG_ON(unlikely(blk_rq_sectors(req)));
-               nbd_cmd(req) = NBD_CMD_FLUSH;
+       if (rq_data_dir(req) == WRITE &&
+           (nbd->flags & NBD_FLAG_READ_ONLY)) {
+               dev_err(disk_to_dev(nbd->disk),
+                       "Write on read-only\n");
+               goto error_out;
         }
  
         req->errors = 0;
@@ -592,8 +591,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
                 fsync_bdev(bdev);
                 mutex_lock(&nbd->tx_lock);
                 blk_rq_init(NULL, &sreq);
-               sreq.cmd_type = REQ_TYPE_SPECIAL;
-               nbd_cmd(&sreq) = NBD_CMD_DISC;
+               sreq.cmd_type = REQ_TYPE_DRV_PRIV;
  
                 /* Check again after getting mutex back.  */
                 if (!nbd->sock)
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c

index d48715b287e667bea5205ec75f6e6c40792149a4..dbb4da1cdca8b2f91ed50a4527824dd21bd5bfab 100644 (file)
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -442,7 +442,7 @@ static char *pd_buf;                /* buffer for request in progress */
  
  static enum action do_pd_io_start(void)
  {
-       if (pd_req->cmd_type == REQ_TYPE_SPECIAL) {
+       if (pd_req->cmd_type == REQ_TYPE_DRV_PRIV) {
                 phase = pd_special;
                 return pd_special();
         }
@@ -725,7 +725,7 @@ static int pd_special_command(struct pd_unit *disk,
         if (IS_ERR(rq))
                 return PTR_ERR(rq);
  
-       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd_type = REQ_TYPE_DRV_PRIV;
         rq->special = func;
  
         err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c

index 5d552857de412e32864ba4378a2313014c7b3564..59c91d49b14b649f839f8af5951772436d959da8 100644 (file)
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -620,7 +620,7 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
         spin_unlock_irq(&host->lock);
  
         DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
-       crq->rq->cmd_type = REQ_TYPE_SPECIAL;
+       crq->rq->cmd_type = REQ_TYPE_DRV_PRIV;
         crq->rq->special = crq;
         blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
  
@@ -661,7 +661,7 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
         crq->msg_bucket = (u32) rc;
  
         DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
-       crq->rq->cmd_type = REQ_TYPE_SPECIAL;
+       crq->rq->cmd_type = REQ_TYPE_DRV_PRIV;
         crq->rq->special = crq;
         blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
  
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c

index 5ea2f0bbbc7c3d6e62a016bcce22ff5a8ce46c1b..d4d05f064d390772a2f99acbf882eaa983788511 100644 (file)
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -124,7 +124,7 @@ static inline void virtblk_request_done(struct request *req)
                 req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
                 req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
                 req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
-       } else if (req->cmd_type == REQ_TYPE_SPECIAL) {
+       } else if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
                 req->errors = (error != 0);
         }
  
@@ -188,7 +188,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
                         vbr->out_hdr.sector = 0;
                         vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
                         break;
-               case REQ_TYPE_SPECIAL:
+               case REQ_TYPE_DRV_PRIV:
                         vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
                         vbr->out_hdr.sector = 0;
                         vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
@@ -251,7 +251,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
                 return PTR_ERR(req);
         }
  
-       req->cmd_type = REQ_TYPE_SPECIAL;
+       req->cmd_type = REQ_TYPE_DRV_PRIV;
         err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
         blk_put_request(req);
  
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c

index fac3d9da2e07db38df577c0ed76f5f72202641ea..1362ad80a76c071e9e2abb2b36472e433bc5dae0 100644 (file)
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -93,7 +93,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
         int error;
  
         rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
-       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd_type = REQ_TYPE_DRV_PRIV;
         rq->special = (char *)pc;
  
         if (buf && bufflen) {
@@ -191,7 +191,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
  
         BUG_ON(sense_len > sizeof(*sense));
  
-       if (rq->cmd_type == REQ_TYPE_SENSE || drive->sense_rq_armed)
+       if (rq->cmd_type == REQ_TYPE_ATA_SENSE || drive->sense_rq_armed)
                 return;
  
         memset(sense, 0, sizeof(*sense));
@@ -210,7 +210,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
         sense_rq->rq_disk = rq->rq_disk;
         sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
         sense_rq->cmd[4] = cmd_len;
-       sense_rq->cmd_type = REQ_TYPE_SENSE;
+       sense_rq->cmd_type = REQ_TYPE_ATA_SENSE;
         sense_rq->cmd_flags |= REQ_PREEMPT;
  
         if (drive->media == ide_tape)
@@ -310,7 +310,7 @@ int ide_cd_get_xferlen(struct request *rq)
         switch (rq->cmd_type) {
         case REQ_TYPE_FS:
                 return 32768;
-       case REQ_TYPE_SENSE:
+       case REQ_TYPE_ATA_SENSE:
         case REQ_TYPE_BLOCK_PC:
         case REQ_TYPE_ATA_PC:
                 return blk_rq_bytes(rq);
@@ -477,7 +477,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
                 if (uptodate == 0)
                         drive->failed_pc = NULL;
  
-               if (rq->cmd_type == REQ_TYPE_SPECIAL) {
+               if (rq->cmd_type == REQ_TYPE_DRV_PRIV) {
                         rq->errors = 0;
                         error = 0;
                 } else {
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c

index 0b510bafd90e2904d989d4f8ce06b89f50ea1f76..64a6b827b3dd12210e23008a81af06e9d9d7cb64 100644 (file)
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -210,7 +210,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
  static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
  {
         /*
-        * For REQ_TYPE_SENSE, "rq->special" points to the original
+        * For REQ_TYPE_ATA_SENSE, "rq->special" points to the original
          * failed request.  Also, the sense data should be read
          * directly from rq which might be different from the original
          * sense buffer if it got copied during mapping.
@@ -285,7 +285,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
                                   "stat 0x%x",
                                   rq->cmd[0], rq->cmd_type, err, stat);
  
-       if (rq->cmd_type == REQ_TYPE_SENSE) {
+       if (rq->cmd_type == REQ_TYPE_ATA_SENSE) {
                 /*
                  * We got an error trying to get sense info from the drive
                  * (probably while trying to recover from a former error).
@@ -526,7 +526,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
         ide_expiry_t *expiry = NULL;
         int dma_error = 0, dma, thislen, uptodate = 0;
         int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0;
-       int sense = (rq->cmd_type == REQ_TYPE_SENSE);
+       int sense = (rq->cmd_type == REQ_TYPE_ATA_SENSE);
         unsigned int timeout;
         u16 len;
         u8 ireason, stat;
@@ -791,7 +791,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
                 if (cdrom_start_rw(drive, rq) == ide_stopped)
                         goto out_end;
                 break;
-       case REQ_TYPE_SENSE:
+       case REQ_TYPE_ATA_SENSE:
         case REQ_TYPE_BLOCK_PC:
         case REQ_TYPE_ATA_PC:
                 if (!rq->timeout)
@@ -799,7 +799,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
  
                 cdrom_do_block_pc(drive, rq);
                 break;
-       case REQ_TYPE_SPECIAL:
+       case REQ_TYPE_DRV_PRIV:
                 /* right now this can only be a reset... */
                 uptodate = 1;
                 goto out_end;
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c

index 02caa7dd51c83f99e34e05de972f67020945a2d6..066e3903651842fa31c82bc278c8f7434bf6c950 100644 (file)
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -304,7 +304,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
         int ret;
  
         rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
-       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd_type = REQ_TYPE_DRV_PRIV;
         rq->cmd_flags = REQ_QUIET;
         ret = blk_execute_rq(drive->queue, cd->disk, rq, 0);
         blk_put_request(rq);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c

index 9e98122f646e3190c85dc3d2813102f5a00f49ec..b05a74d78ef560deefd8f022d27f145766e094b4 100644 (file)
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -166,7 +166,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
                 return setting->set(drive, arg);
  
         rq = blk_get_request(q, READ, __GFP_WAIT);
-       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd_type = REQ_TYPE_DRV_PRIV;
         rq->cmd_len = 5;
         rq->cmd[0] = REQ_DEVSET_EXEC;
         *(int *)&rq->cmd[1] = arg;
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c

index 32970664c275448d65c44f6e8252b5de129c064c..d6da011299f582934bf50c1c05c0719577d41ba6 100644 (file)
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -129,7 +129,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
  
                         if (cmd)
                                 ide_complete_cmd(drive, cmd, stat, err);
-               } else if (blk_pm_request(rq)) {
+               } else if (ata_pm_request(rq)) {
                         rq->errors = 1;
                         ide_complete_pm_rq(drive, rq);
                         return ide_stopped;
@@ -147,7 +147,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
  {
         struct request *rq = drive->hwif->rq;
  
-       if (rq && rq->cmd_type == REQ_TYPE_SPECIAL &&
+       if (rq && rq->cmd_type == REQ_TYPE_DRV_PRIV &&
             rq->cmd[0] == REQ_DRIVE_RESET) {
                 if (err <= 0 && rq->errors == 0)
                         rq->errors = -EIO;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c

index 8c6363cdd2084a55ae4cfc9141e08cccde03c223..2fb5350c54105000d57bf1672b101e3e40fdd9ca 100644 (file)
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -97,7 +97,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc)
                                "Aborting request!\n");
         }
  
-       if (rq->cmd_type == REQ_TYPE_SPECIAL)
+       if (rq->cmd_type == REQ_TYPE_DRV_PRIV)
                 rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL;
  
         return uptodate;
@@ -246,7 +246,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
                 } else
                         printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
  
-               if (rq->cmd_type == REQ_TYPE_SPECIAL) {
+               if (rq->cmd_type == REQ_TYPE_DRV_PRIV) {
                         rq->errors = 0;
                         ide_complete_rq(drive, 0, blk_rq_bytes(rq));
                         return ide_stopped;
@@ -265,8 +265,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
                 pc = &floppy->queued_pc;
                 idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
                 break;
-       case REQ_TYPE_SPECIAL:
-       case REQ_TYPE_SENSE:
+       case REQ_TYPE_DRV_PRIV:
+       case REQ_TYPE_ATA_SENSE:
                 pc = (struct ide_atapi_pc *)rq->special;
                 break;
         case REQ_TYPE_BLOCK_PC:
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c

index 177db6d5b2f58972e5dbcf3f5cc1f99cf4f27079..669ea1e457958699849687e0efc78809758f8f92 100644 (file)
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -135,7 +135,7 @@ EXPORT_SYMBOL(ide_complete_rq);
  
  void ide_kill_rq(ide_drive_t *drive, struct request *rq)
  {
-       u8 drv_req = (rq->cmd_type == REQ_TYPE_SPECIAL) && rq->rq_disk;
+       u8 drv_req = (rq->cmd_type == REQ_TYPE_DRV_PRIV) && rq->rq_disk;
         u8 media = drive->media;
  
         drive->failed_pc = NULL;
@@ -320,7 +320,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
                 goto kill_rq;
         }
  
-       if (blk_pm_request(rq))
+       if (ata_pm_request(rq))
                 ide_check_pm_state(drive, rq);
  
         drive->hwif->tp_ops->dev_select(drive);
@@ -342,8 +342,8 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
  
                 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
                         return execute_drive_cmd(drive, rq);
-               else if (blk_pm_request(rq)) {
-                       struct request_pm_state *pm = rq->special;
+               else if (ata_pm_request(rq)) {
+                       struct ide_pm_state *pm = rq->special;
  #ifdef DEBUG_PM
                         printk("%s: start_power_step(step: %d)\n",
                                 drive->name, pm->pm_step);
@@ -353,7 +353,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
                             pm->pm_step == IDE_PM_COMPLETED)
                                 ide_complete_pm_rq(drive, rq);
                         return startstop;
-               } else if (!rq->rq_disk && rq->cmd_type == REQ_TYPE_SPECIAL)
+               } else if (!rq->rq_disk && rq->cmd_type == REQ_TYPE_DRV_PRIV)
                         /*
                          * TODO: Once all ULDs have been modified to
                          * check for specific op codes rather than
@@ -538,7 +538,7 @@ repeat:
                  * state machine.
                  */
                 if ((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
-                   blk_pm_request(rq) == 0 &&
+                   ata_pm_request(rq) == 0 &&
                     (rq->cmd_flags & REQ_PREEMPT) == 0) {
                         /* there should be no pending command at this point */
                         ide_unlock_port(hwif);
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c

index 6233fa2cb8a97806490b983c8013dcee09567b39..aa2e9b77b20d39a67d3da80fd23338fcb631faf6 100644 (file)
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -222,7 +222,7 @@ static int generic_drive_reset(ide_drive_t *drive)
         int ret = 0;
  
         rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
-       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd_type = REQ_TYPE_DRV_PRIV;
         rq->cmd_len = 1;
         rq->cmd[0] = REQ_DRIVE_RESET;
         if (blk_execute_rq(drive->queue, NULL, rq, 1))
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c

index ca958604cda21217f6d6a1bce6240fbf07fb2d7f..c808685204883db93213e80ee9516a298e1f58b1 100644 (file)
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -34,7 +34,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
         rq = blk_get_request(q, READ, __GFP_WAIT);
         rq->cmd[0] = REQ_PARK_HEADS;
         rq->cmd_len = 1;
-       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd_type = REQ_TYPE_DRV_PRIV;
         rq->special = &timeout;
         rc = blk_execute_rq(q, NULL, rq, 1);
         blk_put_request(rq);
@@ -51,7 +51,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
  
         rq->cmd[0] = REQ_UNPARK_HEADS;
         rq->cmd_len = 1;
-       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd_type = REQ_TYPE_DRV_PRIV;
         elv_add_request(q, rq, ELEVATOR_INSERT_FRONT);
  
  out:
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c

index 8d1e32d7cd9767db4e1c99db627681f03f233139..081e43458d50f745671f9c5c4476d760e33b7fc0 100644 (file)
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -8,7 +8,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
         ide_drive_t *pair = ide_get_pair_dev(drive);
         ide_hwif_t *hwif = drive->hwif;
         struct request *rq;
-       struct request_pm_state rqpm;
+       struct ide_pm_state rqpm;
         int ret;
  
         if (ide_port_acpi(hwif)) {
@@ -19,7 +19,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
  
         memset(&rqpm, 0, sizeof(rqpm));
         rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
-       rq->cmd_type = REQ_TYPE_PM_SUSPEND;
+       rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND;
         rq->special = &rqpm;
         rqpm.pm_step = IDE_PM_START_SUSPEND;
         if (mesg.event == PM_EVENT_PRETHAW)
@@ -38,13 +38,43 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
         return ret;
  }
  
+static void ide_end_sync_rq(struct request *rq, int error)
+{
+       complete(rq->end_io_data);
+}
+
+static int ide_pm_execute_rq(struct request *rq)
+{
+       struct request_queue *q = rq->q;
+       DECLARE_COMPLETION_ONSTACK(wait);
+
+       rq->end_io_data = &wait;
+       rq->end_io = ide_end_sync_rq;
+
+       spin_lock_irq(q->queue_lock);
+       if (unlikely(blk_queue_dying(q))) {
+               rq->cmd_flags |= REQ_QUIET;
+               rq->errors = -ENXIO;
+               __blk_end_request_all(rq, rq->errors);
+               spin_unlock_irq(q->queue_lock);
+               return -ENXIO;
+       }
+       __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT);
+       __blk_run_queue_uncond(q);
+       spin_unlock_irq(q->queue_lock);
+
+       wait_for_completion_io(&wait);
+
+       return rq->errors ? -EIO : 0;
+}
+
  int generic_ide_resume(struct device *dev)
  {
         ide_drive_t *drive = to_ide_device(dev);
         ide_drive_t *pair = ide_get_pair_dev(drive);
         ide_hwif_t *hwif = drive->hwif;
         struct request *rq;
-       struct request_pm_state rqpm;
+       struct ide_pm_state rqpm;
         int err;
  
         if (ide_port_acpi(hwif)) {
@@ -59,13 +89,13 @@ int generic_ide_resume(struct device *dev)
  
         memset(&rqpm, 0, sizeof(rqpm));
         rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
-       rq->cmd_type = REQ_TYPE_PM_RESUME;
+       rq->cmd_type = REQ_TYPE_ATA_PM_RESUME;
         rq->cmd_flags |= REQ_PREEMPT;
         rq->special = &rqpm;
         rqpm.pm_step = IDE_PM_START_RESUME;
         rqpm.pm_state = PM_EVENT_ON;
  
-       err = blk_execute_rq(drive->queue, NULL, rq, 1);
+       err = ide_pm_execute_rq(rq);
         blk_put_request(rq);
  
         if (err == 0 && dev->driver) {
@@ -80,7 +110,7 @@ int generic_ide_resume(struct device *dev)
  
  void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
  {
-       struct request_pm_state *pm = rq->special;
+       struct ide_pm_state *pm = rq->special;
  
  #ifdef DEBUG_PM
         printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
@@ -110,7 +140,7 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
  
  ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
  {
-       struct request_pm_state *pm = rq->special;
+       struct ide_pm_state *pm = rq->special;
         struct ide_cmd cmd = { };
  
         switch (pm->pm_step) {
@@ -182,7 +212,7 @@ out_do_tf:
  void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
  {
         struct request_queue *q = drive->queue;
-       struct request_pm_state *pm = rq->special;
+       struct ide_pm_state *pm = rq->special;
         unsigned long flags;
  
         ide_complete_power_step(drive, rq);
@@ -191,10 +221,10 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
  
  #ifdef DEBUG_PM
         printk("%s: completing PM request, %s\n", drive->name,
-              (rq->cmd_type == REQ_TYPE_PM_SUSPEND) ? "suspend" : "resume");
+              (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND) ? "suspend" : "resume");
  #endif
         spin_lock_irqsave(q->queue_lock, flags);
-       if (rq->cmd_type == REQ_TYPE_PM_SUSPEND)
+       if (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND)
                 blk_stop_queue(q);
         else
                 drive->dev_flags &= ~IDE_DFLAG_BLOCKED;
@@ -208,13 +238,13 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
  
  void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
  {
-       struct request_pm_state *pm = rq->special;
+       struct ide_pm_state *pm = rq->special;
  
-       if (rq->cmd_type == REQ_TYPE_PM_SUSPEND &&
+       if (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND &&
             pm->pm_step == IDE_PM_START_SUSPEND)
                 /* Mark drive blocked when starting the suspend sequence. */
                 drive->dev_flags |= IDE_DFLAG_BLOCKED;
-       else if (rq->cmd_type == REQ_TYPE_PM_RESUME &&
+       else if (rq->cmd_type == REQ_TYPE_ATA_PM_RESUME &&
                  pm->pm_step == IDE_PM_START_RESUME) {
                 /*
                  * The first thing we do on wakeup is to wait for BSY bit to
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c

index 6eb738ca6d2f353717aac1e63bade383f763afa5..f5d51d1d09ee480becca86a1cda687ab3919cdb9 100644 (file)
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -576,8 +576,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
                       rq->cmd[0], (unsigned long long)blk_rq_pos(rq),
                       blk_rq_sectors(rq));
  
-       BUG_ON(!(rq->cmd_type == REQ_TYPE_SPECIAL ||
-                rq->cmd_type == REQ_TYPE_SENSE));
+       BUG_ON(!(rq->cmd_type == REQ_TYPE_DRV_PRIV ||
+                rq->cmd_type == REQ_TYPE_ATA_SENSE));
  
         /* Retry a failed packet command */
         if (drive->failed_pc && drive->pc->c[0] == REQUEST_SENSE) {
@@ -853,7 +853,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
         BUG_ON(size < 0 || size % tape->blk_size);
  
         rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
-       rq->cmd_type = REQ_TYPE_SPECIAL;
+       rq->cmd_type = REQ_TYPE_DRV_PRIV;
         rq->cmd[13] = cmd;
         rq->rq_disk = tape->disk;
         rq->__sector = tape->first_frame;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c

index dabb88b1cbec69052d32eb5e84b7f647534a4599..0979e126fff1e69ee3b3f8df19e5a443cbae5194 100644 (file)
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -186,7 +186,7 @@ static ide_startstop_t task_no_data_intr(ide_drive_t *drive)
             tf->command == ATA_CMD_CHK_POWER) {
                 struct request *rq = hwif->rq;
  
-               if (blk_pm_request(rq))
+               if (ata_pm_request(rq))
                         ide_complete_pm_rq(drive, rq);
                 else
                         ide_finish_cmd(drive, cmd, stat);
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c

index fa028fa82df41bf509e15d140f9b3b943c5a8c2d..cb64e64a478954e5c7dec8f774df29852fad0e01 100644 (file)
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -55,7 +55,7 @@ static void bch_bio_submit_split_done(struct closure *cl)
  
         s->bio->bi_end_io = s->bi_end_io;
         s->bio->bi_private = s->bi_private;
-       bio_endio_nodec(s->bio, 0);
+       bio_endio(s->bio, 0);
  
         closure_debug_destroy(&s->cl);
         mempool_free(s, s->p->bio_split_hook);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c

index ab43faddb447e365f85fdfddb53ca4accddb0f17..1616f668a4cb043741520d724b2f3602da0e07be 100644 (file)
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -619,7 +619,7 @@ static void do_bio_hook(struct search *s, struct bio *orig_bio)
         bio->bi_end_io          = request_endio;
         bio->bi_private         = &s->cl;
  
-       atomic_set(&bio->bi_cnt, 3);
+       bio_cnt_set(bio, 3);
  }
  
  static void search_free(struct closure *cl)
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c

index 7755af35186762a4319e8cff52d4e95b26524d3e..41b2594a80c63010c5282ac5d61079fc61fc986e 100644 (file)
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -86,12 +86,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
  {
         bio->bi_end_io = h->bi_end_io;
         bio->bi_private = h->bi_private;
-
-       /*
-        * Must bump bi_remaining to allow bio to complete with
-        * restored bi_end_io.
-        */
-       atomic_inc(&bio->bi_remaining);
  }
  
  /*----------------------------------------------------------------*/
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c

index 089d62751f7ff2a3aedf7e441cb88bec0d06b8a7..743fa9bbae9eaafeb5e2fb7063319622e4987861 100644 (file)
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1254,8 +1254,6 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
                         dm_bio_restore(bd, bio);
                         bio_record->details.bi_bdev = NULL;
  
-                       atomic_inc(&bio->bi_remaining);
-
                         queue_bio(ms, bio, rw);
                         return DM_ENDIO_INCOMPLETE;
                 }
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c

index f83a0f3fc3656680c7bdba2dcd4bdaaac9f2f624..7c82d3ccce871f2cd4b3293fb25bda8749c4e538 100644 (file)
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1478,7 +1478,6 @@ out:
         if (full_bio) {
                 full_bio->bi_end_io = pe->full_bio_end_io;
                 full_bio->bi_private = pe->full_bio_private;
-               atomic_inc(&full_bio->bi_remaining);
         }
         increment_pending_exceptions_done_count();
  
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c

index 16ba55ad708992f7e942b2f6ce2048d12be5c1b6..a5f94125ad01f6b3a3a43fd7ae78e9bad0747995 100644 (file)
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -942,21 +942,28 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
  {
         unsigned type = dm_table_get_type(t);
         unsigned per_bio_data_size = 0;
-       struct dm_target *tgt;
         unsigned i;
  
-       if (unlikely(type == DM_TYPE_NONE)) {
+       switch (type) {
+       case DM_TYPE_BIO_BASED:
+               for (i = 0; i < t->num_targets; i++) {
+                       struct dm_target *tgt = t->targets + i;
+
+                       per_bio_data_size = max(per_bio_data_size,
+                                               tgt->per_bio_data_size);
+               }
+               t->mempools = dm_alloc_bio_mempools(t->integrity_supported,
+                                                   per_bio_data_size);
+               break;
+       case DM_TYPE_REQUEST_BASED:
+       case DM_TYPE_MQ_REQUEST_BASED:
+               t->mempools = dm_alloc_rq_mempools(md, type);
+               break;
+       default:
                 DMWARN("no table type is set, can't allocate mempools");
                 return -EINVAL;
         }
  
-       if (type == DM_TYPE_BIO_BASED)
-               for (i = 0; i < t->num_targets; i++) {
-                       tgt = t->targets + i;
-                       per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size);
-               }
-
-       t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_bio_data_size);
         if (!t->mempools)
                 return -ENOMEM;
  
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c

index 921aafd12aee6754c373fbbd0df8941219b5c1eb..e852602c0091986bfeae804fcd69fe2bc2120992 100644 (file)
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -793,10 +793,9 @@ static void inc_remap_and_issue_cell(struct thin_c *tc,
  
  static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
  {
-       if (m->bio) {
+       if (m->bio)
                 m->bio->bi_end_io = m->saved_bi_end_io;
-               atomic_inc(&m->bio->bi_remaining);
-       }
+
         cell_error(m->tc->pool, m->cell);
         list_del(&m->list);
         mempool_free(m, m->tc->pool->mapping_pool);
@@ -810,10 +809,8 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
         int r;
  
         bio = m->bio;
-       if (bio) {
+       if (bio)
                 bio->bi_end_io = m->saved_bi_end_io;
-               atomic_inc(&bio->bi_remaining);
-       }
  
         if (m->err) {
                 cell_error(pool, m->cell);
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c

index 66616db33e6fdbc14896704fe50483930ffb1cfd..bb9c6a00e4b05e0330e3917d52226fb962e5c1bb 100644 (file)
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -459,7 +459,7 @@ static void verity_finish_io(struct dm_verity_io *io, int error)
         bio->bi_end_io = io->orig_bi_end_io;
         bio->bi_private = io->orig_bi_private;
  
-       bio_endio_nodec(bio, error);
+       bio_endio(bio, error);
  }
  
  static void verity_work(struct work_struct *w)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c

index 2caf492890d64b27a0a88f24f4f04d1778448d9a..4d6f089a0e9e2eca5b8fa58017a29e1da598c2a0 100644 (file)
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -990,57 +990,6 @@ static void clone_endio(struct bio *bio, int error)
         dec_pending(io, error);
  }
  
-/*
- * Partial completion handling for request-based dm
- */
-static void end_clone_bio(struct bio *clone, int error)
-{
-       struct dm_rq_clone_bio_info *info =
-               container_of(clone, struct dm_rq_clone_bio_info, clone);
-       struct dm_rq_target_io *tio = info->tio;
-       struct bio *bio = info->orig;
-       unsigned int nr_bytes = info->orig->bi_iter.bi_size;
-
-       bio_put(clone);
-
-       if (tio->error)
-               /*
-                * An error has already been detected on the request.
-                * Once error occurred, just let clone->end_io() handle
-                * the remainder.
-                */
-               return;
-       else if (error) {
-               /*
-                * Don't notice the error to the upper layer yet.
-                * The error handling decision is made by the target driver,
-                * when the request is completed.
-                */
-               tio->error = error;
-               return;
-       }
-
-       /*
-        * I/O for the bio successfully completed.
-        * Notice the data completion to the upper layer.
-        */
-
-       /*
-        * bios are processed from the head of the list.
-        * So the completing bio should always be rq->bio.
-        * If it's not, something wrong is happening.
-        */
-       if (tio->orig->bio != bio)
-               DMERR("bio completion is going in the middle of the request");
-
-       /*
-        * Update the original request.
-        * Do not use blk_end_request() here, because it may complete
-        * the original request before the clone, and break the ordering.
-        */
-       blk_update_request(tio->orig, 0, nr_bytes);
-}
-
  static struct dm_rq_target_io *tio_from_request(struct request *rq)
  {
         return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
@@ -1087,8 +1036,6 @@ static void free_rq_clone(struct request *clone)
         struct dm_rq_target_io *tio = clone->end_io_data;
         struct mapped_device *md = tio->md;
  
-       blk_rq_unprep_clone(clone);
-
         if (md->type == DM_TYPE_MQ_REQUEST_BASED)
                 /* stacked on blk-mq queue(s) */
                 tio->ti->type->release_clone_rq(clone);
@@ -1827,39 +1774,13 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
                 dm_complete_request(rq, r);
  }
  
-static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
-                                void *data)
+static void setup_clone(struct request *clone, struct request *rq,
+                       struct dm_rq_target_io *tio)
  {
-       struct dm_rq_target_io *tio = data;
-       struct dm_rq_clone_bio_info *info =
-               container_of(bio, struct dm_rq_clone_bio_info, clone);
-
-       info->orig = bio_orig;
-       info->tio = tio;
-       bio->bi_end_io = end_clone_bio;
-
-       return 0;
-}
-
-static int setup_clone(struct request *clone, struct request *rq,
-                      struct dm_rq_target_io *tio, gfp_t gfp_mask)
-{
-       int r;
-
-       r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
-                             dm_rq_bio_constructor, tio);
-       if (r)
-               return r;
-
-       clone->cmd = rq->cmd;
-       clone->cmd_len = rq->cmd_len;
-       clone->sense = rq->sense;
+       blk_rq_prep_clone(clone, rq);
         clone->end_io = end_clone_request;
         clone->end_io_data = tio;
-
         tio->clone = clone;
-
-       return 0;
  }
  
  static struct request *clone_rq(struct request *rq, struct mapped_device *md,
@@ -1880,12 +1801,7 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md,
                 clone = tio->clone;
  
         blk_rq_init(NULL, clone);
-       if (setup_clone(clone, rq, tio, gfp_mask)) {
-               /* -ENOMEM */
-               if (alloc_clone)
-                       free_clone_request(md, clone);
-               return NULL;
-       }
+       setup_clone(clone, rq, tio);
  
         return clone;
  }
@@ -1979,11 +1895,7 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
                 }
                 if (r != DM_MAPIO_REMAPPED)
                         return r;
-               if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
-                       /* -ENOMEM */
-                       ti->type->release_clone_rq(clone);
-                       return DM_MAPIO_REQUEUE;
-               }
+               setup_clone(clone, rq, tio);
         }
  
         switch (r) {
@@ -2437,8 +2349,6 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
                 goto out;
         }
  
-       BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);
-
         md->io_pool = p->io_pool;
         p->io_pool = NULL;
         md->rq_pool = p->rq_pool;
@@ -3544,48 +3454,23 @@ int dm_noflush_suspending(struct dm_target *ti)
  }
  EXPORT_SYMBOL_GPL(dm_noflush_suspending);
  
-struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
-                                           unsigned integrity, unsigned per_bio_data_size)
+struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity,
+                                            unsigned per_bio_data_size)
  {
-       struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
-       struct kmem_cache *cachep = NULL;
-       unsigned int pool_size = 0;
+       struct dm_md_mempools *pools;
+       unsigned int pool_size = dm_get_reserved_bio_based_ios();
         unsigned int front_pad;
  
+       pools = kzalloc(sizeof(*pools), GFP_KERNEL);
         if (!pools)
                 return NULL;
  
-       type = filter_md_type(type, md);
+       front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) +
+               offsetof(struct dm_target_io, clone);
  
-       switch (type) {
-       case DM_TYPE_BIO_BASED:
-               cachep = _io_cache;
-               pool_size = dm_get_reserved_bio_based_ios();
-               front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
-               break;
-       case DM_TYPE_REQUEST_BASED:
-               cachep = _rq_tio_cache;
-               pool_size = dm_get_reserved_rq_based_ios();
-               pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
-               if (!pools->rq_pool)
-                       goto out;
-               /* fall through to setup remaining rq-based pools */
-       case DM_TYPE_MQ_REQUEST_BASED:
-               if (!pool_size)
-                       pool_size = dm_get_reserved_rq_based_ios();
-               front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
-               /* per_bio_data_size is not used. See __bind_mempools(). */
-               WARN_ON(per_bio_data_size != 0);
-               break;
-       default:
-               BUG();
-       }
-
-       if (cachep) {
-               pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
-               if (!pools->io_pool)
-                       goto out;
-       }
+       pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache);
+       if (!pools->io_pool)
+               goto out;
  
         pools->bs = bioset_create_nobvec(pool_size, front_pad);
         if (!pools->bs)
@@ -3595,10 +3480,34 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
                 goto out;
  
         return pools;
-
  out:
         dm_free_md_mempools(pools);
+       return NULL;
+}
+
+struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md,
+                                           unsigned type)
+{
+       unsigned int pool_size = dm_get_reserved_rq_based_ios();
+       struct dm_md_mempools *pools;
+
+       pools = kzalloc(sizeof(*pools), GFP_KERNEL);
+       if (!pools)
+               return NULL;
+
+       if (filter_md_type(type, md) == DM_TYPE_REQUEST_BASED) {
+               pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
+               if (!pools->rq_pool)
+                       goto out;
+       }
  
+       pools->io_pool = mempool_create_slab_pool(pool_size, _rq_tio_cache);
+       if (!pools->io_pool)
+               goto out;
+
+       return pools;
+out:
+       dm_free_md_mempools(pools);
         return NULL;
  }
  
diff --git a/drivers/md/dm.h b/drivers/md/dm.h

index 6123c2bf9150cb836c1ecd80ebfe51c9f9aa82fd..e6e66d087b2696ae8671631978b2fffe3c80f49e 100644 (file)
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -222,8 +222,9 @@ void dm_kcopyd_exit(void);
  /*
   * Mempool operations
   */
-struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
-                                           unsigned integrity, unsigned per_bio_data_size);
+struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity,
+                                            unsigned per_bio_data_size);
+struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md, unsigned type);
  void dm_free_md_mempools(struct dm_md_mempools *pools);
  
  /*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index 2ef9a4b72d06e46d4f3a19c73e94796cb8397347..0bccf18dc1dca062cd5cd6071dceedd0c3b333a4 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1745,7 +1745,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
         bio->bi_private = end_io_wq->private;
         bio->bi_end_io = end_io_wq->end_io;
         kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
-       bio_endio_nodec(bio, error);
+       bio_endio(bio, error);
  }
  
  static int cleaner_kthread(void *arg)
@@ -3269,11 +3269,8 @@ static int write_dev_supers(struct btrfs_device *device,
   */
  static void btrfs_end_empty_barrier(struct bio *bio, int err)
  {
-       if (err) {
-               if (err == -EOPNOTSUPP)
-                       set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+       if (err)
                 clear_bit(BIO_UPTODATE, &bio->bi_flags);
-       }
         if (bio->bi_private)
                 complete(bio->bi_private);
         bio_put(bio);
@@ -3301,11 +3298,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
  
                 wait_for_completion(&device->flush_wait);
  
-               if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
-                       printk_in_rcu("BTRFS: disabling barriers on dev %s\n",
-                                     rcu_str_deref(device->name));
-                       device->nobarriers = 1;
-               } else if (!bio_flagged(bio, BIO_UPTODATE)) {
+               if (!bio_flagged(bio, BIO_UPTODATE)) {
                         ret = -EIO;
                         btrfs_dev_stat_inc_and_print(device,
                                 BTRFS_DEV_STAT_FLUSH_ERRS);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c

index c32d226bfeccbb28f25f2f417fa9e57b14411136..c374e1e71e5f3e1b80713f39fc4338fb659a8c90 100644 (file)
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2767,8 +2767,6 @@ static int __must_check submit_one_bio(int rw, struct bio *bio,
         else
                 btrfsic_submit_bio(rw, bio);
  
-       if (bio_flagged(bio, BIO_EOPNOTSUPP))
-               ret = -EOPNOTSUPP;
         bio_put(bio);
         return ret;
  }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c

index 174f5e1e00abfa533b1cb7483e44aae0f550e63a..53af23f2c087ad015e720af094fbfc53e1671317 100644 (file)
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -345,7 +345,7 @@ loop_lock:
                     waitqueue_active(&fs_info->async_submit_wait))
                         wake_up(&fs_info->async_submit_wait);
  
-               BUG_ON(atomic_read(&cur->bi_cnt) == 0);
+               BUG_ON(atomic_read(&cur->__bi_cnt) == 0);
  
                 /*
                  * if we're doing the sync list, record that our
@@ -5586,10 +5586,10 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
  
  static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err)
  {
-       if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED))
-               bio_endio_nodec(bio, err);
-       else
-               bio_endio(bio, err);
+       bio->bi_private = bbio->private;
+       bio->bi_end_io = bbio->end_io;
+       bio_endio(bio, err);
+
         btrfs_put_bbio(bbio);
  }
  
@@ -5633,8 +5633,6 @@ static void btrfs_end_bio(struct bio *bio, int err)
                         bio = bbio->orig_bio;
                 }
  
-               bio->bi_private = bbio->private;
-               bio->bi_end_io = bbio->end_io;
                 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
                 /* only send an error to the higher layers if it is
                  * beyond the tolerance of the btrfs bio
@@ -5816,8 +5814,6 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
                 /* Shoud be the original bio. */
                 WARN_ON(bio != bbio->orig_bio);
  
-               bio->bi_private = bbio->private;
-               bio->bi_end_io = bbio->end_io;
                 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
                 bio->bi_iter.bi_sector = logical >> 9;
  
@@ -5898,10 +5894,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                 if (dev_nr < total_devs - 1) {
                         bio = btrfs_bio_clone(first_bio, GFP_NOFS);
                         BUG_ON(!bio); /* -ENOMEM */
-               } else {
+               } else
                         bio = first_bio;
-                       bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED;
-               }
  
                 submit_stripe_bio(root, bbio, bio,
                                   bbio->stripes[dev_nr].physical, dev_nr, rw,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h

index ebc31331a83746b23b4e99ac146b25fc03b31cc3..cedae0356558d92abd20ee9daf72df3eadbbb7db 100644 (file)
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -292,8 +292,6 @@ struct btrfs_bio_stripe {
  struct btrfs_bio;
  typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
  
-#define BTRFS_BIO_ORIG_BIO_SUBMITTED   (1 << 0)
-
  struct btrfs_bio {
         atomic_t refs;
         atomic_t stripes_pending;
diff --git a/fs/buffer.c b/fs/buffer.c

index c7a5602d01eed200912d3a90ca4ac6780209cb6f..f96173ad62d90413662e5ff15b14ed0df854ba5a 100644 (file)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2938,10 +2938,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
  {
         struct buffer_head *bh = bio->bi_private;
  
-       if (err == -EOPNOTSUPP) {
-               set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-       }
-
         if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
                 set_bit(BH_Quiet, &bh->b_state);
  
@@ -3000,7 +2996,6 @@ void guard_bio_eod(int rw, struct bio *bio)
  int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
  {
         struct bio *bio;
-       int ret = 0;
  
         BUG_ON(!buffer_locked(bh));
         BUG_ON(!buffer_mapped(bh));
@@ -3041,14 +3036,8 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
         if (buffer_prio(bh))
                 rw |= REQ_PRIO;
  
-       bio_get(bio);
         submit_bio(rw, bio);
-
-       if (bio_flagged(bio, BIO_EOPNOTSUPP))
-               ret = -EOPNOTSUPP;
-
-       bio_put(bio);
-       return ret;
+       return 0;
  }
  EXPORT_SYMBOL_GPL(_submit_bh);
  
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c

index 79636e21d3a2d28e7844a9d4adeff3bbc947d87a..5602450f03f6497f97a25b954779435e7cce8c1a 100644 (file)
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -359,7 +359,6 @@ void ext4_io_submit(struct ext4_io_submit *io)
         if (bio) {
                 bio_get(io->io_bio);
                 submit_bio(io->io_op, io->io_bio);
-               BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
                 bio_put(io->io_bio);
         }
         io->io_bio = NULL;
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c

index dc3a9efdaab87751e47edcf9ef3a807fed4573db..42468e5ab3e71ab69d1f121323d6adb28cb67325 100644 (file)
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -343,11 +343,6 @@ static void nilfs_end_bio_write(struct bio *bio, int err)
         const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
         struct nilfs_segment_buffer *segbuf = bio->bi_private;
  
-       if (err == -EOPNOTSUPP) {
-               set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-               /* to be detected by nilfs_segbuf_submit_bio() */
-       }
-
         if (!uptodate)
                 atomic_inc(&segbuf->sb_err);
  
@@ -374,15 +369,8 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
  
         bio->bi_end_io = nilfs_end_bio_write;
         bio->bi_private = segbuf;
-       bio_get(bio);
         submit_bio(mode, bio);
         segbuf->sb_nbio++;
-       if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
-               bio_put(bio);
-               err = -EOPNOTSUPP;
-               goto failed;
-       }
-       bio_put(bio);
  
         wi->bio = NULL;
         wi->rest_blocks -= wi->end - wi->start;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c

index a56960dd16847bccd3fea619f28aedad0f744f31..095f94c2d8b564a46d3c48636fb0532e6b10a658 100644 (file)
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -356,7 +356,6 @@ xfs_end_bio(
  {
         xfs_ioend_t             *ioend = bio->bi_private;
  
-       ASSERT(atomic_read(&bio->bi_cnt) >= 1);
         ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
  
         /* Toss bio and pass work off to an xfsdatad thread */
diff --git a/include/linux/bio.h b/include/linux/bio.h

index da3a127c99583ba1c38d9cad1ed35550bb9666ca..f0291cf64cc5f6d26a2e60ee5e1cfc02431556d5 100644 (file)
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -290,7 +290,21 @@ static inline unsigned bio_segments(struct bio *bio)
   * returns. and then bio would be freed memory when if (bio->bi_flags ...)
   * runs
   */
-#define bio_get(bio)   atomic_inc(&(bio)->bi_cnt)
+static inline void bio_get(struct bio *bio)
+{
+       bio->bi_flags |= (1 << BIO_REFFED);
+       smp_mb__before_atomic();
+       atomic_inc(&bio->__bi_cnt);
+}
+
+static inline void bio_cnt_set(struct bio *bio, unsigned int count)
+{
+       if (count != 1) {
+               bio->bi_flags |= (1 << BIO_REFFED);
+               smp_mb__before_atomic();
+       }
+       atomic_set(&bio->__bi_cnt, count);
+}
  
  enum bip_flags {
         BIP_BLOCK_INTEGRITY     = 1 << 0, /* block layer owns integrity data */
@@ -413,7 +427,6 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
  }
  
  extern void bio_endio(struct bio *, int);
-extern void bio_endio_nodec(struct bio *, int);
  struct request_queue;
  extern int bio_phys_segments(struct request_queue *, struct bio *);
  
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h

index 2056a99b92f8c905d2cbc53ee7a2f022893da27e..37d1602c4f7aa08b464577c675910046a4db3dde 100644 (file)
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -96,6 +96,7 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int,
  
  typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
                 bool);
+typedef void (busy_tag_iter_fn)(struct request *, void *, bool);
  
  struct blk_mq_ops {
         /*
@@ -182,6 +183,7 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
  struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
                 gfp_t gfp, bool reserved);
  struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
+struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
  
  enum {
         BLK_MQ_UNIQUE_TAG_BITS = 16,
@@ -224,6 +226,8 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async);
  void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
  void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
                 void *priv);
+void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
+               void *priv);
  void blk_mq_freeze_queue(struct request_queue *q);
  void blk_mq_unfreeze_queue(struct request_queue *q);
  void blk_mq_freeze_queue_start(struct request_queue *q);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h

index b7299febc4b4adfee00cb8b05d6fbf6558f01547..6ab9d12d1f17762190870ab153129bac56b82176 100644 (file)
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -65,7 +65,7 @@ struct bio {
         unsigned int            bi_seg_front_size;
         unsigned int            bi_seg_back_size;
  
-       atomic_t                bi_remaining;
+       atomic_t                __bi_remaining;
  
         bio_end_io_t            *bi_end_io;
  
@@ -92,7 +92,7 @@ struct bio {
  
         unsigned short          bi_max_vecs;    /* max bvl_vecs we can hold */
  
-       atomic_t                bi_cnt;         /* pin count */
+       atomic_t                __bi_cnt;       /* pin count */
  
         struct bio_vec          *bi_io_vec;     /* the actual vec list */
  
@@ -112,16 +112,15 @@ struct bio {
   * bio flags
   */
  #define BIO_UPTODATE   0       /* ok after I/O completion */
-#define BIO_RW_BLOCK   1       /* RW_AHEAD set, and read/write would block */
-#define BIO_EOF                2       /* out-out-bounds error */
-#define BIO_SEG_VALID  3       /* bi_phys_segments valid */
-#define BIO_CLONED     4       /* doesn't own data */
-#define BIO_BOUNCED    5       /* bio is a bounce bio */
-#define BIO_USER_MAPPED 6      /* contains user pages */
-#define BIO_EOPNOTSUPP 7       /* not supported */
-#define BIO_NULL_MAPPED 8      /* contains invalid user pages */
-#define BIO_QUIET      9       /* Make BIO Quiet */
-#define BIO_SNAP_STABLE        10      /* bio data must be snapshotted during write */
+#define BIO_SEG_VALID  1       /* bi_phys_segments valid */
+#define BIO_CLONED     2       /* doesn't own data */
+#define BIO_BOUNCED    3       /* bio is a bounce bio */
+#define BIO_USER_MAPPED 4      /* contains user pages */
+#define BIO_NULL_MAPPED 5      /* contains invalid user pages */
+#define BIO_QUIET      6       /* Make BIO Quiet */
+#define BIO_SNAP_STABLE        7       /* bio data must be snapshotted during write */
+#define BIO_CHAIN      8       /* chained bio, ->bi_remaining in effect */
+#define BIO_REFFED     9       /* bio has elevated ->bi_cnt */
  
  /*
   * Flags starting here get preserved by bio_reset() - this includes
@@ -193,6 +192,7 @@ enum rq_flag_bits {
         __REQ_HASHED,           /* on IO scheduler merge hash */
         __REQ_MQ_INFLIGHT,      /* track inflight for MQ */
         __REQ_NO_TIMEOUT,       /* requests may never expire */
+       __REQ_CLONE,            /* cloned bios */
         __REQ_NR_BITS,          /* stops here */
  };
  
@@ -247,5 +247,6 @@ enum rq_flag_bits {
  #define REQ_HASHED             (1ULL << __REQ_HASHED)
  #define REQ_MQ_INFLIGHT                (1ULL << __REQ_MQ_INFLIGHT)
  #define REQ_NO_TIMEOUT         (1ULL << __REQ_NO_TIMEOUT)
+#define REQ_CLONE              (1ULL << __REQ_CLONE)
  
  #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index 5d93a6645e88676a7d90a1ac55b5d5d6792da667..a6ae5f9bee495d03ecbf03ddac145a96abe852ff 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -30,7 +30,6 @@ struct scsi_ioctl_command;
  
  struct request_queue;
  struct elevator_queue;
-struct request_pm_state;
  struct blk_trace;
  struct request;
  struct sg_io_hdr;
@@ -75,18 +74,7 @@ struct request_list {
  enum rq_cmd_type_bits {
         REQ_TYPE_FS             = 1,    /* fs request */
         REQ_TYPE_BLOCK_PC,              /* scsi command */
-       REQ_TYPE_SENSE,                 /* sense request */
-       REQ_TYPE_PM_SUSPEND,            /* suspend request */
-       REQ_TYPE_PM_RESUME,             /* resume request */
-       REQ_TYPE_PM_SHUTDOWN,           /* shutdown request */
-       REQ_TYPE_SPECIAL,               /* driver defined type */
-       /*
-        * for ATA/ATAPI devices. this really doesn't belong here, ide should
-        * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
-        * private REQ_LB opcodes to differentiate what type of request this is
-        */
-       REQ_TYPE_ATA_TASKFILE,
-       REQ_TYPE_ATA_PC,
+       REQ_TYPE_DRV_PRIV,              /* driver defined types from here */
  };
  
  #define BLK_MAX_CDB    16
@@ -108,7 +96,7 @@ struct request {
         struct blk_mq_ctx *mq_ctx;
  
         u64 cmd_flags;
-       enum rq_cmd_type_bits cmd_type;
+       unsigned cmd_type;
         unsigned long atomic_flags;
  
         int cpu;
@@ -216,19 +204,6 @@ static inline unsigned short req_get_ioprio(struct request *req)
         return req->ioprio;
  }
  
-/*
- * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
- * requests. Some step values could eventually be made generic.
- */
-struct request_pm_state
-{
-       /* PM state machine step value, currently driver specific */
-       int     pm_step;
-       /* requested PM state value (S1, S2, S3, S4, ...) */
-       u32     pm_state;
-       void*   data;           /* for driver use */
-};
-
  #include <linux/elevator.h>
  
  struct blk_queue_ctx;
@@ -469,7 +444,7 @@ struct request_queue {
         struct mutex            sysfs_lock;
  
         int                     bypass_depth;
-       int                     mq_freeze_depth;
+       atomic_t                mq_freeze_depth;
  
  #if defined(CONFIG_BLK_DEV_BSG)
         bsg_job_fn              *bsg_job_fn;
@@ -610,10 +585,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
         (((rq)->cmd_flags & REQ_STARTED) && \
          ((rq)->cmd_type == REQ_TYPE_FS))
  
-#define blk_pm_request(rq)     \
-       ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \
-        (rq)->cmd_type == REQ_TYPE_PM_RESUME)
-
  #define blk_rq_cpu_valid(rq)   ((rq)->cpu != -1)
  #define blk_bidi_rq(rq)                ((rq)->next_rq != NULL)
  /* rq->queuelist of dequeued request must be list_empty() */
@@ -804,11 +775,7 @@ extern void blk_add_request_payload(struct request *rq, struct page *page,
                 unsigned int len);
  extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
  extern int blk_lld_busy(struct request_queue *q);
-extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
-                            struct bio_set *bs, gfp_t gfp_mask,
-                            int (*bio_ctr)(struct bio *, struct bio *, void *),
-                            void *data);
-extern void blk_rq_unprep_clone(struct request *rq);
+extern void blk_rq_prep_clone(struct request *rq, struct request *rq_src);
  extern int blk_insert_cloned_request(struct request_queue *q,
                                      struct request *rq);
  extern void blk_delay_queue(struct request_queue *, unsigned long);
@@ -845,6 +812,7 @@ extern void blk_stop_queue(struct request_queue *q);
  extern void blk_sync_queue(struct request_queue *q);
  extern void __blk_stop_queue(struct request_queue *q);
  extern void __blk_run_queue(struct request_queue *q);
+extern void __blk_run_queue_uncond(struct request_queue *q);
  extern void blk_run_queue(struct request_queue *);
  extern void blk_run_queue_async(struct request_queue *q);
  extern int blk_rq_map_user(struct request_queue *, struct request *,
@@ -933,7 +901,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
         if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
                 return q->limits.max_hw_sectors;
  
-       if (!q->limits.chunk_sectors)
+       if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD))
                 return blk_queue_get_max_sectors(q, rq->cmd_flags);
  
         return min(blk_max_size_offset(q, blk_rq_pos(rq)),
@@ -1054,6 +1022,7 @@ bool __must_check blk_get_queue(struct request_queue *);
  struct request_queue *blk_alloc_queue(gfp_t);
  struct request_queue *blk_alloc_queue_node(gfp_t, int);
  extern void blk_put_queue(struct request_queue *);
+extern void blk_set_queue_dying(struct request_queue *);
  
  /*
   * block layer runtime pm functions
diff --git a/include/linux/elevator.h b/include/linux/elevator.h

index 45a91474487daff11a69bf55a31d7cd146c75f72..638b324f0291c2466fe285a0c4e94dee11fd87f0 100644 (file)
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -39,6 +39,7 @@ typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct reques
  typedef int (elevator_init_fn) (struct request_queue *,
                                 struct elevator_type *e);
  typedef void (elevator_exit_fn) (struct elevator_queue *);
+typedef void (elevator_registered_fn) (struct request_queue *);
  
  struct elevator_ops
  {
@@ -68,6 +69,7 @@ struct elevator_ops
  
         elevator_init_fn *elevator_init_fn;
         elevator_exit_fn *elevator_exit_fn;
+       elevator_registered_fn *elevator_registered_fn;
  };
  
  #define ELV_NAME_MAX   (16)
diff --git a/include/linux/fs.h b/include/linux/fs.h

index b577e801b4af17ddd3288e28c209b644415cc63c..5db7b1379d174848116124b5f9c26fc212798d21 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2280,6 +2280,9 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
  extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
                                               void *holder);
  extern void blkdev_put(struct block_device *bdev, fmode_t mode);
+extern int __blkdev_reread_part(struct block_device *bdev);
+extern int blkdev_reread_part(struct block_device *bdev);
+
  #ifdef CONFIG_SYSFS
  extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
  extern void bd_unlink_disk_holder(struct block_device *bdev,
diff --git a/include/linux/ide.h b/include/linux/ide.h

index 93b5ca754b5b4c4931aa2b3f66cf246312cedbd8..a633898f36ac83f387f6de86350f7768a3d73b6e 100644 (file)
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -39,6 +39,19 @@
  
  struct device;
  
+/* IDE-specific values for req->cmd_type */
+enum ata_cmd_type_bits {
+       REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1,
+       REQ_TYPE_ATA_PC,
+       REQ_TYPE_ATA_SENSE,     /* sense request */
+       REQ_TYPE_ATA_PM_SUSPEND,/* suspend request */
+       REQ_TYPE_ATA_PM_RESUME, /* resume request */
+};
+
+#define ata_pm_request(rq)     \
+       ((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \
+        (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME)
+
  /* Error codes returned in rq->errors to the higher part of the driver. */
  enum {
         IDE_DRV_ERROR_GENERAL   = 101,
@@ -1314,6 +1327,19 @@ struct ide_port_info {
         u8                      udma_mask;
  };
  
+/*
+ * State information carried for REQ_TYPE_ATA_PM_SUSPEND and REQ_TYPE_ATA_PM_RESUME
+ * requests.
+ */
+struct ide_pm_state {
+       /* PM state machine step value, currently driver specific */
+       int     pm_step;
+       /* requested PM state value (S1, S2, S3, S4, ...) */
+       u32     pm_state;
+       void*   data;           /* for driver use */
+};
+
+
  int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *);
  int ide_pci_init_two(struct pci_dev *, struct pci_dev *,
                      const struct ide_port_info *, void *);
@@ -1551,4 +1577,5 @@ static inline void ide_set_drivedata(ide_drive_t *drive, void *data)
  #define ide_host_for_each_port(i, port, host) \
         for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++)
  
+
  #endif /* _IDE_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h

index cee108cbe2d52e15ada2c399a800145797e37eff..38874729dc5fd8989af223debcb9bd0ddb86199c 100644 (file)
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -377,7 +377,6 @@ extern void end_swap_bio_write(struct bio *bio, int err);
  extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
         void (*end_write_func)(struct bio *, int));
  extern int swap_set_page_dirty(struct page *page);
-extern void end_swap_bio_read(struct bio *bio, int err);
  
  int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
                 unsigned long nr_pages, sector_t start_block);
diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h

index 4f52549b23ff8765e0c5cfcf599190eca897de87..e08e413d5f71c916e3d666a7f79bc6b977ba98e4 100644 (file)
--- a/include/uapi/linux/nbd.h
+++ b/include/uapi/linux/nbd.h
@@ -44,8 +44,6 @@ enum {
  /* there is a gap here to match userspace */
  #define NBD_FLAG_SEND_TRIM    (1 << 5) /* send trim/discard */
  
-#define nbd_cmd(req) ((req)->cmd[0])
-
  /* userspace doesn't need the nbd_device structure */
  
  /* These are sent over the network in the request/reply magic fields */
diff --git a/kernel/power/Makefile b/kernel/power/Makefile

index 29472bff11ef9b1c902e71a7ae9823a9e36c06e6..cb880a14cc396eec6af959007011fd6830b14baa 100644 (file)
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -7,8 +7,7 @@ obj-$(CONFIG_VT_CONSOLE_SLEEP)  += console.o
  obj-$(CONFIG_FREEZER)          += process.o
  obj-$(CONFIG_SUSPEND)          += suspend.o
  obj-$(CONFIG_PM_TEST_SUSPEND)  += suspend_test.o
-obj-$(CONFIG_HIBERNATION)      += hibernate.o snapshot.o swap.o user.o \
-                                  block_io.o
+obj-$(CONFIG_HIBERNATION)      += hibernate.o snapshot.o swap.o user.o
  obj-$(CONFIG_PM_AUTOSLEEP)     += autosleep.o
  obj-$(CONFIG_PM_WAKELOCKS)     += wakelock.o
  
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c

deleted file mode 100644 (file)

index 9a58bc2..0000000
--- a/kernel/power/block_io.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * This file provides functions for block I/O operations on swap/file.
- *
- * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
- * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
- *
- * This file is released under the GPLv2.
- */
-
-#include <linux/bio.h>
-#include <linux/kernel.h>
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-
-#include "power.h"
-
-/**
- *     submit - submit BIO request.
- *     @rw:    READ or WRITE.
- *     @off    physical offset of page.
- *     @page:  page we're reading or writing.
- *     @bio_chain: list of pending biod (for async reading)
- *
- *     Straight from the textbook - allocate and initialize the bio.
- *     If we're reading, make sure the page is marked as dirty.
- *     Then submit it and, if @bio_chain == NULL, wait.
- */
-static int submit(int rw, struct block_device *bdev, sector_t sector,
-               struct page *page, struct bio **bio_chain)
-{
-       const int bio_rw = rw | REQ_SYNC;
-       struct bio *bio;
-
-       bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
-       bio->bi_iter.bi_sector = sector;
-       bio->bi_bdev = bdev;
-       bio->bi_end_io = end_swap_bio_read;
-
-       if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
-               printk(KERN_ERR "PM: Adding page to bio failed at %llu\n",
-                       (unsigned long long)sector);
-               bio_put(bio);
-               return -EFAULT;
-       }
-
-       lock_page(page);
-       bio_get(bio);
-
-       if (bio_chain == NULL) {
-               submit_bio(bio_rw, bio);
-               wait_on_page_locked(page);
-               if (rw == READ)
-                       bio_set_pages_dirty(bio);
-               bio_put(bio);
-       } else {
-               if (rw == READ)
-                       get_page(page); /* These pages are freed later */
-               bio->bi_private = *bio_chain;
-               *bio_chain = bio;
-               submit_bio(bio_rw, bio);
-       }
-       return 0;
-}
-
-int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
-{
-       return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
-                       virt_to_page(addr), bio_chain);
-}
-
-int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
-{
-       return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
-                       virt_to_page(addr), bio_chain);
-}
-
-int hib_wait_on_bio_chain(struct bio **bio_chain)
-{
-       struct bio *bio;
-       struct bio *next_bio;
-       int ret = 0;
-
-       if (bio_chain == NULL)
-               return 0;
-
-       bio = *bio_chain;
-       if (bio == NULL)
-               return 0;
-       while (bio) {
-               struct page *page;
-
-               next_bio = bio->bi_private;
-               page = bio->bi_io_vec[0].bv_page;
-               wait_on_page_locked(page);
-               if (!PageUptodate(page) || PageError(page))
-                       ret = -EIO;
-               put_page(page);
-               bio_put(bio);
-               bio = next_bio;
-       }
-       *bio_chain = NULL;
-       return ret;
-}
diff --git a/kernel/power/power.h b/kernel/power/power.h

index ce9b8328a689111ca26c1ed5e46c95b58de7fb6e..caadb566e82bb51a5348d6ba67a73bda8c99f37d 100644 (file)
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -163,15 +163,6 @@ extern void swsusp_close(fmode_t);
  extern int swsusp_unmark(void);
  #endif
  
-/* kernel/power/block_io.c */
-extern struct block_device *hib_resume_bdev;
-
-extern int hib_bio_read_page(pgoff_t page_off, void *addr,
-               struct bio **bio_chain);
-extern int hib_bio_write_page(pgoff_t page_off, void *addr,
-               struct bio **bio_chain);
-extern int hib_wait_on_bio_chain(struct bio **bio_chain);
-
  struct timeval;
  /* kernel/power/swsusp.c */
  extern void swsusp_show_speed(ktime_t, ktime_t, unsigned int, char *);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c

index 570aff817543fc8484b08ecdba09da4751acf060..2f30ca91e4fadfa7fa91cc8e62bf1ac7b9e35556 100644 (file)
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -212,7 +212,84 @@ int swsusp_swap_in_use(void)
   */
  
  static unsigned short root_swap = 0xffff;
-struct block_device *hib_resume_bdev;
+static struct block_device *hib_resume_bdev;
+
+struct hib_bio_batch {
+       atomic_t                count;
+       wait_queue_head_t       wait;
+       int                     error;
+};
+
+static void hib_init_batch(struct hib_bio_batch *hb)
+{
+       atomic_set(&hb->count, 0);
+       init_waitqueue_head(&hb->wait);
+       hb->error = 0;
+}
+
+static void hib_end_io(struct bio *bio, int error)
+{
+       struct hib_bio_batch *hb = bio->bi_private;
+       const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       struct page *page = bio->bi_io_vec[0].bv_page;
+
+       if (!uptodate || error) {
+               printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
+                               imajor(bio->bi_bdev->bd_inode),
+                               iminor(bio->bi_bdev->bd_inode),
+                               (unsigned long long)bio->bi_iter.bi_sector);
+
+               if (!error)
+                       error = -EIO;
+       }
+
+       if (bio_data_dir(bio) == WRITE)
+               put_page(page);
+
+       if (error && !hb->error)
+               hb->error = error;
+       if (atomic_dec_and_test(&hb->count))
+               wake_up(&hb->wait);
+
+       bio_put(bio);
+}
+
+static int hib_submit_io(int rw, pgoff_t page_off, void *addr,
+               struct hib_bio_batch *hb)
+{
+       struct page *page = virt_to_page(addr);
+       struct bio *bio;
+       int error = 0;
+
+       bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
+       bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
+       bio->bi_bdev = hib_resume_bdev;
+
+       if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+               printk(KERN_ERR "PM: Adding page to bio failed at %llu\n",
+                       (unsigned long long)bio->bi_iter.bi_sector);
+               bio_put(bio);
+               return -EFAULT;
+       }
+
+       if (hb) {
+               bio->bi_end_io = hib_end_io;
+               bio->bi_private = hb;
+               atomic_inc(&hb->count);
+               submit_bio(rw, bio);
+       } else {
+               error = submit_bio_wait(rw, bio);
+               bio_put(bio);
+       }
+
+       return error;
+}
+
+static int hib_wait_io(struct hib_bio_batch *hb)
+{
+       wait_event(hb->wait, atomic_read(&hb->count) == 0);
+       return hb->error;
+}
  
  /*
   * Saving part
@@ -222,7 +299,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
  {
         int error;
  
-       hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
+       hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL);
         if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
             !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
                 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
@@ -231,7 +308,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
                 swsusp_header->flags = flags;
                 if (flags & SF_CRC32_MODE)
                         swsusp_header->crc32 = handle->crc32;
-               error = hib_bio_write_page(swsusp_resume_block,
+               error = hib_submit_io(WRITE_SYNC, swsusp_resume_block,
                                         swsusp_header, NULL);
         } else {
                 printk(KERN_ERR "PM: Swap header not found!\n");
@@ -271,10 +348,10 @@ static int swsusp_swap_check(void)
   *     write_page - Write one page to given swap location.
   *     @buf:           Address we're writing.
   *     @offset:        Offset of the swap page we're writing to.
- *     @bio_chain:     Link the next write BIO here
+ *     @hb:            bio completion batch
   */
  
-static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
+static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
  {
         void *src;
         int ret;
@@ -282,13 +359,13 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
         if (!offset)
                 return -ENOSPC;
  
-       if (bio_chain) {
+       if (hb) {
                 src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN |
                                               __GFP_NORETRY);
                 if (src) {
                         copy_page(src, buf);
                 } else {
-                       ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
+                       ret = hib_wait_io(hb); /* Free pages */
                         if (ret)
                                 return ret;
                         src = (void *)__get_free_page(__GFP_WAIT |
@@ -298,14 +375,14 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
                                 copy_page(src, buf);
                         } else {
                                 WARN_ON_ONCE(1);
-                               bio_chain = NULL;       /* Go synchronous */
+                               hb = NULL;      /* Go synchronous */
                                 src = buf;
                         }
                 }
         } else {
                 src = buf;
         }
-       return hib_bio_write_page(offset, src, bio_chain);
+       return hib_submit_io(WRITE_SYNC, offset, src, hb);
  }
  
  static void release_swap_writer(struct swap_map_handle *handle)
@@ -348,7 +425,7 @@ err_close:
  }
  
  static int swap_write_page(struct swap_map_handle *handle, void *buf,
-                               struct bio **bio_chain)
+               struct hib_bio_batch *hb)
  {
         int error = 0;
         sector_t offset;
@@ -356,7 +433,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
         if (!handle->cur)
                 return -EINVAL;
         offset = alloc_swapdev_block(root_swap);
-       error = write_page(buf, offset, bio_chain);
+       error = write_page(buf, offset, hb);
         if (error)
                 return error;
         handle->cur->entries[handle->k++] = offset;
@@ -365,15 +442,15 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
                 if (!offset)
                         return -ENOSPC;
                 handle->cur->next_swap = offset;
-               error = write_page(handle->cur, handle->cur_swap, bio_chain);
+               error = write_page(handle->cur, handle->cur_swap, hb);
                 if (error)
                         goto out;
                 clear_page(handle->cur);
                 handle->cur_swap = offset;
                 handle->k = 0;
  
-               if (bio_chain && low_free_pages() <= handle->reqd_free_pages) {
-                       error = hib_wait_on_bio_chain(bio_chain);
+               if (hb && low_free_pages() <= handle->reqd_free_pages) {
+                       error = hib_wait_io(hb);
                         if (error)
                                 goto out;
                         /*
@@ -445,23 +522,24 @@ static int save_image(struct swap_map_handle *handle,
         int ret;
         int nr_pages;
         int err2;
-       struct bio *bio;
+       struct hib_bio_batch hb;
         ktime_t start;
         ktime_t stop;
  
+       hib_init_batch(&hb);
+
         printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n",
                 nr_to_write);
         m = nr_to_write / 10;
         if (!m)
                 m = 1;
         nr_pages = 0;
-       bio = NULL;
         start = ktime_get();
         while (1) {
                 ret = snapshot_read_next(snapshot);
                 if (ret <= 0)
                         break;
-               ret = swap_write_page(handle, data_of(*snapshot), &bio);
+               ret = swap_write_page(handle, data_of(*snapshot), &hb);
                 if (ret)
                         break;
                 if (!(nr_pages % m))
@@ -469,7 +547,7 @@ static int save_image(struct swap_map_handle *handle,
                                nr_pages / m * 10);
                 nr_pages++;
         }
-       err2 = hib_wait_on_bio_chain(&bio);
+       err2 = hib_wait_io(&hb);
         stop = ktime_get();
         if (!ret)
                 ret = err2;
@@ -580,7 +658,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
         int ret = 0;
         int nr_pages;
         int err2;
-       struct bio *bio;
+       struct hib_bio_batch hb;
         ktime_t start;
         ktime_t stop;
         size_t off;
@@ -589,6 +667,8 @@ static int save_image_lzo(struct swap_map_handle *handle,
         struct cmp_data *data = NULL;
         struct crc_data *crc = NULL;
  
+       hib_init_batch(&hb);
+
         /*
          * We'll limit the number of threads for compression to limit memory
          * footprint.
@@ -674,7 +754,6 @@ static int save_image_lzo(struct swap_map_handle *handle,
         if (!m)
                 m = 1;
         nr_pages = 0;
-       bio = NULL;
         start = ktime_get();
         for (;;) {
                 for (thr = 0; thr < nr_threads; thr++) {
@@ -748,7 +827,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
                              off += PAGE_SIZE) {
                                 memcpy(page, data[thr].cmp + off, PAGE_SIZE);
  
-                               ret = swap_write_page(handle, page, &bio);
+                               ret = swap_write_page(handle, page, &hb);
                                 if (ret)
                                         goto out_finish;
                         }
@@ -759,7 +838,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
         }
  
  out_finish:
-       err2 = hib_wait_on_bio_chain(&bio);
+       err2 = hib_wait_io(&hb);
         stop = ktime_get();
         if (!ret)
                 ret = err2;
@@ -906,7 +985,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
                         return -ENOMEM;
                 }
  
-               error = hib_bio_read_page(offset, tmp->map, NULL);
+               error = hib_submit_io(READ_SYNC, offset, tmp->map, NULL);
                 if (error) {
                         release_swap_reader(handle);
                         return error;
@@ -919,7 +998,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
  }
  
  static int swap_read_page(struct swap_map_handle *handle, void *buf,
-                               struct bio **bio_chain)
+               struct hib_bio_batch *hb)
  {
         sector_t offset;
         int error;
@@ -930,7 +1009,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
         offset = handle->cur->entries[handle->k];
         if (!offset)
                 return -EFAULT;
-       error = hib_bio_read_page(offset, buf, bio_chain);
+       error = hib_submit_io(READ_SYNC, offset, buf, hb);
         if (error)
                 return error;
         if (++handle->k >= MAP_PAGE_ENTRIES) {
@@ -968,27 +1047,28 @@ static int load_image(struct swap_map_handle *handle,
         int ret = 0;
         ktime_t start;
         ktime_t stop;
-       struct bio *bio;
+       struct hib_bio_batch hb;
         int err2;
         unsigned nr_pages;
  
+       hib_init_batch(&hb);
+
         printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n",
                 nr_to_read);
         m = nr_to_read / 10;
         if (!m)
                 m = 1;
         nr_pages = 0;
-       bio = NULL;
         start = ktime_get();
         for ( ; ; ) {
                 ret = snapshot_write_next(snapshot);
                 if (ret <= 0)
                         break;
-               ret = swap_read_page(handle, data_of(*snapshot), &bio);
+               ret = swap_read_page(handle, data_of(*snapshot), &hb);
                 if (ret)
                         break;
                 if (snapshot->sync_read)
-                       ret = hib_wait_on_bio_chain(&bio);
+                       ret = hib_wait_io(&hb);
                 if (ret)
                         break;
                 if (!(nr_pages % m))
@@ -996,7 +1076,7 @@ static int load_image(struct swap_map_handle *handle,
                                nr_pages / m * 10);
                 nr_pages++;
         }
-       err2 = hib_wait_on_bio_chain(&bio);
+       err2 = hib_wait_io(&hb);
         stop = ktime_get();
         if (!ret)
                 ret = err2;
@@ -1067,7 +1147,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
         unsigned int m;
         int ret = 0;
         int eof = 0;
-       struct bio *bio;
+       struct hib_bio_batch hb;
         ktime_t start;
         ktime_t stop;
         unsigned nr_pages;
@@ -1080,6 +1160,8 @@ static int load_image_lzo(struct swap_map_handle *handle,
         struct dec_data *data = NULL;
         struct crc_data *crc = NULL;
  
+       hib_init_batch(&hb);
+
         /*
          * We'll limit the number of threads for decompression to limit memory
          * footprint.
@@ -1190,7 +1272,6 @@ static int load_image_lzo(struct swap_map_handle *handle,
         if (!m)
                 m = 1;
         nr_pages = 0;
-       bio = NULL;
         start = ktime_get();
  
         ret = snapshot_write_next(snapshot);
@@ -1199,7 +1280,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
  
         for(;;) {
                 for (i = 0; !eof && i < want; i++) {
-                       ret = swap_read_page(handle, page[ring], &bio);
+                       ret = swap_read_page(handle, page[ring], &hb);
                         if (ret) {
                                 /*
                                  * On real read error, finish. On end of data,
@@ -1226,7 +1307,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
                         if (!asked)
                                 break;
  
-                       ret = hib_wait_on_bio_chain(&bio);
+                       ret = hib_wait_io(&hb);
                         if (ret)
                                 goto out_finish;
                         have += asked;
@@ -1281,7 +1362,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
                  * Wait for more data while we are decompressing.
                  */
                 if (have < LZO_CMP_PAGES && asked) {
-                       ret = hib_wait_on_bio_chain(&bio);
+                       ret = hib_wait_io(&hb);
                         if (ret)
                                 goto out_finish;
                         have += asked;
@@ -1430,7 +1511,7 @@ int swsusp_check(void)
         if (!IS_ERR(hib_resume_bdev)) {
                 set_blocksize(hib_resume_bdev, PAGE_SIZE);
                 clear_page(swsusp_header);
-               error = hib_bio_read_page(swsusp_resume_block,
+               error = hib_submit_io(READ_SYNC, swsusp_resume_block,
                                         swsusp_header, NULL);
                 if (error)
                         goto put;
@@ -1438,7 +1519,7 @@ int swsusp_check(void)
                 if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
                         memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
                         /* Reset swap signature now */
-                       error = hib_bio_write_page(swsusp_resume_block,
+                       error = hib_submit_io(WRITE_SYNC, swsusp_resume_block,
                                                 swsusp_header, NULL);
                 } else {
                         error = -EINVAL;
@@ -1482,10 +1563,10 @@ int swsusp_unmark(void)
  {
         int error;
  
-       hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
+       hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL);
         if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) {
                 memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10);
-               error = hib_bio_write_page(swsusp_resume_block,
+               error = hib_submit_io(WRITE_SYNC, swsusp_resume_block,
                                         swsusp_header, NULL);
         } else {
                 printk(KERN_ERR "PM: Cannot find swsusp signature!\n");
diff --git a/mm/page_io.c b/mm/page_io.c

index 6424869e275e2aa2d09debfa791ea08302ac68be..520baa4b04d75db5acd8711e6a693db634fb8990 100644 (file)
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -69,7 +69,7 @@ void end_swap_bio_write(struct bio *bio, int err)
         bio_put(bio);
  }
  
-void end_swap_bio_read(struct bio *bio, int err)
+static void end_swap_bio_read(struct bio *bio, int err)
  {
         const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
         struct page *page = bio->bi_io_vec[0].bv_page;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 25 Jun 2015 21:29:53 +0000 (14:29 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 25 Jun 2015 21:29:53 +0000 (14:29 -0700)
MAINTAINERS		patch \| blob \| history
block/bio-integrity.c		patch \| blob \| history
block/bio.c		patch \| blob \| history
block/blk-cgroup.c		patch \| blob \| history
block/blk-cgroup.h		patch \| blob \| history
block/blk-core.c		patch \| blob \| history
block/blk-exec.c		patch \| blob \| history
block/blk-merge.c		patch \| blob \| history
block/blk-mq-tag.c		patch \| blob \| history
block/blk-mq-tag.h		patch \| blob \| history
block/blk-mq.c		patch \| blob \| history
block/blk.h		patch \| blob \| history
block/bounce.c		patch \| blob \| history
block/cfq-iosched.c		patch \| blob \| history
block/elevator.c		patch \| blob \| history
block/ioctl.c		patch \| blob \| history
drivers/block/nbd.c		patch \| blob \| history
drivers/block/paride/pd.c		patch \| blob \| history
drivers/block/sx8.c		patch \| blob \| history
drivers/block/virtio_blk.c		patch \| blob \| history
drivers/ide/ide-atapi.c		patch \| blob \| history
drivers/ide/ide-cd.c		patch \| blob \| history
drivers/ide/ide-cd_ioctl.c		patch \| blob \| history
drivers/ide/ide-devsets.c		patch \| blob \| history
drivers/ide/ide-eh.c		patch \| blob \| history
drivers/ide/ide-floppy.c		patch \| blob \| history
drivers/ide/ide-io.c		patch \| blob \| history
drivers/ide/ide-ioctls.c		patch \| blob \| history
drivers/ide/ide-park.c		patch \| blob \| history
drivers/ide/ide-pm.c		patch \| blob \| history
drivers/ide/ide-tape.c		patch \| blob \| history
drivers/ide/ide-taskfile.c		patch \| blob \| history
drivers/md/bcache/io.c		patch \| blob \| history
drivers/md/bcache/request.c		patch \| blob \| history
drivers/md/dm-cache-target.c		patch \| blob \| history
drivers/md/dm-raid1.c		patch \| blob \| history
drivers/md/dm-snap.c		patch \| blob \| history
drivers/md/dm-table.c		patch \| blob \| history
drivers/md/dm-thin.c		patch \| blob \| history
drivers/md/dm-verity.c		patch \| blob \| history
drivers/md/dm.c		patch \| blob \| history
drivers/md/dm.h		patch \| blob \| history
fs/btrfs/disk-io.c		patch \| blob \| history
fs/btrfs/extent_io.c		patch \| blob \| history
fs/btrfs/volumes.c		patch \| blob \| history
fs/btrfs/volumes.h		patch \| blob \| history
fs/buffer.c		patch \| blob \| history
fs/ext4/page-io.c		patch \| blob \| history
fs/nilfs2/segbuf.c		patch \| blob \| history
fs/xfs/xfs_aops.c		patch \| blob \| history
include/linux/bio.h		patch \| blob \| history
include/linux/blk-mq.h		patch \| blob \| history
include/linux/blk_types.h		patch \| blob \| history
include/linux/blkdev.h		patch \| blob \| history
include/linux/elevator.h		patch \| blob \| history
include/linux/fs.h		patch \| blob \| history
include/linux/ide.h		patch \| blob \| history
include/linux/swap.h		patch \| blob \| history
include/uapi/linux/nbd.h		patch \| blob \| history
kernel/power/Makefile		patch \| blob \| history
kernel/power/block_io.c	[deleted file]	patch \| blob \| history
kernel/power/power.h		patch \| blob \| history
kernel/power/swap.c		patch \| blob \| history
mm/page_io.c		patch \| blob \| history