[firefly-linux-kernel-4.4.55.git] / block / blk-core.c
index 1f61b74867e41d3f74f61aeec539e8b00157dacf..3c923a7aeb56f1658142b091868c3c29ebffd3c5 100644 (file)
 #include <linux/fault-inject.h>
 #include <linux/list_sort.h>
 #include <linux/delay.h>
+#include <linux/ratelimit.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
 
 #include "blk.h"
+#include "blk-cgroup.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -280,7 +282,7 @@ EXPORT_SYMBOL(blk_stop_queue);
  *
  *     This function does not cancel any asynchronous activity arising
 *     out of elevator or throttling code. That would require elevator_exit()
- *     and blk_throtl_exit() to be called with queue lock initialized.
+ *     and blkcg_exit_queue() to be called with queue lock initialized.
  *
  */
 void blk_sync_queue(struct request_queue *q)
@@ -365,17 +367,23 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 
                spin_lock_irq(q->queue_lock);
 
-               elv_drain_elevator(q);
-               if (drain_all)
-                       blk_throtl_drain(q);
+               /*
+                * The caller might be trying to drain @q before its
+                * elevator is initialized.
+                */
+               if (q->elevator)
+                       elv_drain_elevator(q);
+
+               blkcg_drain_queue(q);
 
                /*
                 * This function might be called on a queue which failed
-                * driver init after queue creation.  Some drivers
-                * (e.g. fd) get unhappy in such cases.  Kick queue iff
-                * dispatch queue has something on it.
+                * driver init after queue creation or is not yet fully
+                * active.  Some drivers (e.g. fd and loop) get unhappy
+                * in such cases.  Kick queue iff dispatch queue has
+                * something on it and @q has request_fn set.
                 */
-               if (!list_empty(&q->queue_head))
+               if (!list_empty(&q->queue_head) && q->request_fn)
                        __blk_run_queue(q);
 
                drain |= q->rq.elvpriv;
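
The hunk above only shows the middle of blk_drain_queue(); to make the new guards easier to follow, here is a rough sketch of the surrounding loop reconstructed from the visible fragments. The while/msleep structure and the extra checks under drain_all are assumptions inferred from the braces and the drain flag in the diff, not a verbatim copy of the kernel source.

        void blk_drain_queue(struct request_queue *q, bool drain_all)
        {
                while (true) {
                        bool drain = false;

                        spin_lock_irq(q->queue_lock);

                        /* the elevator may not exist yet; see the comment in the hunk */
                        if (q->elevator)
                                elv_drain_elevator(q);
                        blkcg_drain_queue(q);

                        /* only kick a queue that actually has a request_fn */
                        if (!list_empty(&q->queue_head) && q->request_fn)
                                __blk_run_queue(q);

                        drain |= q->rq.elvpriv;
                        if (drain_all) {
                                /* also wait for anything still sitting on the dispatch list */
                                drain |= !list_empty(&q->queue_head);
                        }

                        spin_unlock_irq(q->queue_lock);

                        if (!drain)
                                break;
                        msleep(10);
                }
        }
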
@@ -402,6 +410,49 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
        }
 }
 
+/**
+ * blk_queue_bypass_start - enter queue bypass mode
+ * @q: queue of interest
+ *
+ * In bypass mode, only the dispatch FIFO queue of @q is used.  This
+ * function makes @q enter bypass mode and drains all requests which were
+ * throttled or issued before.  On return, it's guaranteed that no request
+ * is being throttled or has ELVPRIV set, and that blk_queue_bypass() is
+ * %true inside the queue lock or an RCU read lock.
+ */
+void blk_queue_bypass_start(struct request_queue *q)
+{
+       bool drain;
+
+       spin_lock_irq(q->queue_lock);
+       drain = !q->bypass_depth++;
+       queue_flag_set(QUEUE_FLAG_BYPASS, q);
+       spin_unlock_irq(q->queue_lock);
+
+       if (drain) {
+               blk_drain_queue(q, false);
+               /* ensure blk_queue_bypass() is %true inside RCU read lock */
+               synchronize_rcu();
+       }
+}
+EXPORT_SYMBOL_GPL(blk_queue_bypass_start);
+
+/**
+ * blk_queue_bypass_end - leave queue bypass mode
+ * @q: queue of interest
+ *
+ * Leave bypass mode and restore the normal queueing behavior.
+ */
+void blk_queue_bypass_end(struct request_queue *q)
+{
+       spin_lock_irq(q->queue_lock);
+       if (!--q->bypass_depth)
+               queue_flag_clear(QUEUE_FLAG_BYPASS, q);
+       WARN_ON_ONCE(q->bypass_depth < 0);
+       spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
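
These two helpers are meant to bracket work that must not race with ELVPRIV request allocation. A minimal, hypothetical caller (the function name and the policy-update placeholder are illustrative, not part of this patch) might look like:

        static void example_update_queue_policy(struct request_queue *q)
        {
                blk_queue_bypass_start(q);      /* drain + synchronize_rcu() */

                /*
                 * Safe to add or remove per-queue policy data here: no
                 * request holds ELVPRIV and every RCU reader sees
                 * blk_queue_bypass(q) == true.
                 */

                blk_queue_bypass_end(q);        /* restore normal queueing */
        }

Because bypass_depth is a counter, independent callers can nest such sections; the queue leaves bypass mode only when the last blk_queue_bypass_end() drops the depth back to zero.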
+
 /**
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
@@ -418,6 +469,19 @@ void blk_cleanup_queue(struct request_queue *q)
        queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
 
        spin_lock_irq(lock);
+
+       /*
+        * Dead queue is permanently in bypass mode till released.  Note
+        * that, unlike blk_queue_bypass_start(), we aren't performing
+        * synchronize_rcu() after entering bypass mode to avoid the delay
+        * as some drivers create and destroy a lot of queues while
+        * probing.  This is still safe because blk_release_queue() will be
+        * called only after the queue refcnt drops to zero and nothing,
+        * RCU or not, would be traversing the queue by then.
+        */
+       q->bypass_depth++;
+       queue_flag_set(QUEUE_FLAG_BYPASS, q);
+
        queue_flag_set(QUEUE_FLAG_NOMERGES, q);
        queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
        queue_flag_set(QUEUE_FLAG_DEAD, q);
@@ -428,13 +492,8 @@ void blk_cleanup_queue(struct request_queue *q)
        spin_unlock_irq(lock);
        mutex_unlock(&q->sysfs_lock);
 
-       /*
-        * Drain all requests queued before DEAD marking.  The caller might
-        * be trying to tear down @q before its elevator is initialized, in
-        * which case we don't want to call into draining.
-        */
-       if (q->elevator)
-               blk_drain_queue(q, true);
+       /* drain all requests queued before DEAD marking */
+       blk_drain_queue(q, true);
 
        /* @q won't process any more request, flush async actions */
        del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
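
Putting the two blk_cleanup_queue() hunks together, the shutdown sequence now reads roughly as below (a condensed paraphrase of the code above, with the sysfs_lock handling elided; lock is the queue_lock pointer used by the surrounding function):

        spin_lock_irq(lock);
        q->bypass_depth++;                      /* permanent bypass, no synchronize_rcu() */
        queue_flag_set(QUEUE_FLAG_BYPASS, q);
        queue_flag_set(QUEUE_FLAG_NOMERGES, q);
        queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
        queue_flag_set(QUEUE_FLAG_DEAD, q);
        spin_unlock_irq(lock);

        /* only after the DEAD/BYPASS flags are visible does the drain run */
        blk_drain_queue(q, true);
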
@@ -498,14 +557,15 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        if (err)
                goto fail_id;
 
-       if (blk_throtl_init(q))
-               goto fail_id;
-
        setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
                    laptop_mode_timer_fn, (unsigned long) q);
        setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
+       INIT_LIST_HEAD(&q->queue_head);
        INIT_LIST_HEAD(&q->timeout_list);
        INIT_LIST_HEAD(&q->icq_list);
+#ifdef CONFIG_BLK_CGROUP
+       INIT_LIST_HEAD(&q->blkg_list);
+#endif
        INIT_LIST_HEAD(&q->flush_queue[0]);
        INIT_LIST_HEAD(&q->flush_queue[1]);
        INIT_LIST_HEAD(&q->flush_data_in_flight);
@@ -522,6 +582,18 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
         */
        q->queue_lock = &q->__queue_lock;
 
+       /*
+        * A queue starts its life with bypass turned on to avoid
+        * unnecessary bypass on/off overhead and nasty surprises during
+        * init.  The initial bypass will be finished at the end of
+        * blk_init_allocated_queue().
+        */
+       q->bypass_depth = 1;
+       __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
+
+       if (blkcg_init_queue(q))
+               goto fail_id;
+
        return q;
 
 fail_id:
@@ -614,15 +686,15 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 
        q->sg_reserved_size = INT_MAX;
 
-       /*
-        * all done
-        */
-       if (!elevator_init(q, NULL)) {
-               blk_queue_congestion_threshold(q);
-               return q;
-       }
+       /* init elevator */
+       if (elevator_init(q, NULL))
+               return NULL;
 
-       return NULL;
+       blk_queue_congestion_threshold(q);
+
+       /* all done, end the initial bypass */
+       blk_queue_bypass_end(q);
+       return q;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
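
For drivers, the initial bypass is invisible as long as they go through the usual two-step construction; the sketch below mirrors the existing blk_init_queue_node() pattern (example_create_queue, the rfn/lock arguments and the NUMA_NO_NODE choice are illustrative, not part of this patch):

        static struct request_queue *example_create_queue(request_fn_proc *rfn,
                                                          spinlock_t *lock)
        {
                struct request_queue *uninit_q, *q;

                /* queue is born with bypass_depth == 1 and QUEUE_FLAG_BYPASS set */
                uninit_q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
                if (!uninit_q)
                        return NULL;

                /* elevator_init() runs here; on success the initial bypass ends */
                q = blk_init_allocated_queue(uninit_q, rfn, lock);
                if (!q)
                        blk_cleanup_queue(uninit_q);

                return q;
        }
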
 
@@ -648,33 +720,6 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
        mempool_free(rq, q->rq.rq_pool);
 }
 
-static struct request *
-blk_alloc_request(struct request_queue *q, struct io_cq *icq,
-                 unsigned int flags, gfp_t gfp_mask)
-{
-       struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
-
-       if (!rq)
-               return NULL;
-
-       blk_rq_init(q, rq);
-
-       rq->cmd_flags = flags | REQ_ALLOCED;
-
-       if (flags & REQ_ELVPRIV) {
-               rq->elv.icq = icq;
-               if (unlikely(elv_set_request(q, rq, gfp_mask))) {
-                       mempool_free(rq, q->rq.rq_pool);
-                       return NULL;
-               }
-               /* @rq->elv.icq holds on to io_context until @rq is freed */
-               if (icq)
-                       get_io_context(icq->ioc);
-       }
-
-       return rq;
-}
-
 /*
  * ioc_batching returns true if the ioc is a valid batching request and
  * should be given priority access to a request.
@@ -762,6 +807,22 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
        return true;
 }
 
+/**
+ * rq_ioc - determine io_context for request allocation
+ * @bio: request being allocated is for this bio (can be %NULL)
+ *
+ * Determine io_context to use for request allocation for @bio.  May return
+ * %NULL if %current->io_context doesn't exist.
+ */
+static struct io_context *rq_ioc(struct bio *bio)
+{
+#ifdef CONFIG_BLK_CGROUP
+       if (bio && bio->bi_ioc)
+               return bio->bi_ioc;
+#endif
+       return current->io_context;
+}
+
 /**
  * get_request - get a free request
  * @q: request_queue to allocate request from
@@ -779,7 +840,7 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
 static struct request *get_request(struct request_queue *q, int rw_flags,
                                   struct bio *bio, gfp_t gfp_mask)
 {
-       struct request *rq = NULL;
+       struct request *rq;
        struct request_list *rl = &q->rq;
        struct elevator_type *et;
        struct io_context *ioc;
@@ -789,7 +850,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
        int may_queue;
 retry:
        et = q->elevator->type;
-       ioc = current->io_context;
+       ioc = rq_ioc(bio);
 
        if (unlikely(blk_queue_dead(q)))
                return NULL;
@@ -808,7 +869,7 @@ retry:
                         */
                        if (!ioc && !retried) {
                                spin_unlock_irq(q->queue_lock);
-                               create_io_context(current, gfp_mask, q->node);
+                               create_io_context(gfp_mask, q->node);
                                spin_lock_irq(q->queue_lock);
                                retried = true;
                                goto retry;
@@ -831,7 +892,7 @@ retry:
                                         * process is not a "batcher", and not
                                         * exempted by the IO scheduler
                                         */
-                                       goto out;
+                                       return NULL;
                                }
                        }
                }
@@ -844,7 +905,7 @@ retry:
         * allocated with any setting of ->nr_requests
         */
        if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
-               goto out;
+               return NULL;
 
        rl->count[is_sync]++;
        rl->starved[is_sync] = 0;
@@ -859,8 +920,7 @@ retry:
         * Also, lookup icq while holding queue_lock.  If it doesn't exist,
         * it will be created after releasing queue_lock.
         */
-       if (blk_rq_should_init_elevator(bio) &&
-           !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
+       if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
                rw_flags |= REQ_ELVPRIV;
                rl->elvpriv++;
                if (et->icq_cache && ioc)
@@ -871,41 +931,36 @@ retry:
                rw_flags |= REQ_IO_STAT;
        spin_unlock_irq(q->queue_lock);
 
-       /* create icq if missing */
-       if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
-               icq = ioc_create_icq(q, gfp_mask);
-               if (!icq)
-                       goto fail_icq;
-       }
-
-       rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+       /* allocate and init request */
+       rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
+       if (!rq)
+               goto fail_alloc;
 
-fail_icq:
-       if (unlikely(!rq)) {
-               /*
-                * Allocation failed presumably due to memory. Undo anything
-                * we might have messed up.
-                *
-                * Allocating task should really be put onto the front of the
-                * wait queue, but this is pretty rare.
-                */
-               spin_lock_irq(q->queue_lock);
-               freed_request(q, rw_flags);
+       blk_rq_init(q, rq);
+       rq->cmd_flags = rw_flags | REQ_ALLOCED;
+
+       /* init elvpriv */
+       if (rw_flags & REQ_ELVPRIV) {
+               if (unlikely(et->icq_cache && !icq)) {
+                       create_io_context(gfp_mask, q->node);
+                       ioc = rq_ioc(bio);
+                       if (!ioc)
+                               goto fail_elvpriv;
+
+                       icq = ioc_create_icq(ioc, q, gfp_mask);
+                       if (!icq)
+                               goto fail_elvpriv;
+               }
 
-               /*
-                * in the very unlikely event that allocation failed and no
-                * requests for this direction was pending, mark us starved
-                * so that freeing of a request in the other direction will
-                * notice us. another possible fix would be to split the
-                * rq mempool into READ and WRITE
-                */
-rq_starved:
-               if (unlikely(rl->count[is_sync] == 0))
-                       rl->starved[is_sync] = 1;
+               rq->elv.icq = icq;
+               if (unlikely(elv_set_request(q, rq, bio, gfp_mask)))
+                       goto fail_elvpriv;
 
-               goto out;
+               /* @rq->elv.icq holds io_context until @rq is freed */
+               if (icq)
+                       get_io_context(icq->ioc);
        }
-
+out:
        /*
         * ioc may be NULL here, and ioc_batching will be false. That's
         * OK, if the queue is under the request limit then requests need
@@ -916,8 +971,48 @@ rq_starved:
                ioc->nr_batch_requests--;
 
        trace_block_getrq(q, bio, rw_flags & 1);
-out:
        return rq;
+
+fail_elvpriv:
+       /*
+        * elvpriv init failed.  ioc, icq and elvpriv aren't mempool backed
+        * and may fail indefinitely under memory pressure and thus
+        * shouldn't stall IO.  Treat this request as !elvpriv.  This will
+        * disturb iosched and blkcg but weird is better than dead.
+        */
+       printk_ratelimited(KERN_WARNING "%s: request aux data allocation failed, iosched may be disturbed\n",
+                          dev_name(q->backing_dev_info.dev));
+
+       rq->cmd_flags &= ~REQ_ELVPRIV;
+       rq->elv.icq = NULL;
+
+       spin_lock_irq(q->queue_lock);
+       rl->elvpriv--;
+       spin_unlock_irq(q->queue_lock);
+       goto out;
+
+fail_alloc:
+       /*
+        * Allocation failed presumably due to memory. Undo anything we
+        * might have messed up.
+        *
+        * Allocating task should really be put onto the front of the wait
+        * queue, but this is pretty rare.
+        */
+       spin_lock_irq(q->queue_lock);
+       freed_request(q, rw_flags);
+
+       /*
+        * In the very unlikely event that allocation failed and no
+        * requests for this direction were pending, mark us starved so
+        * that freeing of a request in the other direction will notice
+        * us.  Another possible fix would be to split the rq mempool
+        * into READ and WRITE.
+        */
+rq_starved:
+       if (unlikely(rl->count[is_sync] == 0))
+               rl->starved[is_sync] = 1;
+       return NULL;
 }
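
Stitching the get_request() hunks together, the restructured allocation path proceeds roughly as follows (a condensed paraphrase to aid reading; the accounting, locking and label details are in the hunks above):

        /* 1. allocate and init the request itself (mempool backed) */
        rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
        if (!rq)
                goto fail_alloc;                /* undo request_list accounting, return NULL */

        blk_rq_init(q, rq);
        rq->cmd_flags = rw_flags | REQ_ALLOCED;

        /* 2. elevator/cgroup data is optional: failures degrade the request, not the allocation */
        if (rw_flags & REQ_ELVPRIV) {
                if (et->icq_cache && !icq) {
                        create_io_context(gfp_mask, q->node);
                        ioc = rq_ioc(bio);
                        if (!ioc || !(icq = ioc_create_icq(ioc, q, gfp_mask)))
                                goto fail_elvpriv;      /* warn, clear REQ_ELVPRIV, continue */
                }
                rq->elv.icq = icq;
                if (elv_set_request(q, rq, bio, gfp_mask))
                        goto fail_elvpriv;
                if (icq)
                        get_io_context(icq->ioc);
        }
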
 
 /**
@@ -961,7 +1056,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
                 * up to a big batch of them for a small period time.
                 * See ioc_batching, ioc_set_batching
                 */
-               create_io_context(current, GFP_NOIO, q->node);
+               create_io_context(GFP_NOIO, q->node);
                ioc_set_batching(q, current->io_context);
 
                spin_lock_irq(q->queue_lock);