Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm...
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8a86b62466f7ce72b54853b283e03fd495df8083..b40af3203089c846db053dfac879567230509299 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -38,6 +38,7 @@
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/blk-mq.h>
 #include <linux/fs.h>
 #include <linux/blkdev.h>
 #include <linux/slab.h>
@@ -340,9 +341,7 @@ struct rbd_device {
 
        char                    name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
 
-       struct list_head        rq_queue;       /* incoming rq queue */
        spinlock_t              lock;           /* queue, flags, open_count */
-       struct work_struct      rq_work;
 
        struct rbd_image_header header;
        unsigned long           flags;          /* possibly lock protected */
@@ -360,6 +359,9 @@ struct rbd_device {
        atomic_t                parent_ref;
        struct rbd_device       *parent;
 
+       /* Block layer tags. */
+       struct blk_mq_tag_set   tag_set;
+
        /* protects updating the header */
        struct rw_semaphore     header_rwsem;
 
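The tag set lives in the per-device struct; combined with the cmd_size configured in rbd_init_disk() below, it makes the block layer preallocate a work item behind every request. A conceptual sketch of that layout (illustration, not code from rbd.c):

	/*
	 * With tag_set.cmd_size = sizeof(struct work_struct), each request
	 * the block layer allocates carries a private data unit (pdu):
	 *
	 *     [ struct request | struct work_struct ]
	 *                        ^-- blk_mq_rq_to_pdu(rq)
	 *
	 * so queuing an I/O never needs a separate allocation for its
	 * work item.
	 */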
@@ -1817,7 +1819,8 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
 
        /*
         * We support a 64-bit length, but ultimately it has to be
-        * passed to blk_end_request(), which takes an unsigned int.
+        * passed to the block layer, which supports only a 32-bit
+        * length field.
         */
        obj_request->xferred = osd_req->r_reply_op_len[0];
        rbd_assert(obj_request->xferred < (u64)UINT_MAX);
@@ -2275,7 +2278,10 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
                more = obj_request->which < img_request->obj_request_count - 1;
        } else {
                rbd_assert(img_request->rq != NULL);
-               more = blk_end_request(img_request->rq, result, xferred);
+
+               more = blk_update_request(img_request->rq, result, xferred);
+               if (!more)
+                       __blk_mq_end_request(img_request->rq, result);
        }
 
        return more;
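Under the old request_fn model, blk_end_request() did byte accounting and completion in one call; blk-mq splits the two. A minimal sketch of the idiom used above, assuming a same-era kernel (my_finish_bytes() is a hypothetical helper, not part of rbd.c):

	#include <linux/blk-mq.h>

	/* Account for `bytes` of progress; complete once nothing remains. */
	static bool my_finish_bytes(struct request *rq, int error,
				    unsigned int bytes)
	{
		/* blk_update_request() returns true while unfinished
		 * bytes remain outstanding on the request */
		bool more = blk_update_request(rq, error, bytes);

		if (!more)
			__blk_mq_end_request(rq, error);
		return more;
	}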
@@ -3304,8 +3310,10 @@ out:
        return ret;
 }
 
-static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq)
+static void rbd_queue_workfn(struct work_struct *work)
 {
+       struct request *rq = blk_mq_rq_from_pdu(work);
+       struct rbd_device *rbd_dev = rq->q->queuedata;
        struct rbd_img_request *img_request;
        struct ceph_snap_context *snapc = NULL;
        u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT;
@@ -3314,6 +3322,13 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq)
        u64 mapping_size;
        int result;
 
+       if (rq->cmd_type != REQ_TYPE_FS) {
+               dout("%s: non-fs request type %d\n", __func__,
+                       (int) rq->cmd_type);
+               result = -EIO;
+               goto err;
+       }
+
        if (rq->cmd_flags & REQ_DISCARD)
                op_type = OBJ_OP_DISCARD;
        else if (rq->cmd_flags & REQ_WRITE)
@@ -3359,6 +3374,8 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq)
                goto err_rq;    /* Shouldn't happen */
        }
 
+       blk_mq_start_request(rq);
+
        down_read(&rbd_dev->header_rwsem);
        mapping_size = rbd_dev->mapping.size;
        if (op_type != OBJ_OP_READ) {
@@ -3404,53 +3421,18 @@ err_rq:
                rbd_warn(rbd_dev, "%s %llx at %llx result %d",
                         obj_op_name(op_type), length, offset, result);
        ceph_put_snap_context(snapc);
-       blk_end_request_all(rq, result);
+err:
+       blk_mq_end_request(rq, result);
 }
 
-static void rbd_request_workfn(struct work_struct *work)
+static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+               const struct blk_mq_queue_data *bd)
 {
-       struct rbd_device *rbd_dev =
-           container_of(work, struct rbd_device, rq_work);
-       struct request *rq, *next;
-       LIST_HEAD(requests);
-
-       spin_lock_irq(&rbd_dev->lock); /* rq->q->queue_lock */
-       list_splice_init(&rbd_dev->rq_queue, &requests);
-       spin_unlock_irq(&rbd_dev->lock);
+       struct request *rq = bd->rq;
+       struct work_struct *work = blk_mq_rq_to_pdu(rq);
 
-       list_for_each_entry_safe(rq, next, &requests, queuelist) {
-               list_del_init(&rq->queuelist);
-               rbd_handle_request(rbd_dev, rq);
-       }
-}
-
-/*
- * Called with q->queue_lock held and interrupts disabled, possibly on
- * the way to schedule().  Do not sleep here!
- */
-static void rbd_request_fn(struct request_queue *q)
-{
-       struct rbd_device *rbd_dev = q->queuedata;
-       struct request *rq;
-       int queued = 0;
-
-       rbd_assert(rbd_dev);
-
-       while ((rq = blk_fetch_request(q))) {
-               /* Ignore any non-FS requests that filter through. */
-               if (rq->cmd_type != REQ_TYPE_FS) {
-                       dout("%s: non-fs request type %d\n", __func__,
-                               (int) rq->cmd_type);
-                       __blk_end_request_all(rq, 0);
-                       continue;
-               }
-
-               list_add_tail(&rq->queuelist, &rbd_dev->rq_queue);
-               queued++;
-       }
-
-       if (queued)
-               queue_work(rbd_wq, &rbd_dev->rq_work);
+       queue_work(rbd_wq, work);
+       return BLK_MQ_RQ_QUEUE_OK;
 }
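->queue_rq() can be invoked from contexts that must not sleep, so the driver only kicks a work item and does the real work in rbd_queue_workfn(). A condensed sketch of the two halves and the pdu round-trip that connects them, assuming the same-era blk-mq API (the my_* names are hypothetical, and where rbd queues onto its dedicated rbd_wq, the sketch uses schedule_work() for brevity):

	#include <linux/blk-mq.h>
	#include <linux/workqueue.h>

	/* Process context: recover the request from its embedded work item.
	 * Assumes ->init_request did INIT_WORK(work, my_workfn). */
	static void my_workfn(struct work_struct *work)
	{
		struct request *rq = blk_mq_rq_from_pdu(work);

		blk_mq_start_request(rq);
		/* ... issue the backend I/O and wait for it here ... */
		blk_mq_end_request(rq, 0);
	}

	/* Possibly-atomic context: must not sleep, so only queue the work. */
	static int my_queue_rq(struct blk_mq_hw_ctx *hctx,
			       const struct blk_mq_queue_data *bd)
	{
		schedule_work(blk_mq_rq_to_pdu(bd->rq));
		return BLK_MQ_RQ_QUEUE_OK;
	}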
 
 /*
@@ -3511,6 +3493,7 @@ static void rbd_free_disk(struct rbd_device *rbd_dev)
                del_gendisk(disk);
                if (disk->queue)
                        blk_cleanup_queue(disk->queue);
+               blk_mq_free_tag_set(&rbd_dev->tag_set);
        }
        put_disk(disk);
 }
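Teardown mirrors bring-up: blk_cleanup_queue() drains any in-flight requests, all of which still reference the tag set, so the set is freed only afterwards. Sketched unload order under that assumption (my_dev is hypothetical):

	del_gendisk(disk);
	blk_cleanup_queue(disk->queue);		/* drains outstanding requests */
	blk_mq_free_tag_set(&my_dev->tag_set);	/* safe only after the drain */
	put_disk(disk);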
@@ -3694,7 +3677,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
 
        ret = rbd_dev_header_info(rbd_dev);
        if (ret)
-               return ret;
+               goto out;
 
        /*
         * If there is a parent, see if it has disappeared due to the
@@ -3703,30 +3686,46 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
        if (rbd_dev->parent) {
                ret = rbd_dev_v2_parent_info(rbd_dev);
                if (ret)
-                       return ret;
+                       goto out;
        }
 
        if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
-               if (rbd_dev->mapping.size != rbd_dev->header.image_size)
-                       rbd_dev->mapping.size = rbd_dev->header.image_size;
+               rbd_dev->mapping.size = rbd_dev->header.image_size;
        } else {
                /* validate mapped snapshot's EXISTS flag */
                rbd_exists_validate(rbd_dev);
        }
 
+out:
        up_write(&rbd_dev->header_rwsem);
-
-       if (mapping_size != rbd_dev->mapping.size)
+       if (!ret && mapping_size != rbd_dev->mapping.size)
                rbd_dev_update_size(rbd_dev);
 
+       return ret;
+}
+
+static int rbd_init_request(void *data, struct request *rq,
+               unsigned int hctx_idx, unsigned int request_idx,
+               unsigned int numa_node)
+{
+       struct work_struct *work = blk_mq_rq_to_pdu(rq);
+
+       INIT_WORK(work, rbd_queue_workfn);
        return 0;
 }
 
+static struct blk_mq_ops rbd_mq_ops = {
+       .queue_rq       = rbd_queue_rq,
+       .map_queue      = blk_mq_map_queue,
+       .init_request   = rbd_init_request,
+};
+
 static int rbd_init_disk(struct rbd_device *rbd_dev)
 {
        struct gendisk *disk;
        struct request_queue *q;
        u64 segment_size;
+       int err;
 
        /* create gendisk info */
        disk = alloc_disk(single_major ?
@@ -3744,10 +3743,25 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
        disk->fops = &rbd_bd_ops;
        disk->private_data = rbd_dev;
 
-       q = blk_init_queue(rbd_request_fn, &rbd_dev->lock);
-       if (!q)
+       memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
+       rbd_dev->tag_set.ops = &rbd_mq_ops;
+       rbd_dev->tag_set.queue_depth = BLKDEV_MAX_RQ;
+       rbd_dev->tag_set.numa_node = NUMA_NO_NODE;
+       rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+       rbd_dev->tag_set.nr_hw_queues = 1;
+       rbd_dev->tag_set.cmd_size = sizeof(struct work_struct);
+
+       err = blk_mq_alloc_tag_set(&rbd_dev->tag_set);
+       if (err)
                goto out_disk;
 
+       q = blk_mq_init_queue(&rbd_dev->tag_set);
+       if (IS_ERR(q)) {
+               err = PTR_ERR(q);
+               goto out_tag_set;
+       }
+
        /* We use the default size, but let's be explicit about it. */
        blk_queue_physical_block_size(q, SECTOR_SIZE);
 
@@ -3773,10 +3787,11 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
        rbd_dev->disk = disk;
 
        return 0;
+out_tag_set:
+       blk_mq_free_tag_set(&rbd_dev->tag_set);
 out_disk:
        put_disk(disk);
-
-       return -ENOMEM;
+       return err;
 }
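One trap in this conversion: blk_init_queue() returned NULL on failure, but blk_mq_init_queue() returns an ERR_PTR, and the tag set allocated first must be unwound on that path. A minimal bring-up sketch under those assumptions (struct my_device and my_init_queue() are hypothetical):

	#include <linux/blk-mq.h>
	#include <linux/err.h>

	struct my_device {			/* illustration only */
		struct blk_mq_tag_set	tag_set;
	};

	static int my_init_queue(struct my_device *my_dev)
	{
		struct request_queue *q;
		int err;

		err = blk_mq_alloc_tag_set(&my_dev->tag_set);
		if (err)
			return err;		/* nothing to unwind yet */

		q = blk_mq_init_queue(&my_dev->tag_set);
		if (IS_ERR(q)) {		/* ERR_PTR, not NULL, on failure */
			blk_mq_free_tag_set(&my_dev->tag_set);
			return PTR_ERR(q);
		}

		q->queuedata = my_dev;		/* read back by the work function */
		return 0;
	}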
 
 /*
@@ -4033,8 +4048,6 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
                return NULL;
 
        spin_lock_init(&rbd_dev->lock);
-       INIT_LIST_HEAD(&rbd_dev->rq_queue);
-       INIT_WORK(&rbd_dev->rq_work, rbd_request_workfn);
        rbd_dev->flags = 0;
        atomic_set(&rbd_dev->parent_ref, 0);
        INIT_LIST_HEAD(&rbd_dev->node);
@@ -4274,32 +4287,22 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
        }
 
        /*
-        * We always update the parent overlap.  If it's zero we
-        * treat it specially.
+        * We always update the parent overlap.  If it's zero we issue
+        * a warning, as we will proceed as if there were no parent.
         */
-       rbd_dev->parent_overlap = overlap;
        if (!overlap) {
-
-               /* A null parent_spec indicates it's the initial probe */
-
                if (parent_spec) {
-                       /*
-                        * The overlap has become zero, so the clone
-                        * must have been resized down to 0 at some
-                        * point.  Treat this the same as a flatten.
-                        */
-                       rbd_dev_parent_put(rbd_dev);
-                       pr_info("%s: clone image now standalone\n",
-                               rbd_dev->disk->disk_name);
+                       /* refresh, careful to warn just once */
+                       if (rbd_dev->parent_overlap)
+                               rbd_warn(rbd_dev,
+                                   "clone now standalone (overlap became 0)");
                } else {
-                       /*
-                        * For the initial probe, if we find the
-                        * overlap is zero we just pretend there was
-                        * no parent image.
-                        */
-                       rbd_warn(rbd_dev, "ignoring parent with overlap 0");
+                       /* initial probe */
+                       rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
                }
        }
+       rbd_dev->parent_overlap = overlap;
+
 out:
        ret = 0;
 out_err:
@@ -4770,36 +4773,6 @@ static inline size_t next_token(const char **buf)
        return strcspn(*buf, spaces);   /* Return token length */
 }
 
-/*
- * Finds the next token in *buf, and if the provided token buffer is
- * big enough, copies the found token into it.  The result, if
- * copied, is guaranteed to be terminated with '\0'.  Note that *buf
- * must be terminated with '\0' on entry.
- *
- * Returns the length of the token found (not including the '\0').
- * Return value will be 0 if no token is found, and it will be >=
- * token_size if the token would not fit.
- *
- * The *buf pointer will be updated to point beyond the end of the
- * found token.  Note that this occurs even if the token buffer is
- * too small to hold it.
- */
-static inline size_t copy_token(const char **buf,
-                               char *token,
-                               size_t token_size)
-{
-        size_t len;
-
-       len = next_token(buf);
-       if (len < token_size) {
-               memcpy(token, *buf, len);
-               *(token + len) = '\0';
-       }
-       *buf += len;
-
-        return len;
-}
-
 /*
  * Finds the next token in *buf, dynamically allocates a buffer big
  * enough to hold a copy of it, and copies the token into the new