Merge branch 'for-3.2/drivers' of git://git.kernel.dk/linux-block
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Nov 2011 00:22:14 +0000 (17:22 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Nov 2011 00:22:14 +0000 (17:22 -0700)
* 'for-3.2/drivers' of git://git.kernel.dk/linux-block: (30 commits)
  virtio-blk: use ida to allocate disk index
  hpsa: add small delay when using PCI Power Management to reset for kump
  cciss: add small delay when using PCI Power Management to reset for kump
  xen/blkback: Fix two races in the handling of barrier requests.
  xen/blkback: Check for proper operation.
  xen/blkback: Fix the inhibition to map pages when discarding sector ranges.
  xen/blkback: Report VBD_WSECT (wr_sect) properly.
  xen/blkback: Support 'feature-barrier' aka old-style BARRIER requests.
  xen-blkfront: plug device number leak in xlblk_init() error path
  xen-blkfront: If no barrier or flush is supported, use invalid operation.
  xen-blkback: use kzalloc() in favor of kmalloc()+memset()
  xen-blkback: fixed indentation and comments
  xen-blkfront: fix a deadlock while handling discard response
  xen-blkfront: Handle discard requests.
  xen-blkback: Implement discard requests ('feature-discard')
  xen-blkfront: add BLKIF_OP_DISCARD and discard request struct
  drivers/block/loop.c: remove unnecessary bdev argument from loop_clr_fd()
  drivers/block/loop.c: emit uevent on auto release
  drivers/block/cpqarray.c: use pci_dev->revision
  loop: always allow userspace partitions and optionally support automatic scanning
  ...

Fic up trivial header file includsion conflict in drivers/block/loop.c

18 files changed:
Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
Documentation/blockdev/cciss.txt
block/genhd.c
block/ioctl.c
drivers/block/cciss.c
drivers/block/cciss.h
drivers/block/cpqarray.c
drivers/block/loop.c
drivers/block/nbd.c
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/common.h
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c
drivers/scsi/hpsa.c
fs/block_dev.c
include/linux/genhd.h
include/linux/loop.h
include/xen/interface/io/blkif.h

index f5bb0a3bb8c0d5d18bdcfdd2bf8ce70765ed4dc5..53d99edd1d75acf45fc63152b111d5fbcc6ea202 100644 (file)
@@ -71,3 +71,10 @@ Description: Value of 1 indicates the controller can honor the reset_devices
                a dump device, as kdump requires resetting the device in order
                to work reliably.
 
+Where:         /sys/bus/pci/devices/<dev>/ccissX/transport_mode
+Date:          July 2011
+Kernel Version:        3.0
+Contact:       iss_storagedev@hp.com
+Description:   Value of "simple" indicates that the controller has been placed
+               in "simple mode". Value of "performant" indicates that the
+               controller has been placed in "performant mode".
index c00c6a5ab21f6b0d253bf465eb6e1274d5994d18..71464e09ec1841a39f031d972b281488e5d703af 100644 (file)
@@ -78,6 +78,16 @@ The device naming scheme is:
 /dev/cciss/c1d1p2              Controller 1, disk 1, partition 2
 /dev/cciss/c1d1p3              Controller 1, disk 1, partition 3
 
+CCISS simple mode support
+-------------------------
+
+The "cciss_simple_mode=1" boot parameter may be used to prevent the driver
+from putting the controller into "performant" mode. The difference is that
+with simple mode, each command completion requires an interrupt, while with
+"performant mode" (the default, and ordinarily better performing) it is
+possible to have multiple command completions indicated by a single
+interrupt.
+
 SCSI tape drive and medium changer support
 ------------------------------------------
 
index 024fc3944fb5b0a40311d5fde7d2c0cc950928a8..9253839714ff95b4acc6da413bd87f610ddce44c 100644 (file)
@@ -537,7 +537,7 @@ void register_disk(struct gendisk *disk)
        disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
 
        /* No minors to use for partitions */
-       if (!disk_partitionable(disk))
+       if (!disk_part_scan_enabled(disk))
                goto exit;
 
        /* No such device (e.g., media were just removed) */
@@ -848,7 +848,7 @@ static int show_partition(struct seq_file *seqf, void *v)
        char buf[BDEVNAME_SIZE];
 
        /* Don't show non-partitionable removeable devices or empty devices */
-       if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
+       if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
                                   (sgp->flags & GENHD_FL_REMOVABLE)))
                return 0;
        if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
index 1124cd297263571d63371ca8ed33f8c361b74e06..5c74efc019034af5ca519b747e5388258ebe83dd 100644 (file)
@@ -101,7 +101,7 @@ static int blkdev_reread_part(struct block_device *bdev)
        struct gendisk *disk = bdev->bd_disk;
        int res;
 
-       if (!disk_partitionable(disk) || bdev != bdev->bd_contains)
+       if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
                return -EINVAL;
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
index 8f4ef656a1af4ca435ff3ecc5e0d6079e6a1e34d..486f94ef24d499bfe0d17eb379fe5043a94f36a5 100644 (file)
@@ -68,6 +68,10 @@ static int cciss_tape_cmds = 6;
 module_param(cciss_tape_cmds, int, 0644);
 MODULE_PARM_DESC(cciss_tape_cmds,
        "number of commands to allocate for tape devices (default: 6)");
+static int cciss_simple_mode;
+module_param(cciss_simple_mode, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(cciss_simple_mode,
+       "Use 'simple mode' rather than 'performant mode'");
 
 static DEFINE_MUTEX(cciss_mutex);
 static struct proc_dir_entry *proc_cciss;
@@ -176,6 +180,7 @@ static void cciss_geometry_inquiry(ctlr_info_t *h, int logvol,
                        unsigned int block_size, InquiryData_struct *inq_buff,
                                   drive_info_struct *drv);
 static void __devinit cciss_interrupt_mode(ctlr_info_t *);
+static int __devinit cciss_enter_simple_mode(struct ctlr_info *h);
 static void start_io(ctlr_info_t *h);
 static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size,
                        __u8 page_code, unsigned char scsi3addr[],
@@ -388,7 +393,7 @@ static void cciss_seq_show_header(struct seq_file *seq)
                h->product_name,
                (unsigned long)h->board_id,
                h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
-               h->firm_ver[3], (unsigned int)h->intr[PERF_MODE_INT],
+               h->firm_ver[3], (unsigned int)h->intr[h->intr_mode],
                h->num_luns,
                h->Qdepth, h->commands_outstanding,
                h->maxQsinceinit, h->max_outstanding, h->maxSG);
@@ -636,6 +641,18 @@ static ssize_t host_store_rescan(struct device *dev,
 }
 static DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan);
 
+static ssize_t host_show_transport_mode(struct device *dev,
+                                struct device_attribute *attr,
+                                char *buf)
+{
+       struct ctlr_info *h = to_hba(dev);
+
+       return snprintf(buf, 20, "%s\n",
+               h->transMethod & CFGTBL_Trans_Performant ?
+                       "performant" : "simple");
+}
+static DEVICE_ATTR(transport_mode, S_IRUGO, host_show_transport_mode, NULL);
+
 static ssize_t dev_show_unique_id(struct device *dev,
                                 struct device_attribute *attr,
                                 char *buf)
@@ -808,6 +825,7 @@ static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
 static struct attribute *cciss_host_attrs[] = {
        &dev_attr_rescan.attr,
        &dev_attr_resettable.attr,
+       &dev_attr_transport_mode.attr,
        NULL
 };
 
@@ -3984,6 +4002,9 @@ static void __devinit cciss_put_controller_into_performant_mode(ctlr_info_t *h)
 {
        __u32 trans_support;
 
+       if (cciss_simple_mode)
+               return;
+
        dev_dbg(&h->pdev->dev, "Trying to put board into Performant mode\n");
        /* Attempt to put controller into performant mode if supported */
        /* Does board support performant mode? */
@@ -4081,7 +4102,7 @@ static void __devinit cciss_interrupt_mode(ctlr_info_t *h)
 default_int_mode:
 #endif                         /* CONFIG_PCI_MSI */
        /* if we get here we're going to use the default interrupt mode */
-       h->intr[PERF_MODE_INT] = h->pdev->irq;
+       h->intr[h->intr_mode] = h->pdev->irq;
        return;
 }
 
@@ -4341,6 +4362,9 @@ static int __devinit cciss_pci_init(ctlr_info_t *h)
        }
        cciss_enable_scsi_prefetch(h);
        cciss_p600_dma_prefetch_quirk(h);
+       err = cciss_enter_simple_mode(h);
+       if (err)
+               goto err_out_free_res;
        cciss_put_controller_into_performant_mode(h);
        return 0;
 
@@ -4533,6 +4557,13 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev,
                pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
                pmcsr |= PCI_D0;
                pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+               /*
+                * The P600 requires a small delay when changing states.
+                * Otherwise we may think the board did not reset and we bail.
+                * This for kdump only and is particular to the P600.
+                */
+               msleep(500);
        }
        return 0;
 }
@@ -4843,20 +4874,20 @@ static int cciss_request_irq(ctlr_info_t *h,
        irqreturn_t (*intxhandler)(int, void *))
 {
        if (h->msix_vector || h->msi_vector) {
-               if (!request_irq(h->intr[PERF_MODE_INT], msixhandler,
+               if (!request_irq(h->intr[h->intr_mode], msixhandler,
                                IRQF_DISABLED, h->devname, h))
                        return 0;
                dev_err(&h->pdev->dev, "Unable to get msi irq %d"
-                       " for %s\n", h->intr[PERF_MODE_INT],
+                       " for %s\n", h->intr[h->intr_mode],
                        h->devname);
                return -1;
        }
 
-       if (!request_irq(h->intr[PERF_MODE_INT], intxhandler,
+       if (!request_irq(h->intr[h->intr_mode], intxhandler,
                        IRQF_DISABLED, h->devname, h))
                return 0;
        dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
-               h->intr[PERF_MODE_INT], h->devname);
+               h->intr[h->intr_mode], h->devname);
        return -1;
 }
 
@@ -4887,7 +4918,7 @@ static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h)
 {
        int ctlr = h->ctlr;
 
-       free_irq(h->intr[PERF_MODE_INT], h);
+       free_irq(h->intr[h->intr_mode], h);
 #ifdef CONFIG_PCI_MSI
        if (h->msix_vector)
                pci_disable_msix(h->pdev);
@@ -4953,6 +4984,7 @@ reinit_after_soft_reset:
        h = hba[i];
        h->pdev = pdev;
        h->busy_initializing = 1;
+       h->intr_mode = cciss_simple_mode ? SIMPLE_MODE_INT : PERF_MODE_INT;
        INIT_LIST_HEAD(&h->cmpQ);
        INIT_LIST_HEAD(&h->reqQ);
        mutex_init(&h->busy_shutting_down);
@@ -5009,7 +5041,7 @@ reinit_after_soft_reset:
 
        dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
               h->devname, pdev->device, pci_name(pdev),
-              h->intr[PERF_MODE_INT], dac ? "" : " not");
+              h->intr[h->intr_mode], dac ? "" : " not");
 
        if (cciss_allocate_cmd_pool(h))
                goto clean4;
@@ -5056,7 +5088,7 @@ reinit_after_soft_reset:
                spin_lock_irqsave(&h->lock, flags);
                h->access.set_intr_mask(h, CCISS_INTR_OFF);
                spin_unlock_irqrestore(&h->lock, flags);
-               free_irq(h->intr[PERF_MODE_INT], h);
+               free_irq(h->intr[h->intr_mode], h);
                rc = cciss_request_irq(h, cciss_msix_discard_completions,
                                        cciss_intx_discard_completions);
                if (rc) {
@@ -5133,7 +5165,7 @@ clean4:
        cciss_free_cmd_pool(h);
        cciss_free_scatterlists(h);
        cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
-       free_irq(h->intr[PERF_MODE_INT], h);
+       free_irq(h->intr[h->intr_mode], h);
 clean2:
        unregister_blkdev(h->major, h->devname);
 clean1:
@@ -5172,9 +5204,31 @@ static void cciss_shutdown(struct pci_dev *pdev)
        if (return_code != IO_OK)
                dev_warn(&h->pdev->dev, "Error flushing cache\n");
        h->access.set_intr_mask(h, CCISS_INTR_OFF);
-       free_irq(h->intr[PERF_MODE_INT], h);
+       free_irq(h->intr[h->intr_mode], h);
+}
+
+static int __devinit cciss_enter_simple_mode(struct ctlr_info *h)
+{
+       u32 trans_support;
+
+       trans_support = readl(&(h->cfgtable->TransportSupport));
+       if (!(trans_support & SIMPLE_MODE))
+               return -ENOTSUPP;
+
+       h->max_commands = readl(&(h->cfgtable->CmdsOutMax));
+       writel(CFGTBL_Trans_Simple, &(h->cfgtable->HostWrite.TransportRequest));
+       writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+       cciss_wait_for_mode_change_ack(h);
+       print_cfg_table(h);
+       if (!(readl(&(h->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
+               dev_warn(&h->pdev->dev, "unable to get board into simple mode\n");
+               return -ENODEV;
+       }
+       h->transMethod = CFGTBL_Trans_Simple;
+       return 0;
 }
 
+
 static void __devexit cciss_remove_one(struct pci_dev *pdev)
 {
        ctlr_info_t *h;
index c049548e68b7ba39d27cf7b9c216868c8fc2b18e..7fda30e4a2416195696b6a4eebd560d74fca6917 100644 (file)
@@ -92,6 +92,7 @@ struct ctlr_info
        unsigned int intr[4];
        unsigned int msix_vector;
        unsigned int msi_vector;
+       int     intr_mode;
        int     cciss_max_sectors;
        BYTE    cciss_read;
        BYTE    cciss_write;
index b2fceb53e8092bd28355f7678ea4e92351952299..9125bbeacd4dcc6fa49043b7fe795b1c1c78b013 100644 (file)
@@ -620,6 +620,7 @@ static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
        }
        vendor_id = pdev->vendor;
        device_id = pdev->device;
+       revision  = pdev->revision;
        irq = pdev->irq;
 
        for(i=0; i<6; i++)
@@ -632,7 +633,6 @@ static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
        }
 
        pci_read_config_word(pdev, PCI_COMMAND, &command);
-       pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
        pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_line_size);
        pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &latency_timer);
 
index c77983ea86c8798a35605206e35266087f764809..3d806820280e3bc4aaa5e81d6bea411f6597e400 100644 (file)
@@ -76,6 +76,8 @@
 #include <linux/splice.h>
 #include <linux/sysfs.h>
 #include <linux/miscdevice.h>
+#include <linux/falloc.h>
+
 #include <asm/uaccess.h>
 
 static DEFINE_IDR(loop_index_idr);
@@ -407,6 +409,29 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
                        }
                }
 
+               /*
+                * We use punch hole to reclaim the free space used by the
+                * image a.k.a. discard. However we do support discard if
+                * encryption is enabled, because it may give an attacker
+                * useful information.
+                */
+               if (bio->bi_rw & REQ_DISCARD) {
+                       struct file *file = lo->lo_backing_file;
+                       int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+
+                       if ((!file->f_op->fallocate) ||
+                           lo->lo_encrypt_key_size) {
+                               ret = -EOPNOTSUPP;
+                               goto out;
+                       }
+                       ret = file->f_op->fallocate(file, mode, pos,
+                                                   bio->bi_size);
+                       if (unlikely(ret && ret != -EINVAL &&
+                                    ret != -EOPNOTSUPP))
+                               ret = -EIO;
+                       goto out;
+               }
+
                ret = lo_send(lo, bio, pos);
 
                if ((bio->bi_rw & REQ_FUA) && !ret) {
@@ -622,7 +647,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
                goto out_putf;
 
        fput(old_file);
-       if (max_part > 0)
+       if (lo->lo_flags & LO_FLAGS_PARTSCAN)
                ioctl_by_bdev(bdev, BLKRRPART, 0);
        return 0;
 
@@ -699,16 +724,25 @@ static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
        return sprintf(buf, "%s\n", autoclear ? "1" : "0");
 }
 
+static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
+{
+       int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
+
+       return sprintf(buf, "%s\n", partscan ? "1" : "0");
+}
+
 LOOP_ATTR_RO(backing_file);
 LOOP_ATTR_RO(offset);
 LOOP_ATTR_RO(sizelimit);
 LOOP_ATTR_RO(autoclear);
+LOOP_ATTR_RO(partscan);
 
 static struct attribute *loop_attrs[] = {
        &loop_attr_backing_file.attr,
        &loop_attr_offset.attr,
        &loop_attr_sizelimit.attr,
        &loop_attr_autoclear.attr,
+       &loop_attr_partscan.attr,
        NULL,
 };
 
@@ -729,6 +763,35 @@ static void loop_sysfs_exit(struct loop_device *lo)
                           &loop_attribute_group);
 }
 
+static void loop_config_discard(struct loop_device *lo)
+{
+       struct file *file = lo->lo_backing_file;
+       struct inode *inode = file->f_mapping->host;
+       struct request_queue *q = lo->lo_queue;
+
+       /*
+        * We use punch hole to reclaim the free space used by the
+        * image a.k.a. discard. However we do support discard if
+        * encryption is enabled, because it may give an attacker
+        * useful information.
+        */
+       if ((!file->f_op->fallocate) ||
+           lo->lo_encrypt_key_size) {
+               q->limits.discard_granularity = 0;
+               q->limits.discard_alignment = 0;
+               q->limits.max_discard_sectors = 0;
+               q->limits.discard_zeroes_data = 0;
+               queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
+               return;
+       }
+
+       q->limits.discard_granularity = inode->i_sb->s_blocksize;
+       q->limits.discard_alignment = inode->i_sb->s_blocksize;
+       q->limits.max_discard_sectors = UINT_MAX >> 9;
+       q->limits.discard_zeroes_data = 1;
+       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+}
+
 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
                       struct block_device *bdev, unsigned int arg)
 {
@@ -829,7 +892,9 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
        }
        lo->lo_state = Lo_bound;
        wake_up_process(lo->lo_thread);
-       if (max_part > 0)
+       if (part_shift)
+               lo->lo_flags |= LO_FLAGS_PARTSCAN;
+       if (lo->lo_flags & LO_FLAGS_PARTSCAN)
                ioctl_by_bdev(bdev, BLKRRPART, 0);
        return 0;
 
@@ -890,10 +955,11 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
        return err;
 }
 
-static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
+static int loop_clr_fd(struct loop_device *lo)
 {
        struct file *filp = lo->lo_backing_file;
        gfp_t gfp = lo->old_gfp_mask;
+       struct block_device *bdev = lo->lo_device;
 
        if (lo->lo_state != Lo_bound)
                return -ENXIO;
@@ -922,7 +988,6 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
        lo->lo_offset = 0;
        lo->lo_sizelimit = 0;
        lo->lo_encrypt_key_size = 0;
-       lo->lo_flags = 0;
        lo->lo_thread = NULL;
        memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
        memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
@@ -940,8 +1005,11 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
        lo->lo_state = Lo_unbound;
        /* This is safe: open() is still holding a reference. */
        module_put(THIS_MODULE);
-       if (max_part > 0 && bdev)
+       if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
                ioctl_by_bdev(bdev, BLKRRPART, 0);
+       lo->lo_flags = 0;
+       if (!part_shift)
+               lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
        mutex_unlock(&lo->lo_ctl_mutex);
        /*
         * Need not hold lo_ctl_mutex to fput backing file.
@@ -995,6 +1063,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
                if (figure_loop_size(lo))
                        return -EFBIG;
        }
+       loop_config_discard(lo);
 
        memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
        memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
@@ -1010,6 +1079,13 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
             (info->lo_flags & LO_FLAGS_AUTOCLEAR))
                lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
 
+       if ((info->lo_flags & LO_FLAGS_PARTSCAN) &&
+            !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
+               lo->lo_flags |= LO_FLAGS_PARTSCAN;
+               lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
+               ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
+       }
+
        lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
        lo->lo_init[0] = info->lo_init[0];
        lo->lo_init[1] = info->lo_init[1];
@@ -1203,7 +1279,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
                break;
        case LOOP_CLR_FD:
                /* loop_clr_fd would have unlocked lo_ctl_mutex on success */
-               err = loop_clr_fd(lo, bdev);
+               err = loop_clr_fd(lo);
                if (!err)
                        goto out_unlocked;
                break;
@@ -1423,7 +1499,7 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
                 * In autoclear mode, stop the loop thread
                 * and remove configuration after last close.
                 */
-               err = loop_clr_fd(lo, NULL);
+               err = loop_clr_fd(lo);
                if (!err)
                        goto out_unlocked;
        } else {
@@ -1545,6 +1621,27 @@ static int loop_add(struct loop_device **l, int i)
        if (!disk)
                goto out_free_queue;
 
+       /*
+        * Disable partition scanning by default. The in-kernel partition
+        * scanning can be requested individually per-device during its
+        * setup. Userspace can always add and remove partitions from all
+        * devices. The needed partition minors are allocated from the
+        * extended minor space, the main loop device numbers will continue
+        * to match the loop minors, regardless of the number of partitions
+        * used.
+        *
+        * If max_part is given, partition scanning is globally enabled for
+        * all loop devices. The minors for the main loop devices will be
+        * multiples of max_part.
+        *
+        * Note: Global-for-all-devices, set-only-at-init, read-only module
+        * parameteters like 'max_loop' and 'max_part' make things needlessly
+        * complicated, are too static, inflexible and may surprise
+        * userspace tools. Parameters like this in general should be avoided.
+        */
+       if (!part_shift)
+               disk->flags |= GENHD_FL_NO_PART_SCAN;
+       disk->flags |= GENHD_FL_EXT_DEVT;
        mutex_init(&lo->lo_ctl_mutex);
        lo->lo_number           = i;
        lo->lo_thread           = NULL;
index f533f3375e24751feaf4e065b920291c7abe149d..c3f0ee16594dac8d33b92d2dafd02a6ee4e1526d 100644 (file)
@@ -127,8 +127,7 @@ static void sock_shutdown(struct nbd_device *lo, int lock)
        if (lock)
                mutex_lock(&lo->tx_lock);
        if (lo->sock) {
-               printk(KERN_WARNING "%s: shutting down socket\n",
-                       lo->disk->disk_name);
+               dev_warn(disk_to_dev(lo->disk), "shutting down socket\n");
                kernel_sock_shutdown(lo->sock, SHUT_RDWR);
                lo->sock = NULL;
        }
@@ -158,8 +157,9 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
        sigset_t blocked, oldset;
 
        if (unlikely(!sock)) {
-               printk(KERN_ERR "%s: Attempted %s on closed socket in sock_xmit\n",
-                      lo->disk->disk_name, (send ? "send" : "recv"));
+               dev_err(disk_to_dev(lo->disk),
+                       "Attempted %s on closed socket in sock_xmit\n",
+                       (send ? "send" : "recv"));
                return -EINVAL;
        }
 
@@ -250,8 +250,8 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
        result = sock_xmit(lo, 1, &request, sizeof(request),
                        (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
        if (result <= 0) {
-               printk(KERN_ERR "%s: Send control failed (result %d)\n",
-                               lo->disk->disk_name, result);
+               dev_err(disk_to_dev(lo->disk),
+                       "Send control failed (result %d)\n", result);
                goto error_out;
        }
 
@@ -270,8 +270,9 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
                                        lo->disk->disk_name, req, bvec->bv_len);
                        result = sock_send_bvec(lo, bvec, flags);
                        if (result <= 0) {
-                               printk(KERN_ERR "%s: Send data failed (result %d)\n",
-                                               lo->disk->disk_name, result);
+                               dev_err(disk_to_dev(lo->disk),
+                                       "Send data failed (result %d)\n",
+                                       result);
                                goto error_out;
                        }
                }
@@ -328,14 +329,13 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
        reply.magic = 0;
        result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL);
        if (result <= 0) {
-               printk(KERN_ERR "%s: Receive control failed (result %d)\n",
-                               lo->disk->disk_name, result);
+               dev_err(disk_to_dev(lo->disk),
+                       "Receive control failed (result %d)\n", result);
                goto harderror;
        }
 
        if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
-               printk(KERN_ERR "%s: Wrong magic (0x%lx)\n",
-                               lo->disk->disk_name,
+               dev_err(disk_to_dev(lo->disk), "Wrong magic (0x%lx)\n",
                                (unsigned long)ntohl(reply.magic));
                result = -EPROTO;
                goto harderror;
@@ -347,15 +347,15 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
                if (result != -ENOENT)
                        goto harderror;
 
-               printk(KERN_ERR "%s: Unexpected reply (%p)\n",
-                               lo->disk->disk_name, reply.handle);
+               dev_err(disk_to_dev(lo->disk), "Unexpected reply (%p)\n",
+                       reply.handle);
                result = -EBADR;
                goto harderror;
        }
 
        if (ntohl(reply.error)) {
-               printk(KERN_ERR "%s: Other side returned error (%d)\n",
-                               lo->disk->disk_name, ntohl(reply.error));
+               dev_err(disk_to_dev(lo->disk), "Other side returned error (%d)\n",
+                       ntohl(reply.error));
                req->errors++;
                return req;
        }
@@ -369,8 +369,8 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
                rq_for_each_segment(bvec, req, iter) {
                        result = sock_recv_bvec(lo, bvec);
                        if (result <= 0) {
-                               printk(KERN_ERR "%s: Receive data failed (result %d)\n",
-                                               lo->disk->disk_name, result);
+                               dev_err(disk_to_dev(lo->disk), "Receive data failed (result %d)\n",
+                                       result);
                                req->errors++;
                                return req;
                        }
@@ -405,10 +405,10 @@ static int nbd_do_it(struct nbd_device *lo)
 
        BUG_ON(lo->magic != LO_MAGIC);
 
-       lo->pid = current->pid;
-       ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
+       lo->pid = task_pid_nr(current);
+       ret = device_create_file(disk_to_dev(lo->disk), &pid_attr);
        if (ret) {
-               printk(KERN_ERR "nbd: sysfs_create_file failed!");
+               dev_err(disk_to_dev(lo->disk), "device_create_file failed!\n");
                lo->pid = 0;
                return ret;
        }
@@ -416,7 +416,7 @@ static int nbd_do_it(struct nbd_device *lo)
        while ((req = nbd_read_stat(lo)) != NULL)
                nbd_end_request(req);
 
-       sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
+       device_remove_file(disk_to_dev(lo->disk), &pid_attr);
        lo->pid = 0;
        return 0;
 }
@@ -457,8 +457,8 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
        if (rq_data_dir(req) == WRITE) {
                nbd_cmd(req) = NBD_CMD_WRITE;
                if (lo->flags & NBD_READ_ONLY) {
-                       printk(KERN_ERR "%s: Write on read-only\n",
-                                       lo->disk->disk_name);
+                       dev_err(disk_to_dev(lo->disk),
+                               "Write on read-only\n");
                        goto error_out;
                }
        }
@@ -468,16 +468,15 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
        mutex_lock(&lo->tx_lock);
        if (unlikely(!lo->sock)) {
                mutex_unlock(&lo->tx_lock);
-               printk(KERN_ERR "%s: Attempted send on closed socket\n",
-                      lo->disk->disk_name);
+               dev_err(disk_to_dev(lo->disk),
+                       "Attempted send on closed socket\n");
                goto error_out;
        }
 
        lo->active_req = req;
 
        if (nbd_send_req(lo, req) != 0) {
-               printk(KERN_ERR "%s: Request send failed\n",
-                               lo->disk->disk_name);
+               dev_err(disk_to_dev(lo->disk), "Request send failed\n");
                req->errors++;
                nbd_end_request(req);
        } else {
@@ -549,8 +548,8 @@ static void do_nbd_request(struct request_queue *q)
                BUG_ON(lo->magic != LO_MAGIC);
 
                if (unlikely(!lo->sock)) {
-                       printk(KERN_ERR "%s: Attempted send on closed socket\n",
-                               lo->disk->disk_name);
+                       dev_err(disk_to_dev(lo->disk),
+                               "Attempted send on closed socket\n");
                        req->errors++;
                        nbd_end_request(req);
                        spin_lock_irq(q->queue_lock);
@@ -576,7 +575,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
        case NBD_DISCONNECT: {
                struct request sreq;
 
-               printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
+               dev_info(disk_to_dev(lo->disk), "NBD_DISCONNECT\n");
 
                blk_rq_init(NULL, &sreq);
                sreq.cmd_type = REQ_TYPE_SPECIAL;
@@ -674,7 +673,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
                file = lo->file;
                lo->file = NULL;
                nbd_clear_que(lo);
-               printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);
+               dev_warn(disk_to_dev(lo->disk), "queue cleared\n");
                if (file)
                        fput(file);
                lo->bytesize = 0;
@@ -694,8 +693,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
                return 0;
 
        case NBD_PRINT_DEBUG:
-               printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",
-                       bdev->bd_disk->disk_name,
+               dev_info(disk_to_dev(lo->disk),
+                       "next = %p, prev = %p, head = %p\n",
                        lo->queue_head.next, lo->queue_head.prev,
                        &lo->queue_head);
                return 0;
@@ -745,7 +744,7 @@ static int __init nbd_init(void)
        BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
 
        if (max_part < 0) {
-               printk(KERN_CRIT "nbd: max_part must be >= 0\n");
+               printk(KERN_ERR "nbd: max_part must be >= 0\n");
                return -EINVAL;
        }
 
index 1540792b1e547ee882fcbc168bb5d4156d5d1678..15ec4db194d1bb793e4409dfed54563e4b71b080 100644 (file)
@@ -39,6 +39,9 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/loop.h>
+#include <linux/falloc.h>
+#include <linux/fs.h>
 
 #include <xen/events.h>
 #include <xen/page.h>
@@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 
 static void print_stats(struct xen_blkif *blkif)
 {
-       pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d\n",
+       pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d"
+                "  |  ds %4d\n",
                 current->comm, blkif->st_oo_req,
-                blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
+                blkif->st_rd_req, blkif->st_wr_req,
+                blkif->st_f_req, blkif->st_ds_req);
        blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
        blkif->st_rd_req = 0;
        blkif->st_wr_req = 0;
        blkif->st_oo_req = 0;
+       blkif->st_ds_req = 0;
 }
 
 int xen_blkif_schedule(void *arg)
@@ -410,6 +416,59 @@ static int xen_blkbk_map(struct blkif_request *req,
        return ret;
 }
 
+static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
+{
+       int err = 0;
+       int status = BLKIF_RSP_OKAY;
+       struct block_device *bdev = blkif->vbd.bdev;
+
+       if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
+               /* just forward the discard request */
+               err = blkdev_issue_discard(bdev,
+                               req->u.discard.sector_number,
+                               req->u.discard.nr_sectors,
+                               GFP_KERNEL, 0);
+       else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
+               /* punch a hole in the backing file */
+               struct loop_device *lo = bdev->bd_disk->private_data;
+               struct file *file = lo->lo_backing_file;
+
+               if (file->f_op->fallocate)
+                       err = file->f_op->fallocate(file,
+                               FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+                               req->u.discard.sector_number << 9,
+                               req->u.discard.nr_sectors << 9);
+               else
+                       err = -EOPNOTSUPP;
+       } else
+               err = -EOPNOTSUPP;
+
+       if (err == -EOPNOTSUPP) {
+               pr_debug(DRV_PFX "discard op failed, not supported\n");
+               status = BLKIF_RSP_EOPNOTSUPP;
+       } else if (err)
+               status = BLKIF_RSP_ERROR;
+
+       make_response(blkif, req->id, req->operation, status);
+}
+
+static void xen_blk_drain_io(struct xen_blkif *blkif)
+{
+       atomic_set(&blkif->drain, 1);
+       do {
+               /* The initial value is one, and one refcnt taken at the
+                * start of the xen_blkif_schedule thread. */
+               if (atomic_read(&blkif->refcnt) <= 2)
+                       break;
+               wait_for_completion_interruptible_timeout(
+                               &blkif->drain_complete, HZ);
+
+               if (!atomic_read(&blkif->drain))
+                       break;
+       } while (!kthread_should_stop());
+       atomic_set(&blkif->drain, 0);
+}
+
 /*
  * Completion callback on the bio's. Called as bh->b_end_io()
  */
@@ -422,6 +481,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
                pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
                xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
                pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+       } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
+                   (error == -EOPNOTSUPP)) {
+               pr_debug(DRV_PFX "write barrier op failed, not supported\n");
+               xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
+               pending_req->status = BLKIF_RSP_EOPNOTSUPP;
        } else if (error) {
                pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
                         " error=%d\n", error);
@@ -438,6 +502,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
                make_response(pending_req->blkif, pending_req->id,
                              pending_req->operation, pending_req->status);
                xen_blkif_put(pending_req->blkif);
+               if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
+                       if (atomic_read(&pending_req->blkif->drain))
+                               complete(&pending_req->blkif->drain_complete);
+               }
                free_req(pending_req);
        }
 }
@@ -532,7 +600,6 @@ do_block_io_op(struct xen_blkif *blkif)
 
        return more_to_do;
 }
-
 /*
  * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
  * and call the 'submit_bio' to pass it to the underlying storage.
@@ -549,6 +616,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
        int i, nbio = 0;
        int operation;
        struct blk_plug plug;
+       bool drain = false;
 
        switch (req->operation) {
        case BLKIF_OP_READ:
@@ -559,11 +627,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
                blkif->st_wr_req++;
                operation = WRITE_ODIRECT;
                break;
+       case BLKIF_OP_WRITE_BARRIER:
+               drain = true;
        case BLKIF_OP_FLUSH_DISKCACHE:
                blkif->st_f_req++;
                operation = WRITE_FLUSH;
                break;
-       case BLKIF_OP_WRITE_BARRIER:
+       case BLKIF_OP_DISCARD:
+               blkif->st_ds_req++;
+               operation = REQ_DISCARD;
+               break;
        default:
                operation = 0; /* make gcc happy */
                goto fail_response;
@@ -572,7 +645,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
        /* Check that the number of segments is sane. */
        nseg = req->nr_segments;
-       if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
+       if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
+                               operation != REQ_DISCARD) ||
            unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
                pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
                         nseg);
@@ -621,16 +695,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
                }
        }
 
+       /* Wait on all outstanding I/O's and once that has been completed
+        * issue the WRITE_FLUSH.
+        */
+       if (drain)
+               xen_blk_drain_io(pending_req->blkif);
+
        /*
         * If we have failed at this point, we need to undo the M2P override,
         * set gnttab_set_unmap_op on all of the grant references and perform
         * the hypercall to unmap the grants - that is all done in
         * xen_blkbk_unmap.
         */
-       if (xen_blkbk_map(req, pending_req, seg))
+       if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
                goto fail_flush;
 
-       /* This corresponding xen_blkif_put is done in __end_block_io_op */
+       /*
+        * This corresponding xen_blkif_put is done in __end_block_io_op, or
+        * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
+        */
        xen_blkif_get(blkif);
 
        for (i = 0; i < nseg; i++) {
@@ -654,18 +737,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
                preq.sector_number += seg[i].nsec;
        }
 
-       /* This will be hit if the operation was a flush. */
+       /* This will be hit if the operation was a flush or discard. */
        if (!bio) {
-               BUG_ON(operation != WRITE_FLUSH);
+               BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
 
-               bio = bio_alloc(GFP_KERNEL, 0);
-               if (unlikely(bio == NULL))
-                       goto fail_put_bio;
+               if (operation == WRITE_FLUSH) {
+                       bio = bio_alloc(GFP_KERNEL, 0);
+                       if (unlikely(bio == NULL))
+                               goto fail_put_bio;
 
-               biolist[nbio++] = bio;
-               bio->bi_bdev    = preq.bdev;
-               bio->bi_private = pending_req;
-               bio->bi_end_io  = end_block_io_op;
+                       biolist[nbio++] = bio;
+                       bio->bi_bdev    = preq.bdev;
+                       bio->bi_private = pending_req;
+                       bio->bi_end_io  = end_block_io_op;
+               } else if (operation == REQ_DISCARD) {
+                       xen_blk_discard(blkif, req);
+                       xen_blkif_put(blkif);
+                       free_req(pending_req);
+                       return 0;
+               }
        }
 
        /*
@@ -685,7 +775,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
        if (operation == READ)
                blkif->st_rd_sect += preq.nr_sects;
-       else if (operation == WRITE || operation == WRITE_FLUSH)
+       else if (operation & WRITE)
                blkif->st_wr_sect += preq.nr_sects;
 
        return 0;
@@ -765,9 +855,9 @@ static int __init xen_blkif_init(void)
 
        mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
 
-       blkbk->pending_reqs          = kmalloc(sizeof(blkbk->pending_reqs[0]) *
+       blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
                                        xen_blkif_reqs, GFP_KERNEL);
-       blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
+       blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
                                        mmap_pages, GFP_KERNEL);
        blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
                                        mmap_pages, GFP_KERNEL);
@@ -790,8 +880,6 @@ static int __init xen_blkif_init(void)
        if (rc)
                goto failed_init;
 
-       memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs));
-
        INIT_LIST_HEAD(&blkbk->pending_free);
        spin_lock_init(&blkbk->pending_free_lock);
        init_waitqueue_head(&blkbk->pending_free_wq);
index c4bd34063ecc8f01303932d4ec35d239e2f0d4a0..de09f525d6c174509af3a1e25ce6609fbb2ef1fc 100644 (file)
@@ -62,13 +62,26 @@ struct blkif_common_response {
 
 /* i386 protocol version */
 #pragma pack(push, 4)
+
+struct blkif_x86_32_request_rw {
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_32_request_discard {
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       uint64_t nr_sectors;
+};
+
 struct blkif_x86_32_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint64_t       id;           /* private guest value, echoed in resp  */
-       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       union {
+               struct blkif_x86_32_request_rw rw;
+               struct blkif_x86_32_request_discard discard;
+       } u;
 };
 struct blkif_x86_32_response {
        uint64_t        id;              /* copied from request */
@@ -78,13 +91,26 @@ struct blkif_x86_32_response {
 #pragma pack(pop)
 
 /* x86_64 protocol version */
+
+struct blkif_x86_64_request_rw {
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_64_request_discard {
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       uint64_t nr_sectors;
+};
+
 struct blkif_x86_64_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint64_t       __attribute__((__aligned__(8))) id;
-       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       union {
+               struct blkif_x86_64_request_rw rw;
+               struct blkif_x86_64_request_discard discard;
+       } u;
 };
 struct blkif_x86_64_response {
        uint64_t       __attribute__((__aligned__(8))) id;
@@ -112,6 +138,11 @@ enum blkif_protocol {
        BLKIF_PROTOCOL_X86_64 = 3,
 };
 
+enum blkif_backend_type {
+       BLKIF_BACKEND_PHY  = 1,
+       BLKIF_BACKEND_FILE = 2,
+};
+
 struct xen_vbd {
        /* What the domain refers to this vbd as. */
        blkif_vdev_t            handle;
@@ -137,6 +168,7 @@ struct xen_blkif {
        unsigned int            irq;
        /* Comms information. */
        enum blkif_protocol     blk_protocol;
+       enum blkif_backend_type blk_backend_type;
        union blkif_back_rings  blk_rings;
        struct vm_struct        *blk_ring_area;
        /* The VBD attached to this interface. */
@@ -148,6 +180,9 @@ struct xen_blkif {
        atomic_t                refcnt;
 
        wait_queue_head_t       wq;
+       /* for barrier (drain) requests */
+       struct completion       drain_complete;
+       atomic_t                drain;
        /* One thread per one blkif. */
        struct task_struct      *xenblkd;
        unsigned int            waiting_reqs;
@@ -158,6 +193,7 @@ struct xen_blkif {
        int                     st_wr_req;
        int                     st_oo_req;
        int                     st_f_req;
+       int                     st_ds_req;
        int                     st_rd_sect;
        int                     st_wr_sect;
 
@@ -181,7 +217,7 @@ struct xen_blkif {
 
 struct phys_req {
        unsigned short          dev;
-       unsigned short          nr_sects;
+       blkif_sector_t          nr_sects;
        struct block_device     *bdev;
        blkif_sector_t          sector_number;
 };
@@ -195,6 +231,8 @@ int xen_blkif_schedule(void *arg);
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                              struct backend_info *be, int state);
 
+int xen_blkbk_barrier(struct xenbus_transaction xbt,
+                     struct backend_info *be, int state);
 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
 
 static inline void blkif_get_x86_32_req(struct blkif_request *dst,
@@ -205,12 +243,25 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
        dst->nr_segments = src->nr_segments;
        dst->handle = src->handle;
        dst->id = src->id;
-       dst->u.rw.sector_number = src->sector_number;
-       barrier();
-       if (n > dst->nr_segments)
-               n = dst->nr_segments;
-       for (i = 0; i < n; i++)
-               dst->u.rw.seg[i] = src->seg[i];
+       switch (src->operation) {
+       case BLKIF_OP_READ:
+       case BLKIF_OP_WRITE:
+       case BLKIF_OP_WRITE_BARRIER:
+       case BLKIF_OP_FLUSH_DISKCACHE:
+               dst->u.rw.sector_number = src->u.rw.sector_number;
+               barrier();
+               if (n > dst->nr_segments)
+                       n = dst->nr_segments;
+               for (i = 0; i < n; i++)
+                       dst->u.rw.seg[i] = src->u.rw.seg[i];
+               break;
+       case BLKIF_OP_DISCARD:
+               dst->u.discard.sector_number = src->u.discard.sector_number;
+               dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+               break;
+       default:
+               break;
+       }
 }
 
 static inline void blkif_get_x86_64_req(struct blkif_request *dst,
@@ -221,12 +272,25 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
        dst->nr_segments = src->nr_segments;
        dst->handle = src->handle;
        dst->id = src->id;
-       dst->u.rw.sector_number = src->sector_number;
-       barrier();
-       if (n > dst->nr_segments)
-               n = dst->nr_segments;
-       for (i = 0; i < n; i++)
-               dst->u.rw.seg[i] = src->seg[i];
+       switch (src->operation) {
+       case BLKIF_OP_READ:
+       case BLKIF_OP_WRITE:
+       case BLKIF_OP_WRITE_BARRIER:
+       case BLKIF_OP_FLUSH_DISKCACHE:
+               dst->u.rw.sector_number = src->u.rw.sector_number;
+               barrier();
+               if (n > dst->nr_segments)
+                       n = dst->nr_segments;
+               for (i = 0; i < n; i++)
+                       dst->u.rw.seg[i] = src->u.rw.seg[i];
+               break;
+       case BLKIF_OP_DISCARD:
+               dst->u.discard.sector_number = src->u.discard.sector_number;
+               dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+               break;
+       default:
+               break;
+       }
 }
 
 #endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
index 5fd2010f7d2bd96e1dc7b4c290fa71299d480d14..2c008afe63d9dbb5499712f9abd3333904a35a3c 100644 (file)
@@ -114,6 +114,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        spin_lock_init(&blkif->blk_ring_lock);
        atomic_set(&blkif->refcnt, 1);
        init_waitqueue_head(&blkif->wq);
+       init_completion(&blkif->drain_complete);
+       atomic_set(&blkif->drain, 0);
        blkif->st_print = jiffies;
        init_waitqueue_head(&blkif->waiting_to_free);
 
@@ -272,6 +274,7 @@ VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req);
 VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
 VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
 VBD_SHOW(f_req,  "%d\n", be->blkif->st_f_req);
+VBD_SHOW(ds_req,  "%d\n", be->blkif->st_ds_req);
 VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
 VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
 
@@ -280,6 +283,7 @@ static struct attribute *xen_vbdstat_attrs[] = {
        &dev_attr_rd_req.attr,
        &dev_attr_wr_req.attr,
        &dev_attr_f_req.attr,
+       &dev_attr_ds_req.attr,
        &dev_attr_rd_sect.attr,
        &dev_attr_wr_sect.attr,
        NULL
@@ -419,6 +423,73 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
        return err;
 }
 
+int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
+{
+       struct xenbus_device *dev = be->dev;
+       struct xen_blkif *blkif = be->blkif;
+       char *type;
+       int err;
+       int state = 0;
+
+       type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
+       if (!IS_ERR(type)) {
+               if (strncmp(type, "file", 4) == 0) {
+                       state = 1;
+                       blkif->blk_backend_type = BLKIF_BACKEND_FILE;
+               }
+               if (strncmp(type, "phy", 3) == 0) {
+                       struct block_device *bdev = be->blkif->vbd.bdev;
+                       struct request_queue *q = bdev_get_queue(bdev);
+                       if (blk_queue_discard(q)) {
+                               err = xenbus_printf(xbt, dev->nodename,
+                                       "discard-granularity", "%u",
+                                       q->limits.discard_granularity);
+                               if (err) {
+                                       xenbus_dev_fatal(dev, err,
+                                               "writing discard-granularity");
+                                       goto kfree;
+                               }
+                               err = xenbus_printf(xbt, dev->nodename,
+                                       "discard-alignment", "%u",
+                                       q->limits.discard_alignment);
+                               if (err) {
+                                       xenbus_dev_fatal(dev, err,
+                                               "writing discard-alignment");
+                                       goto kfree;
+                               }
+                               state = 1;
+                               blkif->blk_backend_type = BLKIF_BACKEND_PHY;
+                       }
+               }
+       } else {
+               err = PTR_ERR(type);
+               xenbus_dev_fatal(dev, err, "reading type");
+               goto out;
+       }
+
+       err = xenbus_printf(xbt, dev->nodename, "feature-discard",
+                           "%d", state);
+       if (err)
+               xenbus_dev_fatal(dev, err, "writing feature-discard");
+kfree:
+       kfree(type);
+out:
+       return err;
+}
+int xen_blkbk_barrier(struct xenbus_transaction xbt,
+                     struct backend_info *be, int state)
+{
+       struct xenbus_device *dev = be->dev;
+       int err;
+
+       err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
+                           "%d", state);
+       if (err)
+               xenbus_dev_fatal(dev, err, "writing feature-barrier");
+
+       return err;
+}
+
 /*
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures, and watch the store waiting for the hotplug scripts to tell us
@@ -650,6 +721,11 @@ again:
        if (err)
                goto abort;
 
+       err = xen_blkbk_discard(xbt, be);
+
+       /* If we can't advertise it is OK. */
+       err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
+
        err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
                            (unsigned long long)vbd_sz(&be->blkif->vbd));
        if (err) {
index 9ea8c2576c70e768f22ad8ea2cc423337611fb4f..7b2ec5908413da7b989044828879afa173613b87 100644 (file)
@@ -98,6 +98,9 @@ struct blkfront_info
        unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
+       unsigned int feature_discard;
+       unsigned int discard_granularity;
+       unsigned int discard_alignment;
        int is_ready;
 };
 
@@ -302,29 +305,36 @@ static int blkif_queue_request(struct request *req)
                ring_req->operation = info->flush_op;
        }
 
-       ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
-       BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+       if (unlikely(req->cmd_flags & REQ_DISCARD)) {
+               /* id, sector_number and handle are set above. */
+               ring_req->operation = BLKIF_OP_DISCARD;
+               ring_req->nr_segments = 0;
+               ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
+       } else {
+               ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
+               BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
 
-       for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
-               buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
-               fsect = sg->offset >> 9;
-               lsect = fsect + (sg->length >> 9) - 1;
-               /* install a grant reference. */
-               ref = gnttab_claim_grant_reference(&gref_head);
-               BUG_ON(ref == -ENOSPC);
+               for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
+                       buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
+                       fsect = sg->offset >> 9;
+                       lsect = fsect + (sg->length >> 9) - 1;
+                       /* install a grant reference. */
+                       ref = gnttab_claim_grant_reference(&gref_head);
+                       BUG_ON(ref == -ENOSPC);
 
-               gnttab_grant_foreign_access_ref(
-                               ref,
-                               info->xbdev->otherend_id,
-                               buffer_mfn,
-                               rq_data_dir(req) );
-
-               info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
-               ring_req->u.rw.seg[i] =
-                               (struct blkif_request_segment) {
-                                       .gref       = ref,
-                                       .first_sect = fsect,
-                                       .last_sect  = lsect };
+                       gnttab_grant_foreign_access_ref(
+                                       ref,
+                                       info->xbdev->otherend_id,
+                                       buffer_mfn,
+                                       rq_data_dir(req));
+
+                       info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
+                       ring_req->u.rw.seg[i] =
+                                       (struct blkif_request_segment) {
+                                               .gref       = ref,
+                                               .first_sect = fsect,
+                                               .last_sect  = lsect };
+               }
        }
 
        info->ring.req_prod_pvt++;
@@ -370,7 +380,9 @@ static void do_blkif_request(struct request_queue *rq)
 
                blk_start_request(req);
 
-               if (req->cmd_type != REQ_TYPE_FS) {
+               if ((req->cmd_type != REQ_TYPE_FS) ||
+                   ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
+                   !info->flush_op)) {
                        __blk_end_request_all(req, -EIO);
                        continue;
                }
@@ -399,6 +411,7 @@ wait:
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 {
        struct request_queue *rq;
+       struct blkfront_info *info = gd->private_data;
 
        rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
        if (rq == NULL)
@@ -406,6 +419,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 
        queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
 
+       if (info->feature_discard) {
+               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
+               blk_queue_max_discard_sectors(rq, get_capacity(gd));
+               rq->limits.discard_granularity = info->discard_granularity;
+               rq->limits.discard_alignment = info->discard_alignment;
+       }
+
        /* Hard sector size and max sectors impersonate the equiv. hardware. */
        blk_queue_logical_block_size(rq, sector_size);
        blk_queue_max_hw_sectors(rq, 512);
@@ -722,6 +742,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 
                error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
                switch (bret->operation) {
+               case BLKIF_OP_DISCARD:
+                       if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+                               struct request_queue *rq = info->rq;
+                               printk(KERN_WARNING "blkfront: %s: discard op failed\n",
+                                          info->gd->disk_name);
+                               error = -EOPNOTSUPP;
+                               info->feature_discard = 0;
+                               queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
+                       }
+                       __blk_end_request_all(req, error);
+                       break;
                case BLKIF_OP_FLUSH_DISKCACHE:
                case BLKIF_OP_WRITE_BARRIER:
                        if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
@@ -1098,6 +1129,33 @@ blkfront_closing(struct blkfront_info *info)
        bdput(bdev);
 }
 
+static void blkfront_setup_discard(struct blkfront_info *info)
+{
+       int err;
+       char *type;
+       unsigned int discard_granularity;
+       unsigned int discard_alignment;
+
+       type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
+       if (IS_ERR(type))
+               return;
+
+       if (strncmp(type, "phy", 3) == 0) {
+               err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+                       "discard-granularity", "%u", &discard_granularity,
+                       "discard-alignment", "%u", &discard_alignment,
+                       NULL);
+               if (!err) {
+                       info->feature_discard = 1;
+                       info->discard_granularity = discard_granularity;
+                       info->discard_alignment = discard_alignment;
+               }
+       } else if (strncmp(type, "file", 4) == 0)
+               info->feature_discard = 1;
+
+       kfree(type);
+}
+
 /*
  * Invoked when the backend is finally 'ready' (and has told produced
  * the details about the physical device - #sectors, size, etc).
@@ -1108,7 +1166,7 @@ static void blkfront_connect(struct blkfront_info *info)
        unsigned long sector_size;
        unsigned int binfo;
        int err;
-       int barrier, flush;
+       int barrier, flush, discard;
 
        switch (info->connected) {
        case BLKIF_STATE_CONNECTED:
@@ -1178,7 +1236,14 @@ static void blkfront_connect(struct blkfront_info *info)
                info->feature_flush = REQ_FLUSH;
                info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
        }
-               
+
+       err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+                           "feature-discard", "%d", &discard,
+                           NULL);
+
+       if (!err && discard)
+               blkfront_setup_discard(info);
+
        err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
        if (err) {
                xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@ -1385,6 +1450,8 @@ static struct xenbus_driver blkfront = {
 
 static int __init xlblk_init(void)
 {
+       int ret;
+
        if (!xen_domain())
                return -ENODEV;
 
@@ -1394,7 +1461,13 @@ static int __init xlblk_init(void)
                return -ENODEV;
        }
 
-       return xenbus_register_frontend(&blkfront);
+       ret = xenbus_register_frontend(&blkfront);
+       if (ret) {
+               unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
+               return ret;
+       }
+
+       return 0;
 }
 module_init(xlblk_init);
 
index 9825ecf3495793cc5a6c1b166337d9ab85a0cf58..bbdc9f960a66fd7105a314f89e900e5b17492af9 100644 (file)
@@ -3300,6 +3300,13 @@ static int hpsa_controller_hard_reset(struct pci_dev *pdev,
                pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
                pmcsr |= PCI_D0;
                pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+               /*
+                * The P600 requires a small delay when changing states.
+                * Otherwise we may think the board did not reset and we bail.
+                * This for kdump only and is particular to the P600.
+                */
+               msleep(500);
        }
        return 0;
 }
index 1c44b8d54504e1c633774fd6befe966f0c1e7dc1..b07f1da1de4e34470fd64af913c9366e0d6c8513 100644 (file)
@@ -971,7 +971,7 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
 
        if (!bdev->bd_disk)
                return;
-       if (disk_partitionable(bdev->bd_disk))
+       if (disk_part_scan_enabled(bdev->bd_disk))
                bdev->bd_invalidated = 1;
 }
 
index 6957350e122f2444173ff94e0d650e0dc01ef8c6..9de31bc98c8803bc96bac1f0751da3ca695bf8df 100644 (file)
@@ -131,6 +131,7 @@ struct hd_struct {
 #define GENHD_FL_EXT_DEVT                      64 /* allow extended devt */
 #define GENHD_FL_NATIVE_CAPACITY               128
 #define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE    256
+#define GENHD_FL_NO_PART_SCAN                  512
 
 enum {
        DISK_EVENT_MEDIA_CHANGE                 = 1 << 0, /* media changed */
@@ -238,9 +239,10 @@ static inline int disk_max_parts(struct gendisk *disk)
        return disk->minors;
 }
 
-static inline bool disk_partitionable(struct gendisk *disk)
+static inline bool disk_part_scan_enabled(struct gendisk *disk)
 {
-       return disk_max_parts(disk) > 1;
+       return disk_max_parts(disk) > 1 &&
+               !(disk->flags & GENHD_FL_NO_PART_SCAN);
 }
 
 static inline dev_t disk_devt(struct gendisk *disk)
index a06880689115ded34b73c39443004638635f2d3b..11a41a8f08eb9e98cb1105982d093c3d075d5206 100644 (file)
@@ -74,6 +74,7 @@ struct loop_device {
 enum {
        LO_FLAGS_READ_ONLY      = 1,
        LO_FLAGS_AUTOCLEAR      = 4,
+       LO_FLAGS_PARTSCAN       = 8,
 };
 
 #include <asm/posix_types.h>   /* for __kernel_old_dev_t */
index 3d5d6db864fe9781e2fc79214474f7101c84187c..9324488f23f0b2eaab58e50f7cdda0b6d4b152ef 100644 (file)
@@ -57,6 +57,36 @@ typedef uint64_t blkif_sector_t;
  * "feature-flush-cache" node!
  */
 #define BLKIF_OP_FLUSH_DISKCACHE   3
+
+/*
+ * Recognised only if "feature-discard" is present in backend xenbus info.
+ * The "feature-discard" node contains a boolean indicating whether trim
+ * (ATA) or unmap (SCSI) - conviently called discard requests are likely
+ * to succeed or fail. Either way, a discard request
+ * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
+ * the underlying block-device hardware. The boolean simply indicates whether
+ * or not it is worthwhile for the frontend to attempt discard requests.
+ * If a backend does not recognise BLKIF_OP_DISCARD, it should *not*
+ * create the "feature-discard" node!
+ *
+ * Discard operation is a request for the underlying block device to mark
+ * extents to be erased. However, discard does not guarantee that the blocks
+ * will be erased from the device - it is just a hint to the device
+ * controller that these blocks are no longer in use. What the device
+ * controller does with that information is left to the controller.
+ * Discard operations are passed with sector_number as the
+ * sector index to begin discard operations at and nr_sectors as the number of
+ * sectors to be discarded. The specified sectors should be discarded if the
+ * underlying block device supports trim (ATA) or unmap (SCSI) operations,
+ * or a BLKIF_RSP_EOPNOTSUPP  should be returned.
+ * More information about trim/unmap operations at:
+ * http://t13.org/Documents/UploadedDocuments/docs2008/
+ *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
+ * http://www.seagate.com/staticfiles/support/disc/manuals/
+ *     Interface%20manuals/100293068c.pdf
+ */
+#define BLKIF_OP_DISCARD           5
+
 /*
  * Maximum scatter/gather segments per request.
  * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
@@ -74,6 +104,11 @@ struct blkif_request_rw {
        } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
+struct blkif_request_discard {
+       blkif_sector_t sector_number;
+       uint64_t nr_sectors;
+};
+
 struct blkif_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        uint8_t        nr_segments;  /* number of segments                   */
@@ -81,6 +116,7 @@ struct blkif_request {
        uint64_t       id;           /* private guest value, echoed in resp  */
        union {
                struct blkif_request_rw rw;
+               struct blkif_request_discard discard;
        } u;
 };