Merge branch 'for-3.2/drivers' of git://git.kernel.dk/linux-block

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 5 Nov 2011 00:22:14 +0000 (17:22 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 5 Nov 2011 00:22:14 +0000 (17:22 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Nov 2011 00:22:14 +0000 (17:22 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Nov 2011 00:22:14 +0000 (17:22 -0700)
diff --combined block/genhd.c

index 024fc3944fb5b0a40311d5fde7d2c0cc950928a8,2429ecbbd97d07c8dd224c7bd9b3f554d14efa82..9253839714ff95b4acc6da413bd87f610ddce44c
--- 1/block/genhd.c
--- 2/block/genhd.c
+++ b/block/genhd.c
@@@ -19,7 -19,6 +19,7 @@@
   #include <linux/mutex.h>
   #include <linux/idr.h>
   #include <linux/log2.h>
+ +#include <linux/ctype.h>
   
   #include "blk.h"
   
@@@ -537,7 -536,7 +537,7 @@@ void register_disk(struct gendisk *disk
         disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
   
         /* No minors to use for partitions */
-       if (!disk_partitionable(disk))
+       if (!disk_part_scan_enabled(disk))
                 goto exit;
   
         /* No such device (e.g., media were just removed) */
@@@ -612,12 -611,6 +612,12 @@@ void add_disk(struct gendisk *disk
         register_disk(disk);
         blk_register_queue(disk);
   
+ +      /*
+ +       * Take an extra ref on queue which will be put on disk_release()
+ +       * so that it sticks around as long as @disk is there.
+ +       */
+ +      WARN_ON_ONCE(blk_get_queue(disk->queue));
+ +
         retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
                                    "bdi");
         WARN_ON(retval);
@@@ -848,7 -841,7 +848,7 @@@ static int show_partition(struct seq_fi
         char buf[BDEVNAME_SIZE];
   
         /* Don't show non-partitionable removeable devices or empty devices */
-       if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
+       if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
                                    (sgp->flags & GENHD_FL_REMOVABLE)))
                 return 0;
         if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
@@@ -916,74 -909,6 +916,74 @@@ static int __init genhd_device_init(voi
   
   subsys_initcall(genhd_device_init);
   
+ +static ssize_t alias_show(struct device *dev,
+ +                             struct device_attribute *attr, char *buf)
+ +{
+ +      struct gendisk *disk = dev_to_disk(dev);
+ +      ssize_t ret = 0;
+ +
+ +      if (disk->alias)
+ +              ret = snprintf(buf, ALIAS_LEN, "%s\n", disk->alias);
+ +      return ret;
+ +}
+ +
+ +static ssize_t alias_store(struct device *dev, struct device_attribute *attr,
+ +                         const char *buf, size_t count)
+ +{
+ +      struct gendisk *disk = dev_to_disk(dev);
+ +      char *alias;
+ +      char *envp[] = { NULL, NULL };
+ +      unsigned char c;
+ +      int i;
+ +      ssize_t ret = count;
+ +
+ +      if (!count)
+ +              return -EINVAL;
+ +
+ +      if (count >= ALIAS_LEN) {
+ +              printk(KERN_ERR "alias: alias is too long\n");
+ +              return -EINVAL;
+ +      }
+ +
+ +      /* Validation check */
+ +      for (i = 0; i < count; i++) {
+ +              c = buf[i];
+ +              if (i == count - 1 && c == '\n')
+ +                      break;
+ +              if (!isalnum(c) && c != '_' && c != '-') {
+ +                      printk(KERN_ERR "alias: invalid alias\n");
+ +                      return -EINVAL;
+ +              }
+ +      }
+ +
+ +      if (disk->alias) {
+ +              printk(KERN_INFO "alias: %s is already assigned (%s)\n",
+ +                     disk->disk_name, disk->alias);
+ +              return -EINVAL;
+ +      }
+ +
+ +      alias = kasprintf(GFP_KERNEL, "%s", buf);
+ +      if (!alias)
+ +              return -ENOMEM;
+ +
+ +      if (alias[count - 1] == '\n')
+ +              alias[count - 1] = '\0';
+ +
+ +      envp[0] = kasprintf(GFP_KERNEL, "ALIAS=%s", alias);
+ +      if (!envp[0]) {
+ +              kfree(alias);
+ +              return -ENOMEM;
+ +      }
+ +
+ +      disk->alias = alias;
+ +      printk(KERN_INFO "alias: assigned %s to %s\n", alias, disk->disk_name);
+ +
+ +      kobject_uevent_env(&dev->kobj, KOBJ_ADD, envp);
+ +
+ +      kfree(envp[0]);
+ +      return ret;
+ +}
+ +
   static ssize_t disk_range_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
   {
@@@ -1043,7 -968,6 +1043,7 @@@ static ssize_t disk_discard_alignment_s
         return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
   }
   
+ +static DEVICE_ATTR(alias, S_IRUGO|S_IWUSR, alias_show, alias_store);
   static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
   static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
   static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
@@@ -1066,7 -990,6 +1066,7 @@@ static struct device_attribute dev_attr
   #endif
   
   static struct attribute *disk_attrs[] = {
+ +      &dev_attr_alias.attr,
         &dev_attr_range.attr,
         &dev_attr_ext_range.attr,
         &dev_attr_removable.attr,
@@@ -1172,8 -1095,6 +1172,8 @@@ static void disk_release(struct device 
         disk_replace_part_tbl(disk, NULL);
         free_part_stats(&disk->part0);
         free_part_info(&disk->part0);
+ +      if (disk->queue)
+ +              blk_put_queue(disk->queue);
         kfree(disk);
   }
   struct class block_class = {
@@@ -1225,17 -1146,17 +1225,17 @@@ static int diskstats_show(struct seq_fi
                 cpu = part_stat_lock();
                 part_round_stats(cpu, hd);
                 part_stat_unlock();
- -              seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
- -                         "%u %lu %lu %llu %u %u %u %u\n",
+ +              seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
+ +                         "%u %lu %lu %lu %u %u %u %u\n",
                            MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
                            disk_name(gp, hd->partno, buf),
                            part_stat_read(hd, ios[READ]),
                            part_stat_read(hd, merges[READ]),
- -                         (unsigned long long)part_stat_read(hd, sectors[READ]),
+ +                         part_stat_read(hd, sectors[READ]),
                            jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
                            part_stat_read(hd, ios[WRITE]),
                            part_stat_read(hd, merges[WRITE]),
- -                         (unsigned long long)part_stat_read(hd, sectors[WRITE]),
+ +                         part_stat_read(hd, sectors[WRITE]),
                            jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
                            part_in_flight(hd),
                            jiffies_to_msecs(part_stat_read(hd, io_ticks)),
diff --combined drivers/block/loop.c

index c77983ea86c8798a35605206e35266087f764809,9b2f5d3c19abc96c1c594dd6d247140c64728086..3d806820280e3bc4aaa5e81d6bea411f6597e400
--- 1/drivers/block/loop.c
--- 2/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@@ -75,11 -75,12 +75,13 @@@
   #include <linux/kthread.h>
   #include <linux/splice.h>
   #include <linux/sysfs.h>
+ +#include <linux/miscdevice.h>
+ #include <linux/falloc.h>
+ 
   #include <asm/uaccess.h>
   
- -static LIST_HEAD(loop_devices);
- -static DEFINE_MUTEX(loop_devices_mutex);
+ +static DEFINE_IDR(loop_index_idr);
+ +static DEFINE_MUTEX(loop_index_mutex);
   
   static int max_part;
   static int part_shift;
@@@ -202,6 -203,74 +204,6 @@@ lo_do_transfer(struct loop_device *lo, 
         return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
   }
   
- -/**
- - * do_lo_send_aops - helper for writing data to a loop device
- - *
- - * This is the fast version for backing filesystems which implement the address
- - * space operations write_begin and write_end.
- - */
- -static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
- -              loff_t pos, struct page *unused)
- -{
- -      struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
- -      struct address_space *mapping = file->f_mapping;
- -      pgoff_t index;
- -      unsigned offset, bv_offs;
- -      int len, ret;
- -
- -      mutex_lock(&mapping->host->i_mutex);
- -      index = pos >> PAGE_CACHE_SHIFT;
- -      offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
- -      bv_offs = bvec->bv_offset;
- -      len = bvec->bv_len;
- -      while (len > 0) {
- -              sector_t IV;
- -              unsigned size, copied;
- -              int transfer_result;
- -              struct page *page;
- -              void *fsdata;
- -
- -              IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
- -              size = PAGE_CACHE_SIZE - offset;
- -              if (size > len)
- -                      size = len;
- -
- -              ret = pagecache_write_begin(file, mapping, pos, size, 0,
- -                                                      &page, &fsdata);
- -              if (ret)
- -                      goto fail;
- -
- -              file_update_time(file);
- -
- -              transfer_result = lo_do_transfer(lo, WRITE, page, offset,
- -                              bvec->bv_page, bv_offs, size, IV);
- -              copied = size;
- -              if (unlikely(transfer_result))
- -                      copied = 0;
- -
- -              ret = pagecache_write_end(file, mapping, pos, size, copied,
- -                                                      page, fsdata);
- -              if (ret < 0 || ret != copied)
- -                      goto fail;
- -
- -              if (unlikely(transfer_result))
- -                      goto fail;
- -
- -              bv_offs += copied;
- -              len -= copied;
- -              offset = 0;
- -              index++;
- -              pos += copied;
- -      }
- -      ret = 0;
- -out:
- -      mutex_unlock(&mapping->host->i_mutex);
- -      return ret;
- -fail:
- -      ret = -1;
- -      goto out;
- -}
- -
   /**
    * __do_lo_send_write - helper for writing data to a loop device
    *
@@@ -229,8 -298,10 +231,8 @@@ static int __do_lo_send_write(struct fi
   /**
    * do_lo_send_direct_write - helper for writing data to a loop device
    *
- - * This is the fast, non-transforming version for backing filesystems which do
- - * not implement the address space operations write_begin and write_end.
- - * It uses the write file operation which should be present on all writeable
- - * filesystems.
+ + * This is the fast, non-transforming version that does not need double
+ + * buffering.
    */
   static int do_lo_send_direct_write(struct loop_device *lo,
                 struct bio_vec *bvec, loff_t pos, struct page *page)
@@@ -246,9 -317,15 +248,9 @@@
   /**
    * do_lo_send_write - helper for writing data to a loop device
    *
- - * This is the slow, transforming version for filesystems which do not
- - * implement the address space operations write_begin and write_end.  It
- - * uses the write file operation which should be present on all writeable
- - * filesystems.
- - *
- - * Using fops->write is slower than using aops->{prepare,commit}_write in the
- - * transforming case because we need to double buffer the data as we cannot do
- - * the transformations in place as we do not have direct access to the
- - * destination pages of the backing file.
+ + * This is the slow, transforming version that needs to double buffer the
+ + * data as it cannot do the transformations in place without having direct
+ + * access to the destination pages of the backing file.
    */
   static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
                 loff_t pos, struct page *page)
@@@ -274,16 -351,17 +276,16 @@@ static int lo_send(struct loop_device *
         struct page *page = NULL;
         int i, ret = 0;
   
- -      do_lo_send = do_lo_send_aops;
- -      if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
+ +      if (lo->transfer != transfer_none) {
+ +              page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
+ +              if (unlikely(!page))
+ +                      goto fail;
+ +              kmap(page);
+ +              do_lo_send = do_lo_send_write;
+ +      } else {
                 do_lo_send = do_lo_send_direct_write;
- -              if (lo->transfer != transfer_none) {
- -                      page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
- -                      if (unlikely(!page))
- -                              goto fail;
- -                      kmap(page);
- -                      do_lo_send = do_lo_send_write;
- -              }
         }
+ +
         bio_for_each_segment(bvec, bio, i) {
                 ret = do_lo_send(lo, bvec, pos, page);
                 if (ret < 0)
@@@ -407,6 -485,29 +409,29 @@@ static int do_bio_filebacked(struct loo
                         }
                 }
   
+               /*
+                * We use punch hole to reclaim the free space used by the
+                * image a.k.a. discard. However we do not support discard if
+                * encryption is enabled, because it may give an attacker
+                * useful information.
+                */
+               if (bio->bi_rw & REQ_DISCARD) {
+                       struct file *file = lo->lo_backing_file;
+                       int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+ 
+                       if ((!file->f_op->fallocate) ||
+                           lo->lo_encrypt_key_size) {
+                               ret = -EOPNOTSUPP;
+                               goto out;
+                       }
+                       ret = file->f_op->fallocate(file, mode, pos,
+                                                   bio->bi_size);
+                       if (unlikely(ret && ret != -EINVAL &&
+                                    ret != -EOPNOTSUPP))
+                               ret = -EIO;
+                       goto out;
+               }
+ 
                 ret = lo_send(lo, bio, pos);
   
                 if ((bio->bi_rw & REQ_FUA) && !ret) {
@@@ -437,7 -538,7 +462,7 @@@ static struct bio *loop_get_bio(struct 
         return bio_list_pop(&lo->lo_bio_list);
   }
   
- -static int loop_make_request(struct request_queue *q, struct bio *old_bio)
+ +static void loop_make_request(struct request_queue *q, struct bio *old_bio)
   {
         struct loop_device *lo = q->queuedata;
         int rw = bio_rw(old_bio);
@@@ -455,11 -556,12 +480,11 @@@
         loop_add_bio(lo, old_bio);
         wake_up(&lo->lo_event);
         spin_unlock_irq(&lo->lo_lock);
- -      return 0;
+ +      return;
   
   out:
         spin_unlock_irq(&lo->lo_lock);
         bio_io_error(old_bio);
- -      return 0;
   }
   
   struct switch_request {
@@@ -622,7 -724,7 +647,7 @@@ static int loop_change_fd(struct loop_d
                 goto out_putf;
   
         fput(old_file);
-       if (max_part > 0)
+       if (lo->lo_flags & LO_FLAGS_PARTSCAN)
                 ioctl_by_bdev(bdev, BLKRRPART, 0);
         return 0;
   
@@@ -644,10 -746,17 +669,10 @@@ static inline int is_loop_device(struc
   static ssize_t loop_attr_show(struct device *dev, char *page,
                               ssize_t (*callback)(struct loop_device *, char *))
   {
- -      struct loop_device *l, *lo = NULL;
- -
- -      mutex_lock(&loop_devices_mutex);
- -      list_for_each_entry(l, &loop_devices, lo_list)
- -              if (disk_to_dev(l->lo_disk) == dev) {
- -                      lo = l;
- -                      break;
- -              }
- -      mutex_unlock(&loop_devices_mutex);
+ +      struct gendisk *disk = dev_to_disk(dev);
+ +      struct loop_device *lo = disk->private_data;
   
- -      return lo ? callback(lo, page) : -EIO;
+ +      return callback(lo, page);
   }
   
   #define LOOP_ATTR_RO(_name)                                           \
@@@ -665,10 -774,10 +690,10 @@@ static ssize_t loop_attr_backing_file_s
         ssize_t ret;
         char *p = NULL;
   
- -      mutex_lock(&lo->lo_ctl_mutex);
+ +      spin_lock_irq(&lo->lo_lock);
         if (lo->lo_backing_file)
                 p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
- -      mutex_unlock(&lo->lo_ctl_mutex);
+ +      spin_unlock_irq(&lo->lo_lock);
   
         if (IS_ERR_OR_NULL(p))
                 ret = PTR_ERR(p);
@@@ -699,16 -808,25 +724,25 @@@ static ssize_t loop_attr_autoclear_show
         return sprintf(buf, "%s\n", autoclear ? "1" : "0");
   }
   
+ static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
+ {
+       int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
+ 
+       return sprintf(buf, "%s\n", partscan ? "1" : "0");
+ }
+ 
   LOOP_ATTR_RO(backing_file);
   LOOP_ATTR_RO(offset);
   LOOP_ATTR_RO(sizelimit);
   LOOP_ATTR_RO(autoclear);
+ LOOP_ATTR_RO(partscan);
   
   static struct attribute *loop_attrs[] = {
         &loop_attr_backing_file.attr,
         &loop_attr_offset.attr,
         &loop_attr_sizelimit.attr,
         &loop_attr_autoclear.attr,
+       &loop_attr_partscan.attr,
         NULL,
   };
   
@@@ -729,6 -847,35 +763,35 @@@ static void loop_sysfs_exit(struct loop
                            &loop_attribute_group);
   }
   
+ static void loop_config_discard(struct loop_device *lo)
+ {
+       struct file *file = lo->lo_backing_file;
+       struct inode *inode = file->f_mapping->host;
+       struct request_queue *q = lo->lo_queue;
+ 
+       /*
+        * We use punch hole to reclaim the free space used by the
+        * image a.k.a. discard. However we do not support discard if
+        * encryption is enabled, because it may give an attacker
+        * useful information.
+        */
+       if ((!file->f_op->fallocate) ||
+           lo->lo_encrypt_key_size) {
+               q->limits.discard_granularity = 0;
+               q->limits.discard_alignment = 0;
+               q->limits.max_discard_sectors = 0;
+               q->limits.discard_zeroes_data = 0;
+               queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
+               return;
+       }
+ 
+       q->limits.discard_granularity = inode->i_sb->s_blocksize;
+       q->limits.discard_alignment = inode->i_sb->s_blocksize;
+       q->limits.max_discard_sectors = UINT_MAX >> 9;
+       q->limits.discard_zeroes_data = 1;
+       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ }
+ 
   static int loop_set_fd(struct loop_device *lo, fmode_t mode,
                        struct block_device *bdev, unsigned int arg)
   {
@@@ -771,23 -918,35 +834,23 @@@
         mapping = file->f_mapping;
         inode = mapping->host;
   
- -      if (!(file->f_mode & FMODE_WRITE))
- -              lo_flags |= LO_FLAGS_READ_ONLY;
- -
         error = -EINVAL;
- -      if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
- -              const struct address_space_operations *aops = mapping->a_ops;
- -
- -              if (aops->write_begin)
- -                      lo_flags |= LO_FLAGS_USE_AOPS;
- -              if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
- -                      lo_flags |= LO_FLAGS_READ_ONLY;
+ +      if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
+ +              goto out_putf;
   
- -              lo_blocksize = S_ISBLK(inode->i_mode) ?
- -                      inode->i_bdev->bd_block_size : PAGE_SIZE;
+ +      if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
+ +          !file->f_op->write)
+ +              lo_flags |= LO_FLAGS_READ_ONLY;
   
- -              error = 0;
- -      } else {
- -              goto out_putf;
- -      }
+ +      lo_blocksize = S_ISBLK(inode->i_mode) ?
+ +              inode->i_bdev->bd_block_size : PAGE_SIZE;
   
+ +      error = -EFBIG;
         size = get_loop_size(lo, file);
- -
- -      if ((loff_t)(sector_t)size != size) {
- -              error = -EFBIG;
+ +      if ((loff_t)(sector_t)size != size)
                 goto out_putf;
- -      }
   
- -      if (!(mode & FMODE_WRITE))
- -              lo_flags |= LO_FLAGS_READ_ONLY;
+ +      error = 0;
   
         set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
   
@@@ -829,7 -988,9 +892,9 @@@
         }
         lo->lo_state = Lo_bound;
         wake_up_process(lo->lo_thread);
-       if (max_part > 0)
+       if (part_shift)
+               lo->lo_flags |= LO_FLAGS_PARTSCAN;
+       if (lo->lo_flags & LO_FLAGS_PARTSCAN)
                 ioctl_by_bdev(bdev, BLKRRPART, 0);
         return 0;
   
@@@ -890,10 -1051,11 +955,11 @@@ loop_init_xfer(struct loop_device *lo, 
         return err;
   }
   
- static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
+ static int loop_clr_fd(struct loop_device *lo)
   {
         struct file *filp = lo->lo_backing_file;
         gfp_t gfp = lo->old_gfp_mask;
+       struct block_device *bdev = lo->lo_device;
   
         if (lo->lo_state != Lo_bound)
                 return -ENXIO;
@@@ -910,9 -1072,7 +976,9 @@@
   
         kthread_stop(lo->lo_thread);
   
+ +      spin_lock_irq(&lo->lo_lock);
         lo->lo_backing_file = NULL;
+ +      spin_unlock_irq(&lo->lo_lock);
   
         loop_release_xfer(lo);
         lo->transfer = NULL;
@@@ -922,7 -1082,6 +988,6 @@@
         lo->lo_offset = 0;
         lo->lo_sizelimit = 0;
         lo->lo_encrypt_key_size = 0;
-       lo->lo_flags = 0;
         lo->lo_thread = NULL;
         memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
         memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
@@@ -940,8 -1099,11 +1005,11 @@@
         lo->lo_state = Lo_unbound;
         /* This is safe: open() is still holding a reference. */
         module_put(THIS_MODULE);
-       if (max_part > 0 && bdev)
+       if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
                 ioctl_by_bdev(bdev, BLKRRPART, 0);
+       lo->lo_flags = 0;
+       if (!part_shift)
+               lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
         mutex_unlock(&lo->lo_ctl_mutex);
         /*
          * Need not hold lo_ctl_mutex to fput backing file.
@@@ -995,6 -1157,7 +1063,7 @@@ loop_set_status(struct loop_device *lo
                 if (figure_loop_size(lo))
                         return -EFBIG;
         }
+       loop_config_discard(lo);
   
         memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
         memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
@@@ -1010,6 -1173,13 +1079,13 @@@
              (info->lo_flags & LO_FLAGS_AUTOCLEAR))
                 lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
   
+       if ((info->lo_flags & LO_FLAGS_PARTSCAN) &&
+            !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
+               lo->lo_flags |= LO_FLAGS_PARTSCAN;
+               lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
+               ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
+       }
+ 
         lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
         lo->lo_init[0] = info->lo_init[0];
         lo->lo_init[1] = info->lo_init[1];
@@@ -1203,7 -1373,7 +1279,7 @@@ static int lo_ioctl(struct block_devic
                 break;
         case LOOP_CLR_FD:
                 /* loop_clr_fd would have unlocked lo_ctl_mutex on success */
-               err = loop_clr_fd(lo, bdev);
+               err = loop_clr_fd(lo);
                 if (!err)
                         goto out_unlocked;
                 break;
@@@ -1390,22 -1560,13 +1466,22 @@@ static int lo_compat_ioctl(struct block
   
   static int lo_open(struct block_device *bdev, fmode_t mode)
   {
- -      struct loop_device *lo = bdev->bd_disk->private_data;
+ +      struct loop_device *lo;
+ +      int err = 0;
+ +
+ +      mutex_lock(&loop_index_mutex);
+ +      lo = bdev->bd_disk->private_data;
+ +      if (!lo) {
+ +              err = -ENXIO;
+ +              goto out;
+ +      }
   
         mutex_lock(&lo->lo_ctl_mutex);
         lo->lo_refcnt++;
         mutex_unlock(&lo->lo_ctl_mutex);
- -
- -      return 0;
+ +out:
+ +      mutex_unlock(&loop_index_mutex);
+ +      return err;
   }
   
   static int lo_release(struct gendisk *disk, fmode_t mode)
@@@ -1423,7 -1584,7 +1499,7 @@@
                  * In autoclear mode, stop the loop thread
                  * and remove configuration after last close.
                  */
-               err = loop_clr_fd(lo, NULL);
+               err = loop_clr_fd(lo);
                 if (!err)
                         goto out_unlocked;
         } else {
@@@ -1471,71 -1632,40 +1547,71 @@@ int loop_register_transfer(struct loop_
         return 0;
   }
   
+ +static int unregister_transfer_cb(int id, void *ptr, void *data)
+ +{
+ +      struct loop_device *lo = ptr;
+ +      struct loop_func_table *xfer = data;
+ +
+ +      mutex_lock(&lo->lo_ctl_mutex);
+ +      if (lo->lo_encryption == xfer)
+ +              loop_release_xfer(lo);
+ +      mutex_unlock(&lo->lo_ctl_mutex);
+ +      return 0;
+ +}
+ +
   int loop_unregister_transfer(int number)
   {
         unsigned int n = number;
- -      struct loop_device *lo;
         struct loop_func_table *xfer;
   
         if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
                 return -EINVAL;
   
         xfer_funcs[n] = NULL;
- -
- -      list_for_each_entry(lo, &loop_devices, lo_list) {
- -              mutex_lock(&lo->lo_ctl_mutex);
- -
- -              if (lo->lo_encryption == xfer)
- -                      loop_release_xfer(lo);
- -
- -              mutex_unlock(&lo->lo_ctl_mutex);
- -      }
- -
+ +      idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
         return 0;
   }
   
   EXPORT_SYMBOL(loop_register_transfer);
   EXPORT_SYMBOL(loop_unregister_transfer);
   
- -static struct loop_device *loop_alloc(int i)
+ +static int loop_add(struct loop_device **l, int i)
   {
         struct loop_device *lo;
         struct gendisk *disk;
+ +      int err;
   
         lo = kzalloc(sizeof(*lo), GFP_KERNEL);
- -      if (!lo)
+ +      if (!lo) {
+ +              err = -ENOMEM;
                 goto out;
+ +      }
+ +
+ +      err = idr_pre_get(&loop_index_idr, GFP_KERNEL);
+ +      if (err < 0)
+ +              goto out_free_dev;
+ +
+ +      if (i >= 0) {
+ +              int m;
+ +
+ +              /* create specific i in the index */
+ +              err = idr_get_new_above(&loop_index_idr, lo, i, &m);
+ +              if (err >= 0 && i != m) {
+ +                      idr_remove(&loop_index_idr, m);
+ +                      err = -EEXIST;
+ +              }
+ +      } else if (i == -1) {
+ +              int m;
+ +
+ +              /* get next free nr */
+ +              err = idr_get_new(&loop_index_idr, lo, &m);
+ +              if (err >= 0)
+ +                      i = m;
+ +      } else {
+ +              err = -EINVAL;
+ +      }
+ +      if (err < 0)
+ +              goto out_free_dev;
   
         lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
         if (!lo->lo_queue)
@@@ -1545,6 -1675,27 +1621,27 @@@
         if (!disk)
                 goto out_free_queue;
   
+       /*
+        * Disable partition scanning by default. The in-kernel partition
+        * scanning can be requested individually per-device during its
+        * setup. Userspace can always add and remove partitions from all
+        * devices. The needed partition minors are allocated from the
+        * extended minor space, the main loop device numbers will continue
+        * to match the loop minors, regardless of the number of partitions
+        * used.
+        *
+        * If max_part is given, partition scanning is globally enabled for
+        * all loop devices. The minors for the main loop devices will be
+        * multiples of max_part.
+        *
+        * Note: Global-for-all-devices, set-only-at-init, read-only module
+        * parameters like 'max_loop' and 'max_part' make things needlessly
+        * complicated, are too static, inflexible and may surprise
+        * userspace tools. Parameters like this in general should be avoided.
+        */
+       if (!part_shift)
+               disk->flags |= GENHD_FL_NO_PART_SCAN;
+       disk->flags |= GENHD_FL_EXT_DEVT;
         mutex_init(&lo->lo_ctl_mutex);
         lo->lo_number           = i;
         lo->lo_thread           = NULL;
@@@ -1556,158 -1707,81 +1653,158 @@@
         disk->private_data      = lo;
         disk->queue             = lo->lo_queue;
         sprintf(disk->disk_name, "loop%d", i);
- -      return lo;
+ +      add_disk(disk);
+ +      *l = lo;
+ +      return lo->lo_number;
   
   out_free_queue:
         blk_cleanup_queue(lo->lo_queue);
   out_free_dev:
         kfree(lo);
   out:
- -      return NULL;
+ +      return err;
   }
   
- -static void loop_free(struct loop_device *lo)
+ +static void loop_remove(struct loop_device *lo)
   {
+ +      del_gendisk(lo->lo_disk);
         blk_cleanup_queue(lo->lo_queue);
         put_disk(lo->lo_disk);
- -      list_del(&lo->lo_list);
         kfree(lo);
   }
   
- -static struct loop_device *loop_init_one(int i)
+ +static int find_free_cb(int id, void *ptr, void *data)
+ +{
+ +      struct loop_device *lo = ptr;
+ +      struct loop_device **l = data;
+ +
+ +      if (lo->lo_state == Lo_unbound) {
+ +              *l = lo;
+ +              return 1;
+ +      }
+ +      return 0;
+ +}
+ +
+ +static int loop_lookup(struct loop_device **l, int i)
   {
         struct loop_device *lo;
+ +      int ret = -ENODEV;
   
- -      list_for_each_entry(lo, &loop_devices, lo_list) {
- -              if (lo->lo_number == i)
- -                      return lo;
+ +      if (i < 0) {
+ +              int err;
+ +
+ +              err = idr_for_each(&loop_index_idr, &find_free_cb, &lo);
+ +              if (err == 1) {
+ +                      *l = lo;
+ +                      ret = lo->lo_number;
+ +              }
+ +              goto out;
         }
   
- -      lo = loop_alloc(i);
+ +      /* lookup and return a specific i */
+ +      lo = idr_find(&loop_index_idr, i);
         if (lo) {
- -              add_disk(lo->lo_disk);
- -              list_add_tail(&lo->lo_list, &loop_devices);
+ +              *l = lo;
+ +              ret = lo->lo_number;
         }
- -      return lo;
- -}
- -
- -static void loop_del_one(struct loop_device *lo)
- -{
- -      del_gendisk(lo->lo_disk);
- -      loop_free(lo);
+ +out:
+ +      return ret;
   }
   
   static struct kobject *loop_probe(dev_t dev, int *part, void *data)
   {
         struct loop_device *lo;
         struct kobject *kobj;
+ +      int err;
   
- -      mutex_lock(&loop_devices_mutex);
- -      lo = loop_init_one(MINOR(dev) >> part_shift);
- -      kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
- -      mutex_unlock(&loop_devices_mutex);
+ +      mutex_lock(&loop_index_mutex);
+ +      err = loop_lookup(&lo, MINOR(dev) >> part_shift);
+ +      if (err < 0)
+ +              err = loop_add(&lo, MINOR(dev) >> part_shift);
+ +      if (err < 0)
+ +              kobj = ERR_PTR(err);
+ +      else
+ +              kobj = get_disk(lo->lo_disk);
+ +      mutex_unlock(&loop_index_mutex);
   
         *part = 0;
         return kobj;
   }
   
+ +static long loop_control_ioctl(struct file *file, unsigned int cmd,
+ +                             unsigned long parm)
+ +{
+ +      struct loop_device *lo;
+ +      int ret = -ENOSYS;
+ +
+ +      mutex_lock(&loop_index_mutex);
+ +      switch (cmd) {
+ +      case LOOP_CTL_ADD:
+ +              ret = loop_lookup(&lo, parm);
+ +              if (ret >= 0) {
+ +                      ret = -EEXIST;
+ +                      break;
+ +              }
+ +              ret = loop_add(&lo, parm);
+ +              break;
+ +      case LOOP_CTL_REMOVE:
+ +              ret = loop_lookup(&lo, parm);
+ +              if (ret < 0)
+ +                      break;
+ +              mutex_lock(&lo->lo_ctl_mutex);
+ +              if (lo->lo_state != Lo_unbound) {
+ +                      ret = -EBUSY;
+ +                      mutex_unlock(&lo->lo_ctl_mutex);
+ +                      break;
+ +              }
+ +              if (lo->lo_refcnt > 0) {
+ +                      ret = -EBUSY;
+ +                      mutex_unlock(&lo->lo_ctl_mutex);
+ +                      break;
+ +              }
+ +              lo->lo_disk->private_data = NULL;
+ +              mutex_unlock(&lo->lo_ctl_mutex);
+ +              idr_remove(&loop_index_idr, lo->lo_number);
+ +              loop_remove(lo);
+ +              break;
+ +      case LOOP_CTL_GET_FREE:
+ +              ret = loop_lookup(&lo, -1);
+ +              if (ret >= 0)
+ +                      break;
+ +              ret = loop_add(&lo, -1);
+ +      }
+ +      mutex_unlock(&loop_index_mutex);
+ +
+ +      return ret;
+ +}
+ +
+ +static const struct file_operations loop_ctl_fops = {
+ +      .open           = nonseekable_open,
+ +      .unlocked_ioctl = loop_control_ioctl,
+ +      .compat_ioctl   = loop_control_ioctl,
+ +      .owner          = THIS_MODULE,
+ +      .llseek         = noop_llseek,
+ +};
+ +
+ +static struct miscdevice loop_misc = {
+ +      .minor          = LOOP_CTRL_MINOR,
+ +      .name           = "loop-control",
+ +      .fops           = &loop_ctl_fops,
+ +};
+ +
+ +MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR);
+ +MODULE_ALIAS("devname:loop-control");
+ +
   static int __init loop_init(void)
   {
         int i, nr;
         unsigned long range;
- -      struct loop_device *lo, *next;
+ +      struct loop_device *lo;
+ +      int err;
   
- -      /*
- -       * loop module now has a feature to instantiate underlying device
- -       * structure on-demand, provided that there is an access dev node.
- -       * However, this will not work well with user space tool that doesn't
- -       * know about such "feature".  In order to not break any existing
- -       * tool, we do the following:
- -       *
- -       * (1) if max_loop is specified, create that many upfront, and this
- -       *     also becomes a hard limit.
- -       * (2) if max_loop is not specified, create 8 loop device on module
- -       *     load, user can further extend loop device by create dev node
- -       *     themselves and have kernel automatically instantiate actual
- -       *     device on-demand.
- -       */
+ +      err = misc_register(&loop_misc);
+ +      if (err < 0)
+ +              return err;
   
         part_shift = 0;
         if (max_part > 0) {
@@@ -1730,60 -1804,57 +1827,60 @@@
         if (max_loop > 1UL << (MINORBITS - part_shift))
                 return -EINVAL;
   
+ +      /*
+ +       * If max_loop is specified, create that many devices upfront.
+ +       * This also becomes a hard limit. If max_loop is not specified,
+ +       * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module
+ +       * init time. Loop devices can be requested on-demand with the
+ +       * /dev/loop-control interface, or be instantiated by accessing
+ +       * a 'dead' device node.
+ +       */
         if (max_loop) {
                 nr = max_loop;
                 range = max_loop << part_shift;
         } else {
- -              nr = 8;
+ +              nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT;
                 range = 1UL << MINORBITS;
         }
   
         if (register_blkdev(LOOP_MAJOR, "loop"))
                 return -EIO;
   
- -      for (i = 0; i < nr; i++) {
- -              lo = loop_alloc(i);
- -              if (!lo)
- -                      goto Enomem;
- -              list_add_tail(&lo->lo_list, &loop_devices);
- -      }
- -
- -      /* point of no return */
- -
- -      list_for_each_entry(lo, &loop_devices, lo_list)
- -              add_disk(lo->lo_disk);
- -
         blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
                                   THIS_MODULE, loop_probe, NULL, NULL);
   
+ +      /* pre-create number of devices given by config or max_loop */
+ +      mutex_lock(&loop_index_mutex);
+ +      for (i = 0; i < nr; i++)
+ +              loop_add(&lo, i);
+ +      mutex_unlock(&loop_index_mutex);
+ +
         printk(KERN_INFO "loop: module loaded\n");
         return 0;
+ +}
   
- -Enomem:
- -      printk(KERN_INFO "loop: out of memory\n");
- -
- -      list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
- -              loop_free(lo);
+ +static int loop_exit_cb(int id, void *ptr, void *data)
+ +{      /* idr_for_each() callback used by loop_exit(); id and data are unused */
+ +      struct loop_device *lo = ptr;
   
- -      unregister_blkdev(LOOP_MAJOR, "loop");
- -      return -ENOMEM;
+ +      loop_remove(lo);
+ +      return 0;
   }
   
   static void __exit loop_exit(void)
   {
         unsigned long range;
- -      struct loop_device *lo, *next;
   
         range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;  /* same range loop_init() registered */
   
- -      list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
- -              loop_del_one(lo);
+ +      idr_for_each(&loop_index_idr, &loop_exit_cb, NULL);     /* loop_remove() each idr entry */
+ +      idr_remove_all(&loop_index_idr);
+ +      idr_destroy(&loop_index_idr);
   
         blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
         unregister_blkdev(LOOP_MAJOR, "loop");
+ +
+ +      misc_deregister(&loop_misc);    /* undoes misc_register() from loop_init() */
   }
   
   module_init(loop_init);
diff --combined drivers/block/xen-blkback/blkback.c

index 1540792b1e547ee882fcbc168bb5d4156d5d1678,79efec24569bb9558414c16e14a07b3a66153205..15ec4db194d1bb793e4409dfed54563e4b71b080
--- 1/drivers/block/xen-blkback/blkback.c
--- 2/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@@ -39,6 -39,9 +39,9 @@@
   #include <linux/list.h>
   #include <linux/delay.h>
   #include <linux/freezer.h>
+ #include <linux/loop.h>
+ #include <linux/falloc.h>
+ #include <linux/fs.h>
   
   #include <xen/events.h>
   #include <xen/page.h>
@@@ -258,13 -261,16 +261,16 @@@ irqreturn_t xen_blkif_be_int(int irq, v
   
   static void print_stats(struct xen_blkif *blkif)
   {
-       pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d\n",
+       pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d"
+                "  |  ds %4d\n",
                  current->comm, blkif->st_oo_req,
-                blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
+                blkif->st_rd_req, blkif->st_wr_req,
+                blkif->st_f_req, blkif->st_ds_req);
         blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);        /* 10 seconds from now */
         blkif->st_rd_req = 0;
         blkif->st_wr_req = 0;
         blkif->st_oo_req = 0;
+       blkif->st_ds_req = 0;   /* NOTE(review): st_f_req is printed above but not reset - intended? */
   }
   
   int xen_blkif_schedule(void *arg)
@@@ -396,7 -402,7 +402,7 @@@ static int xen_blkbk_map(struct blkif_r
                         continue;
   
                 ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
- -                      blkbk->pending_page(pending_req, i), false);
+ +                      blkbk->pending_page(pending_req, i), NULL);
                 if (ret) {
                         pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
                                  (unsigned long)map[i].dev_bus_addr, ret);
@@@ -410,6 -416,59 +416,59 @@@
         return ret;
   }
   
+ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
+ {     /* carry out one discard request and always send a response for it */
+       int err = 0;
+       int status = BLKIF_RSP_OKAY;
+       struct block_device *bdev = blkif->vbd.bdev;
+ 
+       if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
+               /* physical backend: forward the discard to the block device */
+               err = blkdev_issue_discard(bdev,
+                               req->u.discard.sector_number,
+                               req->u.discard.nr_sectors,
+                               GFP_KERNEL, 0);
+       else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
+               /* file backend: punch a hole in the loop device's backing file */
+               struct loop_device *lo = bdev->bd_disk->private_data;   /* NOTE(review): assumes bdev is a loop device - confirm */
+               struct file *file = lo->lo_backing_file;
+ 
+               if (file->f_op->fallocate)
+                       err = file->f_op->fallocate(file,
+                               FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+                               req->u.discard.sector_number << 9,      /* << 9: 512-byte sectors to bytes */
+                               req->u.discard.nr_sectors << 9);
+               else
+                       err = -EOPNOTSUPP;
+       } else  /* backend type is neither PHY nor FILE */
+               err = -EOPNOTSUPP;
+ 
+       if (err == -EOPNOTSUPP) {
+               pr_debug(DRV_PFX "discard op failed, not supported\n");
+               status = BLKIF_RSP_EOPNOTSUPP;
+       } else if (err) /* any other failure is reported as a generic error */
+               status = BLKIF_RSP_ERROR;
+ 
+       make_response(blkif, req->id, req->operation, status);  /* sent on success and on failure */
+ }
+ 
+ static void xen_blk_drain_io(struct xen_blkif *blkif)
+ {     /* block until refcnt drops to 2, drain is cleared, or the kthread must stop */
+       atomic_set(&blkif->drain, 1);   /* checked by __end_block_io_op() before complete() */
+       do {
+               /* refcnt <= 2 is the idle level: the initial value is one,
+                * plus one taken at the start of the xen_blkif_schedule thread. */
+               if (atomic_read(&blkif->refcnt) <= 2)
+                       break;
+               wait_for_completion_interruptible_timeout(
+                               &blkif->drain_complete, HZ);    /* re-check after at most HZ jiffies */
+ 
+               if (!atomic_read(&blkif->drain))
+                       break;
+       } while (!kthread_should_stop());
+       atomic_set(&blkif->drain, 0);   /* always cleared on the way out */
+ }
+ 
   /*
    * Completion callback on the bio's. Called as bh->b_end_io()
    */
@@@ -422,6 -481,11 +481,11 @@@ static void __end_block_io_op(struct pe
                 pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
                 xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
                 pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+       } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
+                   (error == -EOPNOTSUPP)) {
+               pr_debug(DRV_PFX "write barrier op failed, not supported\n");
+               xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
+               pending_req->status = BLKIF_RSP_EOPNOTSUPP;
         } else if (error) {
                 pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
                          " error=%d\n", error);
@@@ -438,6 -502,10 +502,10 @@@
                 make_response(pending_req->blkif, pending_req->id,
                               pending_req->operation, pending_req->status);
                 xen_blkif_put(pending_req->blkif);
+               if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
+                       if (atomic_read(&pending_req->blkif->drain))
+                               complete(&pending_req->blkif->drain_complete);
+               }
                 free_req(pending_req);
         }
   }
@@@ -532,7 -600,6 +600,6 @@@ do_block_io_op(struct xen_blkif *blkif
   
         return more_to_do;
   }
- 
   /*
    * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
    * and call the 'submit_bio' to pass it to the underlying storage.
@@@ -549,6 -616,7 +616,7 @@@ static int dispatch_rw_block_io(struct 
         int i, nbio = 0;
         int operation;
         struct blk_plug plug;
+       bool drain = false;
   
         switch (req->operation) {
         case BLKIF_OP_READ:
@@@ -559,11 -627,16 +627,16 @@@
                 blkif->st_wr_req++;
                 operation = WRITE_ODIRECT;
                 break;
+       case BLKIF_OP_WRITE_BARRIER:
+               drain = true;
         case BLKIF_OP_FLUSH_DISKCACHE:
                 blkif->st_f_req++;
                 operation = WRITE_FLUSH;
                 break;
-       case BLKIF_OP_WRITE_BARRIER:
+       case BLKIF_OP_DISCARD:
+               blkif->st_ds_req++;
+               operation = REQ_DISCARD;
+               break;
         default:
                 operation = 0; /* make gcc happy */
                 goto fail_response;
@@@ -572,7 -645,8 +645,8 @@@
   
         /* Check that the number of segments is sane. */
         nseg = req->nr_segments;
-       if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
+       if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
+                               operation != REQ_DISCARD) ||
             unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
                 pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
                          nseg);
@@@ -621,16 -695,25 +695,25 @@@
                 }
         }
   
+       /* Wait on all outstanding I/O's and once that has been completed
+        * issue the WRITE_FLUSH.
+        */
+       if (drain)
+               xen_blk_drain_io(pending_req->blkif);
+ 
         /*
          * If we have failed at this point, we need to undo the M2P override,
          * set gnttab_set_unmap_op on all of the grant references and perform
          * the hypercall to unmap the grants - that is all done in
          * xen_blkbk_unmap.
          */
-       if (xen_blkbk_map(req, pending_req, seg))
+       if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
                 goto fail_flush;
   
-       /* This corresponding xen_blkif_put is done in __end_block_io_op */
+       /*
+        * This corresponding xen_blkif_put is done in __end_block_io_op, or
+        * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
+        */
         xen_blkif_get(blkif);
   
         for (i = 0; i < nseg; i++) {
@@@ -654,18 -737,25 +737,25 @@@
                 preq.sector_number += seg[i].nsec;
         }
   
-       /* This will be hit if the operation was a flush. */
+       /* This will be hit if the operation was a flush or discard. */
         if (!bio) {
-               BUG_ON(operation != WRITE_FLUSH);
+               BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
   
-               bio = bio_alloc(GFP_KERNEL, 0);
-               if (unlikely(bio == NULL))
-                       goto fail_put_bio;
+               if (operation == WRITE_FLUSH) {
+                       bio = bio_alloc(GFP_KERNEL, 0);
+                       if (unlikely(bio == NULL))
+                               goto fail_put_bio;
   
-               biolist[nbio++] = bio;
-               bio->bi_bdev    = preq.bdev;
-               bio->bi_private = pending_req;
-               bio->bi_end_io  = end_block_io_op;
+                       biolist[nbio++] = bio;
+                       bio->bi_bdev    = preq.bdev;
+                       bio->bi_private = pending_req;
+                       bio->bi_end_io  = end_block_io_op;
+               } else if (operation == REQ_DISCARD) {
+                       xen_blk_discard(blkif, req);
+                       xen_blkif_put(blkif);
+                       free_req(pending_req);
+                       return 0;
+               }
         }
   
         /*
@@@ -685,7 -775,7 +775,7 @@@
   
         if (operation == READ)
                 blkif->st_rd_sect += preq.nr_sects;
-       else if (operation == WRITE || operation == WRITE_FLUSH)
+       else if (operation & WRITE)
                 blkif->st_wr_sect += preq.nr_sects;
   
         return 0;
@@@ -765,9 -855,9 +855,9 @@@ static int __init xen_blkif_init(void
   
         mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
   
-       blkbk->pending_reqs          = kmalloc(sizeof(blkbk->pending_reqs[0]) *
+       blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
                                         xen_blkif_reqs, GFP_KERNEL);
-       blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
+       blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
                                         mmap_pages, GFP_KERNEL);
         blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
                                         mmap_pages, GFP_KERNEL);
@@@ -790,8 -880,6 +880,6 @@@
         if (rc)
                 goto failed_init;
   
-       memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs));
- 
         INIT_LIST_HEAD(&blkbk->pending_free);
         spin_lock_init(&blkbk->pending_free_lock);
         init_waitqueue_head(&blkbk->pending_free_wq);
diff --combined drivers/block/xen-blkback/common.h

index c4bd34063ecc8f01303932d4ec35d239e2f0d4a0,e638457d9de44d3305011207b333cf85cacb2757..de09f525d6c174509af3a1e25ce6609fbb2ef1fc
--- 1/drivers/block/xen-blkback/common.h
--- 2/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@@ -27,6 -27,7 +27,6 @@@
   #ifndef __XEN_BLKIF__BACKEND__COMMON_H__
   #define __XEN_BLKIF__BACKEND__COMMON_H__
   
- -#include <linux/version.h>
   #include <linux/module.h>
   #include <linux/interrupt.h>
   #include <linux/slab.h>
@@@ -62,13 -63,26 +62,26 @@@ struct blkif_common_response 
   
   /* i386 protocol version */
   #pragma pack(push, 4)
+ 
+ struct blkif_x86_32_request_rw {      /* u.rw arm of struct blkif_x86_32_request */
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ };
+ 
+ struct blkif_x86_32_request_discard { /* u.discard arm of struct blkif_x86_32_request */
+       blkif_sector_t sector_number;/* first sector of the range to discard */
+       uint64_t nr_sectors;         /* number of sectors to discard         */
+ };
+ 
   struct blkif_x86_32_request {
         uint8_t        operation;    /* BLKIF_OP_???                         */
         uint8_t        nr_segments;  /* number of segments                   */
         blkif_vdev_t   handle;       /* only for read/write requests         */
         uint64_t       id;           /* private guest value, echoed in resp  */
-       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       union {
+               struct blkif_x86_32_request_rw rw;      /* read, write, barrier, flush */
+               struct blkif_x86_32_request_discard discard;    /* BLKIF_OP_DISCARD */
+       } u;    /* arm is chosen by the operation field */
   };
   struct blkif_x86_32_response {
         uint64_t        id;              /* copied from request */
@@@ -78,13 -92,26 +91,26 @@@
   #pragma pack(pop)
   
   /* x86_64 protocol version */
+ 
+ struct blkif_x86_64_request_rw {      /* u.rw arm of struct blkif_x86_64_request */
+       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ };
+ 
+ struct blkif_x86_64_request_discard { /* u.discard arm of struct blkif_x86_64_request */
+       blkif_sector_t sector_number;/* first sector of the range to discard */
+       uint64_t nr_sectors;         /* number of sectors to discard         */
+ };
+ 
   struct blkif_x86_64_request {
         uint8_t        operation;    /* BLKIF_OP_???                         */
         uint8_t        nr_segments;  /* number of segments                   */
         blkif_vdev_t   handle;       /* only for read/write requests         */
         uint64_t       __attribute__((__aligned__(8))) id;
-       blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-       struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       union {
+               struct blkif_x86_64_request_rw rw;      /* read, write, barrier, flush */
+               struct blkif_x86_64_request_discard discard;    /* BLKIF_OP_DISCARD */
+       } u;    /* arm is chosen by the operation field */
   };
   struct blkif_x86_64_response {
         uint64_t       __attribute__((__aligned__(8))) id;
@@@ -112,6 -139,11 +138,11 @@@ enum blkif_protocol 
         BLKIF_PROTOCOL_X86_64 = 3,
   };
   
+ enum blkif_backend_type {     /* stored in xen_blkif.blk_backend_type by xen_blkbk_discard() */
+       BLKIF_BACKEND_PHY  = 1, /* type starts with "phy" and the queue supports discard */
+       BLKIF_BACKEND_FILE = 2, /* type starts with "file" */
+ };
+ 
   struct xen_vbd {
         /* What the domain refers to this vbd as. */
         blkif_vdev_t            handle;
@@@ -137,6 -169,7 +168,7 @@@ struct xen_blkif 
         unsigned int            irq;
         /* Comms information. */
         enum blkif_protocol     blk_protocol;
+       enum blkif_backend_type blk_backend_type;
         union blkif_back_rings  blk_rings;
         struct vm_struct        *blk_ring_area;
         /* The VBD attached to this interface. */
@@@ -148,6 -181,9 +180,9 @@@
         atomic_t                refcnt;
   
         wait_queue_head_t       wq;
+       /* for barrier (drain) requests */
+       struct completion       drain_complete;
+       atomic_t                drain;
         /* One thread per one blkif. */
         struct task_struct      *xenblkd;
         unsigned int            waiting_reqs;
@@@ -158,6 -194,7 +193,7 @@@
         int                     st_wr_req;
         int                     st_oo_req;
         int                     st_f_req;
+       int                     st_ds_req;
         int                     st_rd_sect;
         int                     st_wr_sect;
   
@@@ -181,7 -218,7 +217,7 @@@
   
   struct phys_req {
         unsigned short          dev;
-       unsigned short          nr_sects;
+       blkif_sector_t          nr_sects;
         struct block_device     *bdev;
         blkif_sector_t          sector_number;
   };
@@@ -195,6 -232,8 +231,8 @@@ int xen_blkif_schedule(void *arg)
   int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                               struct backend_info *be, int state);
   
+ int xen_blkbk_barrier(struct xenbus_transaction xbt,
+                     struct backend_info *be, int state);
   struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
   
   static inline void blkif_get_x86_32_req(struct blkif_request *dst,
@@@ -205,12 -244,25 +243,25 @@@
         dst->nr_segments = src->nr_segments;
         dst->handle = src->handle;
         dst->id = src->id;
-       dst->u.rw.sector_number = src->sector_number;
-       barrier();
-       if (n > dst->nr_segments)
-               n = dst->nr_segments;
-       for (i = 0; i < n; i++)
-               dst->u.rw.seg[i] = src->seg[i];
+       switch (src->operation) {
+       case BLKIF_OP_READ:
+       case BLKIF_OP_WRITE:
+       case BLKIF_OP_WRITE_BARRIER:
+       case BLKIF_OP_FLUSH_DISKCACHE:
+               dst->u.rw.sector_number = src->u.rw.sector_number;
+               barrier();
+               if (n > dst->nr_segments)
+                       n = dst->nr_segments;
+               for (i = 0; i < n; i++)
+                       dst->u.rw.seg[i] = src->u.rw.seg[i];
+               break;
+       case BLKIF_OP_DISCARD:
+               dst->u.discard.sector_number = src->u.discard.sector_number;
+               dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+               break;
+       default:
+               break;
+       }
   }
   
   static inline void blkif_get_x86_64_req(struct blkif_request *dst,
@@@ -221,12 -273,25 +272,25 @@@
         dst->nr_segments = src->nr_segments;
         dst->handle = src->handle;
         dst->id = src->id;
-       dst->u.rw.sector_number = src->sector_number;
-       barrier();
-       if (n > dst->nr_segments)
-               n = dst->nr_segments;
-       for (i = 0; i < n; i++)
-               dst->u.rw.seg[i] = src->seg[i];
+       switch (src->operation) {
+       case BLKIF_OP_READ:
+       case BLKIF_OP_WRITE:
+       case BLKIF_OP_WRITE_BARRIER:
+       case BLKIF_OP_FLUSH_DISKCACHE:
+               dst->u.rw.sector_number = src->u.rw.sector_number;
+               barrier();
+               if (n > dst->nr_segments)
+                       n = dst->nr_segments;
+               for (i = 0; i < n; i++)
+                       dst->u.rw.seg[i] = src->u.rw.seg[i];
+               break;
+       case BLKIF_OP_DISCARD:
+               dst->u.discard.sector_number = src->u.discard.sector_number;
+               dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+               break;
+       default:
+               break;
+       }
   }
   
   #endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
diff --combined drivers/block/xen-blkback/xenbus.c

index 5fd2010f7d2bd96e1dc7b4c290fa71299d480d14,a6d43030b1078e027e4c0ccb41cea7070518e428..2c008afe63d9dbb5499712f9abd3333904a35a3c
--- 1/drivers/block/xen-blkback/xenbus.c
--- 2/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@@ -114,6 -114,8 +114,8 @@@ static struct xen_blkif *xen_blkif_allo
         spin_lock_init(&blkif->blk_ring_lock);
         atomic_set(&blkif->refcnt, 1);
         init_waitqueue_head(&blkif->wq);
+       init_completion(&blkif->drain_complete);
+       atomic_set(&blkif->drain, 0);
         blkif->st_print = jiffies;
         init_waitqueue_head(&blkif->waiting_to_free);
   
@@@ -272,6 -274,7 +274,7 @@@ VBD_SHOW(oo_req,  "%d\n", be->blkif->st
   VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
   VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
   VBD_SHOW(f_req,  "%d\n", be->blkif->st_f_req);
+ VBD_SHOW(ds_req,  "%d\n", be->blkif->st_ds_req);
   VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
   VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
   
@@@ -280,6 -283,7 +283,7 @@@ static struct attribute *xen_vbdstat_at
         &dev_attr_rd_req.attr,
         &dev_attr_wr_req.attr,
         &dev_attr_f_req.attr,
+       &dev_attr_ds_req.attr,
         &dev_attr_rd_sect.attr,
         &dev_attr_wr_sect.attr,
         NULL
@@@ -419,6 -423,73 +423,73 @@@ int xen_blkbk_flush_diskcache(struct xe
         return err;
   }
   
+ int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
+ {     /* read the "type" key, record the backend type, write "feature-discard" */
+       struct xenbus_device *dev = be->dev;
+       struct xen_blkif *blkif = be->blkif;
+       char *type;
+       int err;
+       int state = 0;  /* value written to "feature-discard"; 1 only when discard is usable */
+ 
+       type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
+       if (!IS_ERR(type)) {
+               if (strncmp(type, "file", 4) == 0) {    /* prefix match only */
+                       state = 1;
+                       blkif->blk_backend_type = BLKIF_BACKEND_FILE;
+               }
+               if (strncmp(type, "phy", 3) == 0) {     /* prefix match only */
+                       struct block_device *bdev = be->blkif->vbd.bdev;
+                       struct request_queue *q = bdev_get_queue(bdev);
+                       if (blk_queue_discard(q)) {     /* otherwise state stays 0 and the type is not recorded */
+                               err = xenbus_printf(xbt, dev->nodename,
+                                       "discard-granularity", "%u",
+                                       q->limits.discard_granularity);
+                               if (err) {
+                                       xenbus_dev_fatal(dev, err,
+                                               "writing discard-granularity");
+                                       goto kfree;     /* skips the feature-discard write */
+                               }
+                               err = xenbus_printf(xbt, dev->nodename,
+                                       "discard-alignment", "%u",
+                                       q->limits.discard_alignment);
+                               if (err) {
+                                       xenbus_dev_fatal(dev, err,
+                                               "writing discard-alignment");
+                                       goto kfree;     /* skips the feature-discard write */
+                               }
+                               state = 1;
+                               blkif->blk_backend_type = BLKIF_BACKEND_PHY;
+                       }
+               }
+       } else {
+               err = PTR_ERR(type);
+               xenbus_dev_fatal(dev, err, "reading type");
+               goto out;       /* type holds an error pointer, so it must not be freed */
+       }
+ 
+       err = xenbus_printf(xbt, dev->nodename, "feature-discard",
+                           "%d", state);
+       if (err)
+               xenbus_dev_fatal(dev, err, "writing feature-discard");
+ kfree:
+       kfree(type);    /* releases the string returned by xenbus_read() */
+ out:
+       return err;
+ }
+ int xen_blkbk_barrier(struct xenbus_transaction xbt,
+                     struct backend_info *be, int state)
+ {     /* write state to the "feature-barrier" key under dev->nodename */
+       struct xenbus_device *dev = be->dev;
+       int err;
+ 
+       err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
+                           "%d", state);
+       if (err)
+               xenbus_dev_fatal(dev, err, "writing feature-barrier");
+ 
+       return err;     /* xenbus_printf() result; nonzero was reported as fatal above */
+ }
+ 
   /*
    * Entry point to this code when a new device is created.  Allocate the basic
    * structures, and watch the store waiting for the hotplug scripts to tell us
@@@ -601,11 -672,11 +672,11 @@@ static void frontend_changed(struct xen
                 break;
   
         case XenbusStateClosing:
- -              xen_blkif_disconnect(be->blkif);
                 xenbus_switch_state(dev, XenbusStateClosing);
                 break;
   
         case XenbusStateClosed:
+ +              xen_blkif_disconnect(be->blkif);
                 xenbus_switch_state(dev, XenbusStateClosed);
                 if (xenbus_dev_is_online(dev))
                         break;
@@@ -650,6 -721,11 +721,11 @@@ again
         if (err)
                 goto abort;
   
+       err = xen_blkbk_discard(xbt, be);
+ 
+       /* If we can't advertise it is OK. */
+       err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
+ 
         err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
                             (unsigned long long)vbd_sz(&be->blkif->vbd));
         if (err) {
diff --combined drivers/block/xen-blkfront.c

index 9ea8c2576c70e768f22ad8ea2cc423337611fb4f,773da7d6491e882ab30e980cb0bb7699c8d72ee2..7b2ec5908413da7b989044828879afa173613b87
--- 1/drivers/block/xen-blkfront.c
--- 2/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@@ -98,6 -98,9 +98,9 @@@ struct blkfront_inf
         unsigned long shadow_free;
         unsigned int feature_flush;
         unsigned int flush_op;
+       unsigned int feature_discard;
+       unsigned int discard_granularity;
+       unsigned int discard_alignment;
         int is_ready;
   };
   
@@@ -123,8 -126,8 +126,8 @@@ static DEFINE_SPINLOCK(minor_lock)
   #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
   #define EMULATED_HD_DISK_MINOR_OFFSET (0)
   #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
- -#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
- -#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)
+ +#define EMULATED_SD_DISK_MINOR_OFFSET (0)
+ +#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256)
   
   #define DEV_NAME      "xvd"   /* name in /dev */
   
@@@ -302,29 -305,36 +305,36 @@@ static int blkif_queue_request(struct r
                 ring_req->operation = info->flush_op;
         }
   
-       ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
-       BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+       if (unlikely(req->cmd_flags & REQ_DISCARD)) {
+               /* id, sector_number and handle are set above. */
+               ring_req->operation = BLKIF_OP_DISCARD;
+               ring_req->nr_segments = 0;
+               ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
+       } else {
+               ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
+               BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
   
-       for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
-               buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
-               fsect = sg->offset >> 9;
-               lsect = fsect + (sg->length >> 9) - 1;
-               /* install a grant reference. */
-               ref = gnttab_claim_grant_reference(&gref_head);
-               BUG_ON(ref == -ENOSPC);
+               for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
+                       buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
+                       fsect = sg->offset >> 9;
+                       lsect = fsect + (sg->length >> 9) - 1;
+                       /* install a grant reference. */
+                       ref = gnttab_claim_grant_reference(&gref_head);
+                       BUG_ON(ref == -ENOSPC);
   
-               gnttab_grant_foreign_access_ref(
-                               ref,
-                               info->xbdev->otherend_id,
-                               buffer_mfn,
-                               rq_data_dir(req) );
- 
-               info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
-               ring_req->u.rw.seg[i] =
-                               (struct blkif_request_segment) {
-                                       .gref       = ref,
-                                       .first_sect = fsect,
-                                       .last_sect  = lsect };
+                       gnttab_grant_foreign_access_ref(
+                                       ref,
+                                       info->xbdev->otherend_id,
+                                       buffer_mfn,
+                                       rq_data_dir(req));
+ 
+                       info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
+                       ring_req->u.rw.seg[i] =
+                                       (struct blkif_request_segment) {
+                                               .gref       = ref,
+                                               .first_sect = fsect,
+                                               .last_sect  = lsect };
+               }
         }
   
         info->ring.req_prod_pvt++;
@@@ -370,7 -380,9 +380,9 @@@ static void do_blkif_request(struct req
   
                 blk_start_request(req);
   
-               if (req->cmd_type != REQ_TYPE_FS) {
+               if ((req->cmd_type != REQ_TYPE_FS) ||
+                   ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
+                   !info->flush_op)) {
                         __blk_end_request_all(req, -EIO);
                         continue;
                 }
@@@ -399,6 -411,7 +411,7 @@@ wait
   static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
   {
         struct request_queue *rq;
+       struct blkfront_info *info = gd->private_data;
   
         rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
         if (rq == NULL)
@@@ -406,6 -419,13 +419,13 @@@
   
         queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
   
+       if (info->feature_discard) {
+               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
+               blk_queue_max_discard_sectors(rq, get_capacity(gd));
+               rq->limits.discard_granularity = info->discard_granularity;
+               rq->limits.discard_alignment = info->discard_alignment;
+       }
+ 
         /* Hard sector size and max sectors impersonate the equiv. hardware. */
         blk_queue_logical_block_size(rq, sector_size);
         blk_queue_max_hw_sectors(rq, 512);
@@@ -529,7 -549,7 +549,7 @@@ static int xlvbd_alloc_gendisk(blkif_se
                 minor = BLKIF_MINOR_EXT(info->vdevice);
                 nr_parts = PARTS_PER_EXT_DISK;
                 offset = minor / nr_parts;
- -              if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4)
+ +              if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
                         printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
                                         "emulated IDE disks,\n\t choose an xvd device name"
                                         "from xvde on\n", info->vdevice);
@@@ -722,6 -742,17 +742,17 @@@ static irqreturn_t blkif_interrupt(int 
   
                 error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
                 switch (bret->operation) {
+               case BLKIF_OP_DISCARD:
+                       if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+                               struct request_queue *rq = info->rq;
+                               printk(KERN_WARNING "blkfront: %s: discard op failed\n",
+                                          info->gd->disk_name);
+                               error = -EOPNOTSUPP;
+                               info->feature_discard = 0;
+                               queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
+                       }
+                       __blk_end_request_all(req, error);
+                       break;
                 case BLKIF_OP_FLUSH_DISKCACHE:
                 case BLKIF_OP_WRITE_BARRIER:
                         if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
@@@ -1098,6 -1129,33 +1129,33 @@@ blkfront_closing(struct blkfront_info *
         bdput(bdev);
   }
   
+ static void blkfront_setup_discard(struct blkfront_info *info) /* probe the backend's "type" over xenbus and record discard support in @info */
+ {
+       int err; /* xenbus_gather() result; 0 is treated as success below */
+       char *type; /* backend "type" string read from xenbus; kfree()d at the end */
+       unsigned int discard_granularity;
+       unsigned int discard_alignment;
+ 
+       type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL); /* read the "type" node under the other end's xenbus path */
+       if (IS_ERR(type)) /* read failed: return without writing anything to @info */
+               return;
+ 
+       if (strncmp(type, "phy", 3) == 0) { /* type begins with "phy" */
+               err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+                       "discard-granularity", "%u", &discard_granularity,
+                       "discard-alignment", "%u", &discard_alignment,
+                       NULL); /* both values are requested in a single gather call */
+               if (!err) { /* enable discard only when the gather reported no error */
+                       info->feature_discard = 1;
+                       info->discard_granularity = discard_granularity;
+                       info->discard_alignment = discard_alignment;
+               }
+       } else if (strncmp(type, "file", 4) == 0) /* type begins with "file" */
+               info->feature_discard = 1; /* discard_granularity/discard_alignment are not written on this path */
+ 
+       kfree(type); /* reached for every type, including ones matching neither prefix */
+ }
+ 
   /*
    * Invoked when the backend is finally 'ready' (and has produced
    * the details about the physical device - #sectors, size, etc).
@@@ -1108,7 -1166,7 +1166,7 @@@ static void blkfront_connect(struct blk
         unsigned long sector_size;
         unsigned int binfo;
         int err;
-       int barrier, flush;
+       int barrier, flush, discard;
   
         switch (info->connected) {
         case BLKIF_STATE_CONNECTED:
@@@ -1178,7 -1236,14 +1236,14 @@@
                 info->feature_flush = REQ_FLUSH;
                 info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
         }
-               
+ 
+       err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+                           "feature-discard", "%d", &discard,
+                           NULL);
+ 
+       if (!err && discard)
+               blkfront_setup_discard(info);
+ 
         err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
         if (err) {
                 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@@ -1385,6 -1450,8 +1450,8 @@@ static struct xenbus_driver blkfront = 
   
   static int __init xlblk_init(void)
   {
+       int ret;
+ 
         if (!xen_domain())
                 return -ENODEV;
   
@@@ -1394,7 -1461,13 +1461,13 @@@
                 return -ENODEV;
         }
   
-       return xenbus_register_frontend(&blkfront);
+       ret = xenbus_register_frontend(&blkfront);
+       if (ret) {
+               unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
+               return ret;
+       }
+ 
+       return 0;
   }
   module_init(xlblk_init);
   
diff --combined drivers/scsi/hpsa.c

index 9825ecf3495793cc5a6c1b166337d9ab85a0cf58,381929813cbdc284233877ed6d33a1eab855d1af..bbdc9f960a66fd7105a314f89e900e5b17492af9
--- 1/drivers/scsi/hpsa.c
--- 2/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@@ -676,16 -676,6 +676,16 @@@ static void hpsa_scsi_replace_entry(str
         BUG_ON(entry < 0 || entry >= HPSA_MAX_SCSI_DEVS_PER_HBA);
         removed[*nremoved] = h->dev[entry];
         (*nremoved)++;
+ +
+ +      /*
+ +       * New physical devices won't have target/lun assigned yet
+ +       * so we need to preserve the values in the slot we are replacing.
+ +       */
+ +      if (new_entry->target == -1) {
+ +              new_entry->target = h->dev[entry]->target;
+ +              new_entry->lun = h->dev[entry]->lun;
+ +      }
+ +
         h->dev[entry] = new_entry;
         added[*nadded] = new_entry;
         (*nadded)++;
@@@ -1558,17 -1548,10 +1558,17 @@@ static inline void hpsa_set_bus_target_
   }
   
   static int hpsa_update_device_info(struct ctlr_info *h,
- -      unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device)
+ +      unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device,
+ +      unsigned char *is_OBDR_device)
   {
- -#define OBDR_TAPE_INQ_SIZE 49
+ +
+ +#define OBDR_SIG_OFFSET 43
+ +#define OBDR_TAPE_SIG "$DR-10"
+ +#define OBDR_SIG_LEN (sizeof(OBDR_TAPE_SIG) - 1)
+ +#define OBDR_TAPE_INQ_SIZE (OBDR_SIG_OFFSET + OBDR_SIG_LEN)
+ +
         unsigned char *inq_buff;
+ +      unsigned char *obdr_sig;
   
         inq_buff = kzalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
         if (!inq_buff)
@@@ -1600,16 -1583,6 +1600,16 @@@
         else
                 this_device->raid_level = RAID_UNKNOWN;
   
+ +      if (is_OBDR_device) {
+ +              /* See if this is a One-Button-Disaster-Recovery device
+ +               * by looking for "$DR-10" at offset 43 in inquiry data.
+ +               */
+ +              obdr_sig = &inq_buff[OBDR_SIG_OFFSET];
+ +              *is_OBDR_device = (this_device->devtype == TYPE_ROM &&
+ +                                      strncmp(obdr_sig, OBDR_TAPE_SIG,
+ +                                              OBDR_SIG_LEN) == 0);
+ +      }
+ +
         kfree(inq_buff);
         return 0;
   
@@@ -1743,7 -1716,7 +1743,7 @@@ static int add_msa2xxx_enclosure_device
                 return 0;
         }
   
- -      if (hpsa_update_device_info(h, scsi3addr, this_device))
+ +      if (hpsa_update_device_info(h, scsi3addr, this_device, NULL))
                 return 0;
         (*nmsa2xxx_enclosures)++;
         hpsa_set_bus_target_lun(this_device, bus, target, 0);
@@@ -1835,6 -1808,7 +1835,6 @@@ static void hpsa_update_scsi_devices(st
          */
         struct ReportLUNdata *physdev_list = NULL;
         struct ReportLUNdata *logdev_list = NULL;
- -      unsigned char *inq_buff = NULL;
         u32 nphysicals = 0;
         u32 nlogicals = 0;
         u32 ndev_allocated = 0;
@@@ -1850,9 -1824,11 +1850,9 @@@
                 GFP_KERNEL);
         physdev_list = kzalloc(reportlunsize, GFP_KERNEL);
         logdev_list = kzalloc(reportlunsize, GFP_KERNEL);
- -      inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
         tmpdevice = kzalloc(sizeof(*tmpdevice), GFP_KERNEL);
   
- -      if (!currentsd || !physdev_list || !logdev_list ||
- -              !inq_buff || !tmpdevice) {
+ +      if (!currentsd || !physdev_list || !logdev_list || !tmpdevice) {
                 dev_err(&h->pdev->dev, "out of memory\n");
                 goto out;
         }
@@@ -1887,7 -1863,7 +1887,7 @@@
         /* adjust our table of devices */
         nmsa2xxx_enclosures = 0;
         for (i = 0; i < nphysicals + nlogicals + 1; i++) {
- -              u8 *lunaddrbytes;
+ +              u8 *lunaddrbytes, is_OBDR = 0;
   
                 /* Figure out where the LUN ID info is coming from */
                 lunaddrbytes = figure_lunaddrbytes(h, raid_ctlr_position,
@@@ -1898,8 -1874,7 +1898,8 @@@
                         continue;
   
                 /* Get device type, vendor, model, device id */
- -              if (hpsa_update_device_info(h, lunaddrbytes, tmpdevice))
+ +              if (hpsa_update_device_info(h, lunaddrbytes, tmpdevice,
+ +                                                      &is_OBDR))
                         continue; /* skip it if we can't talk to it. */
                 figure_bus_target_lun(h, lunaddrbytes, &bus, &target, &lun,
                         tmpdevice);
@@@ -1923,7 -1898,7 +1923,7 @@@
                 hpsa_set_bus_target_lun(this_device, bus, target, lun);
   
                 switch (this_device->devtype) {
- -              case TYPE_ROM: {
+ +              case TYPE_ROM:
                         /* We don't *really* support actual CD-ROM devices,
                          * just "One Button Disaster Recovery" tape drive
                          * which temporarily pretends to be a CD-ROM drive.
@@@ -1931,8 -1906,15 +1931,8 @@@
                          * device by checking for "$DR-10" in bytes 43-48 of
                          * the inquiry data.
                          */
- -                              char obdr_sig[7];
- -#define OBDR_TAPE_SIG "$DR-10"
- -                              strncpy(obdr_sig, &inq_buff[43], 6);
- -                              obdr_sig[6] = '\0';
- -                              if (strncmp(obdr_sig, OBDR_TAPE_SIG, 6) != 0)
- -                                      /* Not OBDR device, ignore it. */
- -                                      break;
- -                      }
- -                      ncurrent++;
+ +                      if (is_OBDR)
+ +                              ncurrent++;
                         break;
                 case TYPE_DISK:
                         if (i < nphysicals)
@@@ -1965,6 -1947,7 +1965,6 @@@ out
         for (i = 0; i < ndev_allocated; i++)
                 kfree(currentsd[i]);
         kfree(currentsd);
- -      kfree(inq_buff);
         kfree(physdev_list);
         kfree(logdev_list);
   }
@@@ -3300,6 -3283,13 +3300,13 @@@ static int hpsa_controller_hard_reset(s
                 pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
                 pmcsr |= PCI_D0;
                 pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+ 
+               /*
+                * The P600 requires a small delay when changing states.
+                * Otherwise we may think the board did not reset and we bail.
+                * This is for kdump only and is particular to the P600.
+                */
+               msleep(500);
         }
         return 0;
   }
@@@ -3438,8 -3428,10 +3445,8 @@@ static __devinit int hpsa_kdump_hard_re
         } else {
                 use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
                 if (use_doorbell) {
- -                      dev_warn(&pdev->dev, "Controller claims that "
- -                              "'Bit 2 doorbell reset' is "
- -                              "supported, but not 'bit 5 doorbell reset'.  "
- -                              "Firmware update is recommended.\n");
+ +                      dev_warn(&pdev->dev, "Soft reset not supported. "
+ +                              "Firmware update is required.\n");
                         rc = -ENOTSUPP; /* try soft reset */
                         goto unmap_cfgtable;
                 }
diff --combined fs/block_dev.c

index 1c44b8d54504e1c633774fd6befe966f0c1e7dc1,0bed0d4588dd87f103c5239d66f6058461f89253..b07f1da1de4e34470fd64af913c9366e0d6c8513
--- 1/fs/block_dev.c
--- 2/fs/block_dev.c
+++ b/fs/block_dev.c
@@@ -971,7 -971,7 +971,7 @@@ static void flush_disk(struct block_dev
   
         if (!bdev->bd_disk)
                 return;
-       if (disk_partitionable(bdev->bd_disk))
+       if (disk_part_scan_enabled(bdev->bd_disk))
                 bdev->bd_invalidated = 1;
   }
   
@@@ -1085,7 -1085,6 +1085,7 @@@ static int __blkdev_put(struct block_de
   static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
   {
         struct gendisk *disk;
+ +      struct module *owner;
         int ret;
         int partno;
         int perm = 0;
@@@ -1111,7 -1110,6 +1111,7 @@@
         disk = get_gendisk(bdev->bd_dev, &partno);
         if (!disk)
                 goto out;
+ +      owner = disk->fops->owner;
   
         disk_block_events(disk);
         mutex_lock_nested(&bdev->bd_mutex, for_part);
@@@ -1139,8 -1137,8 +1139,8 @@@
                                         bdev->bd_disk = NULL;
                                         mutex_unlock(&bdev->bd_mutex);
                                         disk_unblock_events(disk);
- -                                      module_put(disk->fops->owner);
                                         put_disk(disk);
+ +                                      module_put(owner);
                                         goto restart;
                                 }
                         }
@@@ -1196,8 -1194,8 +1196,8 @@@
                                 goto out_unlock_bdev;
                 }
                 /* only one opener holds refs to the module and disk */
- -              module_put(disk->fops->owner);
                 put_disk(disk);
+ +              module_put(owner);
         }
         bdev->bd_openers++;
         if (for_part)
@@@ -1217,8 -1215,8 +1217,8 @@@
    out_unlock_bdev:
         mutex_unlock(&bdev->bd_mutex);
         disk_unblock_events(disk);
- -      module_put(disk->fops->owner);
         put_disk(disk);
+ +      module_put(owner);
    out:
         bdput(bdev);
   
@@@ -1431,11 -1429,6 +1431,11 @@@ static int __blkdev_put(struct block_de
                 WARN_ON_ONCE(bdev->bd_holders);
                 sync_blockdev(bdev);
                 kill_bdev(bdev);
+ +              /* ->release can cause the old bdi to disappear,
+ +               * so must switch it out first
+ +               */
+ +              bdev_inode_switch_bdi(bdev->bd_inode,
+ +                                      &default_backing_dev_info);
         }
         if (bdev->bd_contains == bdev) {
                 if (disk->fops->release)
@@@ -1444,15 -1437,16 +1444,15 @@@
         if (!bdev->bd_openers) {
                 struct module *owner = disk->fops->owner;
   
- -              put_disk(disk);
- -              module_put(owner);
                 disk_put_part(bdev->bd_part);
                 bdev->bd_part = NULL;
                 bdev->bd_disk = NULL;
- -              bdev_inode_switch_bdi(bdev->bd_inode,
- -                                      &default_backing_dev_info);
                 if (bdev != bdev->bd_contains)
                         victim = bdev->bd_contains;
                 bdev->bd_contains = NULL;
+ +
+ +              put_disk(disk);
+ +              module_put(owner);
         }
         mutex_unlock(&bdev->bd_mutex);
         bdput(bdev);
diff --combined include/linux/genhd.h

index 6957350e122f2444173ff94e0d650e0dc01ef8c6,6d18f3531f180f401d35e8028b3395b339182c2e..9de31bc98c8803bc96bac1f0751da3ca695bf8df
--- 1/include/linux/genhd.h
--- 2/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@@ -21,8 -21,6 +21,8 @@@
   #define dev_to_part(device)   container_of((device), struct hd_struct, __dev)
   #define disk_to_dev(disk)     (&(disk)->part0.__dev)
   #define part_to_dev(part)     (&((part)->__dev))
+ +#define alias_name(disk)      ((disk)->alias ? (disk)->alias : \
+ +                                               (disk)->disk_name)
   
   extern struct device_type part_type;
   extern struct kobject *block_depr;
@@@ -60,7 -58,6 +60,7 @@@ enum 
   
   #define DISK_MAX_PARTS                        256
   #define DISK_NAME_LEN                 32
+ +#define ALIAS_LEN                     256
   
   #include <linux/major.h>
   #include <linux/device.h>
@@@ -131,6 -128,7 +131,7 @@@ struct hd_struct 
   #define GENHD_FL_EXT_DEVT                     64 /* allow extended devt */
   #define GENHD_FL_NATIVE_CAPACITY              128
   #define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE   256
+ #define GENHD_FL_NO_PART_SCAN                 512
   
   enum {
         DISK_EVENT_MEDIA_CHANGE                 = 1 << 0, /* media changed */
@@@ -165,7 -163,6 +166,7 @@@ struct gendisk 
                                            * disks that can't be partitioned. */
   
         char disk_name[DISK_NAME_LEN];  /* name of major driver */
+ +      char *alias;                    /* alias name of disk */
         char *(*devnode)(struct gendisk *gd, mode_t *mode);
   
         unsigned int events;            /* supported events */
@@@ -238,9 -235,10 +239,10 @@@ static inline int disk_max_parts(struc
         return disk->minors;
   }
   
- static inline bool disk_partitionable(struct gendisk *disk)
+ static inline bool disk_part_scan_enabled(struct gendisk *disk)
   {
-       return disk_max_parts(disk) > 1;
+       return disk_max_parts(disk) > 1 &&
+               !(disk->flags & GENHD_FL_NO_PART_SCAN);
   }
   
   static inline dev_t disk_devt(struct gendisk *disk)
diff --combined include/linux/loop.h

index a06880689115ded34b73c39443004638635f2d3b,4367fc507fe916e1559c7050e24601778a18422c..11a41a8f08eb9e98cb1105982d093c3d075d5206
--- 1/include/linux/loop.h
--- 2/include/linux/loop.h
+++ b/include/linux/loop.h
@@@ -64,6 -64,7 +64,6 @@@ struct loop_device 
   
         struct request_queue    *lo_queue;
         struct gendisk          *lo_disk;
- -      struct list_head        lo_list;
   };
   
   #endif /* __KERNEL__ */
@@@ -73,7 -74,9 +73,8 @@@
    */
   enum {
         LO_FLAGS_READ_ONLY      = 1,
- -      LO_FLAGS_USE_AOPS       = 2,
         LO_FLAGS_AUTOCLEAR      = 4,
+       LO_FLAGS_PARTSCAN       = 8,
   };
   
   #include <asm/posix_types.h>  /* for __kernel_old_dev_t */
@@@ -159,8 -162,4 +160,8 @@@ int loop_unregister_transfer(int number
   #define LOOP_CHANGE_FD                0x4C06
   #define LOOP_SET_CAPACITY     0x4C07
   
+ +/* /dev/loop-control interface */
+ +#define LOOP_CTL_ADD          0x4C80
+ +#define LOOP_CTL_REMOVE               0x4C81
+ +#define LOOP_CTL_GET_FREE     0x4C82
   #endif
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 5 Nov 2011 00:22:14 +0000 (17:22 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 5 Nov 2011 00:22:14 +0000 (17:22 -0700)
		1	2
block/genhd.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/block/loop.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/block/xen-blkback/blkback.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/block/xen-blkback/common.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/block/xen-blkback/xenbus.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/block/xen-blkfront.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/scsi/hpsa.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/block_dev.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/genhd.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/loop.h	patch \|	diff1 \|	diff2 \|	blob \| history