Merge remote-tracking branch 'remotes/tegra/android-tegra-2.6.36-honeycomb-mr1' into...

[firefly-linux-kernel-4.4.55.git] / fs / block_dev.c
diff --git a/fs/block_dev.c b/fs/block_dev.c

index 699ac9358a9e313f69c8842ae71645071f1bbfc9..0297699eb4367596c0e7c5b77c3bb403c3868e0c 100755 (executable)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -174,8 +174,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_mapping->host;
  
-       return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
-                               iov, offset, nr_segs, blkdev_get_blocks, NULL);
+       return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
+                                   nr_segs, blkdev_get_blocks, NULL, NULL, 0);
  }
  
  int __sync_blockdev(struct block_device *bdev, int wait)
@@ -247,36 +247,14 @@ struct super_block *freeze_bdev(struct block_device *bdev)
         sb = get_active_super(bdev);
         if (!sb)
                 goto out;
-       if (sb->s_flags & MS_RDONLY) {
-               deactivate_locked_super(sb);
+       error = freeze_super(sb);
+       if (error) {
+               deactivate_super(sb);
+               bdev->bd_fsfreeze_count--;
                 mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               return sb;
-       }
-
-       sb->s_frozen = SB_FREEZE_WRITE;
-       smp_wmb();
-
-       sync_filesystem(sb);
-
-       sb->s_frozen = SB_FREEZE_TRANS;
-       smp_wmb();
-
-       sync_blockdev(sb->s_bdev);
-
-       if (sb->s_op->freeze_fs) {
-               error = sb->s_op->freeze_fs(sb);
-               if (error) {
-                       printk(KERN_ERR
-                               "VFS:Filesystem freeze failed\n");
-                       sb->s_frozen = SB_UNFROZEN;
-                       deactivate_locked_super(sb);
-                       bdev->bd_fsfreeze_count--;
-                       mutex_unlock(&bdev->bd_fsfreeze_mutex);
-                       return ERR_PTR(error);
-               }
+               return ERR_PTR(error);
         }
-       up_write(&sb->s_umount);
-
+       deactivate_super(sb);
   out:
         sync_blockdev(bdev);
         mutex_unlock(&bdev->bd_fsfreeze_mutex);
@@ -297,40 +275,22 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
  
         mutex_lock(&bdev->bd_fsfreeze_mutex);
         if (!bdev->bd_fsfreeze_count)
-               goto out_unlock;
+               goto out;
  
         error = 0;
         if (--bdev->bd_fsfreeze_count > 0)
-               goto out_unlock;
+               goto out;
  
         if (!sb)
-               goto out_unlock;
-
-       BUG_ON(sb->s_bdev != bdev);
-       down_write(&sb->s_umount);
-       if (sb->s_flags & MS_RDONLY)
-               goto out_deactivate;
-
-       if (sb->s_op->unfreeze_fs) {
-               error = sb->s_op->unfreeze_fs(sb);
-               if (error) {
-                       printk(KERN_ERR
-                               "VFS:Filesystem thaw failed\n");
-                       sb->s_frozen = SB_FREEZE_TRANS;
-                       bdev->bd_fsfreeze_count++;
-                       mutex_unlock(&bdev->bd_fsfreeze_mutex);
-                       return error;
-               }
-       }
-
-       sb->s_frozen = SB_UNFROZEN;
-       smp_wmb();
-       wake_up(&sb->s_wait_unfrozen);
+               goto out;
  
-out_deactivate:
-       if (sb)
-               deactivate_locked_super(sb);
-out_unlock:
+       error = thaw_super(sb);
+       if (error) {
+               bdev->bd_fsfreeze_count++;
+               mutex_unlock(&bdev->bd_fsfreeze_mutex);
+               return error;
+       }
+out:
         mutex_unlock(&bdev->bd_fsfreeze_mutex);
         return 0;
  }
@@ -350,9 +310,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping,
                         loff_t pos, unsigned len, unsigned flags,
                         struct page **pagep, void **fsdata)
  {
-       *pagep = NULL;
-       return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-                               blkdev_get_block);
+       return block_write_begin(mapping, pos, len, flags, pagep,
+                                blkdev_get_block);
  }
  
  static int blkdev_write_end(struct file *file, struct address_space *mapping,
@@ -400,15 +359,28 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
         return retval;
  }
         
-/*
- *     Filp is never NULL; the only case when ->fsync() is called with
- *     NULL first argument is nfsd_sync_dir() and that's not a directory.
- */
- 
-static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int blkdev_fsync(struct file *filp, int datasync)
  {
-       return sync_blockdev(I_BDEV(filp->f_mapping->host));
+       struct inode *bd_inode = filp->f_mapping->host;
+       struct block_device *bdev = I_BDEV(bd_inode);
+       int error;
+
+       /*
+        * There is no need to serialise calls to blkdev_issue_flush with
+        * i_mutex and doing so causes performance issues with concurrent
+        * O_SYNC writers to a block device.
+        */
+       mutex_unlock(&bd_inode->i_mutex);
+
+       error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
+       if (error == -EOPNOTSUPP)
+               error = 0;
+
+       mutex_lock(&bd_inode->i_mutex);
+
+       return error;
  }
+EXPORT_SYMBOL(blkdev_fsync);
  
  /*
   * pseudo-fs
@@ -456,10 +428,13 @@ static inline void __bd_forget(struct inode *inode)
         inode->i_mapping = &inode->i_data;
  }
  
-static void bdev_clear_inode(struct inode *inode)
+static void bdev_evict_inode(struct inode *inode)
  {
         struct block_device *bdev = &BDEV_I(inode)->bdev;
         struct list_head *p;
+       truncate_inode_pages(&inode->i_data, 0);
+       invalidate_inode_buffers(inode); /* is it needed here? */
+       end_writeback(inode);
         spin_lock(&bdev_lock);
         while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
                 __bd_forget(list_entry(p, struct inode, i_devices));
@@ -473,7 +448,7 @@ static const struct super_operations bdev_sops = {
         .alloc_inode = bdev_alloc_inode,
         .destroy_inode = bdev_destroy_inode,
         .drop_inode = generic_delete_inode,
-       .clear_inode = bdev_clear_inode,
+       .evict_inode = bdev_evict_inode,
  };
  
  static int bd_get_sb(struct file_system_type *fs_type,
@@ -651,41 +626,233 @@ void bd_forget(struct inode *inode)
                 iput(bdev->bd_inode);
  }
  
-int bd_claim(struct block_device *bdev, void *holder)
+/**
+ * bd_may_claim - test whether a block device can be claimed
+ * @bdev: block device of interest
+ * @whole: whole block device containing @bdev, may equal @bdev
+ * @holder: holder trying to claim @bdev
+ *
+ * Test whether @bdev can be claimed by @holder.
+ *
+ * CONTEXT:
+ * spin_lock(&bdev_lock).
+ *
+ * RETURNS:
+ * %true if @bdev can be claimed, %false otherwise.
+ */
+static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
+                        void *holder)
  {
-       int res;
-       spin_lock(&bdev_lock);
-
-       /* first decide result */
         if (bdev->bd_holder == holder)
-               res = 0;         /* already a holder */
+               return true;     /* already a holder */
         else if (bdev->bd_holder != NULL)
-               res = -EBUSY;    /* held by someone else */
+               return false;    /* held by someone else */
         else if (bdev->bd_contains == bdev)
-               res = 0;         /* is a whole device which isn't held */
+               return true;     /* is a whole device which isn't held */
  
-       else if (bdev->bd_contains->bd_holder == bd_claim)
-               res = 0;         /* is a partition of a device that is being partitioned */
-       else if (bdev->bd_contains->bd_holder != NULL)
-               res = -EBUSY;    /* is a partition of a held device */
+       else if (whole->bd_holder == bd_claim)
+               return true;     /* is a partition of a device that is being partitioned */
+       else if (whole->bd_holder != NULL)
+               return false;    /* is a partition of a held device */
         else
-               res = 0;         /* is a partition of an un-held device */
+               return true;     /* is a partition of an un-held device */
+}
  
-       /* now impose change */
-       if (res==0) {
-               /* note that for a whole device bd_holders
-                * will be incremented twice, and bd_holder will
-                * be set to bd_claim before being set to holder
-                */
-               bdev->bd_contains->bd_holders ++;
-               bdev->bd_contains->bd_holder = bd_claim;
-               bdev->bd_holders++;
-               bdev->bd_holder = holder;
+/**
+ * bd_prepare_to_claim - prepare to claim a block device
+ * @bdev: block device of interest
+ * @whole: the whole device containing @bdev, may equal @bdev
+ * @holder: holder trying to claim @bdev
+ *
+ * Prepare to claim @bdev.  This function fails if @bdev is already
+ * claimed by another holder and waits if another claiming is in
+ * progress.  This function doesn't actually claim.  On successful
+ * return, the caller has ownership of bd_claiming and bd_holder[s].
+ *
+ * CONTEXT:
+ * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
+ * it multiple times.
+ *
+ * RETURNS:
+ * 0 if @bdev can be claimed, -EBUSY otherwise.
+ */
+static int bd_prepare_to_claim(struct block_device *bdev,
+                              struct block_device *whole, void *holder)
+{
+retry:
+       /* if someone else claimed, fail */
+       if (!bd_may_claim(bdev, whole, holder))
+               return -EBUSY;
+
+       /* if claiming is already in progress, wait for it to finish */
+       if (whole->bd_claiming) {
+               wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
+               DEFINE_WAIT(wait);
+
+               prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
+               spin_unlock(&bdev_lock);
+               schedule();
+               finish_wait(wq, &wait);
+               spin_lock(&bdev_lock);
+               goto retry;
         }
+
+       /* yay, all mine */
+       return 0;
+}
+
+/**
+ * bd_start_claiming - start claiming a block device
+ * @bdev: block device of interest
+ * @holder: holder trying to claim @bdev
+ *
+ * @bdev is about to be opened exclusively.  Check @bdev can be opened
+ * exclusively and mark that an exclusive open is in progress.  Each
+ * successful call to this function must be matched with a call to
+ * either bd_finish_claiming() or bd_abort_claiming() (which do not
+ * fail).
+ *
+ * This function is used to gain exclusive access to the block device
+ * without actually causing other exclusive open attempts to fail. It
+ * should be used when the open sequence itself requires exclusive
+ * access but may subsequently fail.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Pointer to the block device containing @bdev on success, ERR_PTR()
+ * value on failure.
+ */
+static struct block_device *bd_start_claiming(struct block_device *bdev,
+                                             void *holder)
+{
+       struct gendisk *disk;
+       struct block_device *whole;
+       int partno, err;
+
+       might_sleep();
+
+       /*
+        * @bdev might not have been initialized properly yet, look up
+        * and grab the outer block device the hard way.
+        */
+       disk = get_gendisk(bdev->bd_dev, &partno);
+       if (!disk)
+               return ERR_PTR(-ENXIO);
+
+       whole = bdget_disk(disk, 0);
+       module_put(disk->fops->owner);
+       put_disk(disk);
+       if (!whole)
+               return ERR_PTR(-ENOMEM);
+
+       /* prepare to claim, if successful, mark claiming in progress */
+       spin_lock(&bdev_lock);
+
+       err = bd_prepare_to_claim(bdev, whole, holder);
+       if (err == 0) {
+               whole->bd_claiming = holder;
+               spin_unlock(&bdev_lock);
+               return whole;
+       } else {
+               spin_unlock(&bdev_lock);
+               bdput(whole);
+               return ERR_PTR(err);
+       }
+}
+
+/* releases bdev_lock */
+static void __bd_abort_claiming(struct block_device *whole, void *holder)
+{
+       BUG_ON(whole->bd_claiming != holder);
+       whole->bd_claiming = NULL;
+       wake_up_bit(&whole->bd_claiming, 0);
+
         spin_unlock(&bdev_lock);
-       return res;
+       bdput(whole);
+}
+
+/**
+ * bd_abort_claiming - abort claiming a block device
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Abort a claiming block started by bd_start_claiming().  Note that
+ * @whole is not the block device to be claimed but the whole device
+ * returned by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_abort_claiming(struct block_device *whole, void *holder)
+{
+       spin_lock(&bdev_lock);
+       __bd_abort_claiming(whole, holder);             /* releases bdev_lock */
+}
+
+/* increment holders when we have a legitimate claim. requires bdev_lock */
+static void __bd_claim(struct block_device *bdev, struct block_device *whole,
+                                       void *holder)
+{
+       /* note that for a whole device bd_holders
+        * will be incremented twice, and bd_holder will
+        * be set to bd_claim before being set to holder
+        */
+       whole->bd_holders++;
+       whole->bd_holder = bd_claim;
+       bdev->bd_holders++;
+       bdev->bd_holder = holder;
+}
+
+/**
+ * bd_finish_claiming - finish claiming a block device
+ * @bdev: block device of interest (passed to bd_start_claiming())
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Finish a claiming block started by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_finish_claiming(struct block_device *bdev,
+                               struct block_device *whole, void *holder)
+{
+       spin_lock(&bdev_lock);
+       BUG_ON(!bd_may_claim(bdev, whole, holder));
+       __bd_claim(bdev, whole, holder);
+       __bd_abort_claiming(whole, holder); /* not actually an abort */
  }
  
+/**
+ * bd_claim - claim a block device
+ * @bdev: block device to claim
+ * @holder: holder trying to claim @bdev
+ *
+ * Try to claim @bdev which must have been opened successfully.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * 0 if successful, -EBUSY if @bdev is already claimed.
+ */
+int bd_claim(struct block_device *bdev, void *holder)
+{
+       struct block_device *whole = bdev->bd_contains;
+       int res;
+
+       might_sleep();
+
+       spin_lock(&bdev_lock);
+       res = bd_prepare_to_claim(bdev, whole, holder);
+       if (res == 0)
+               __bd_claim(bdev, whole, holder);
+       spin_unlock(&bdev_lock);
+
+       return res;
+}
  EXPORT_SYMBOL(bd_claim);
  
  void bd_release(struct block_device *bdev)
@@ -1175,19 +1342,20 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
         /*
          * hooks: /n/, see "layering violations".
          */
-       ret = devcgroup_inode_permission(bdev->bd_inode, perm);
-       if (ret != 0) {
-               bdput(bdev);
-               return ret;
+       if (!for_part) {
+               ret = devcgroup_inode_permission(bdev->bd_inode, perm);
+               if (ret != 0) {
+                       bdput(bdev);
+                       return ret;
+               }
         }
  
-       lock_kernel();
   restart:
  
         ret = -ENXIO;
         disk = get_gendisk(bdev->bd_dev, &partno);
         if (!disk)
-               goto out_unlock_kernel;
+               goto out;
  
         mutex_lock_nested(&bdev->bd_mutex, for_part);
         if (!bdev->bd_openers) {
@@ -1267,7 +1435,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
         if (for_part)
                 bdev->bd_part_count++;
         mutex_unlock(&bdev->bd_mutex);
-       unlock_kernel();
         return 0;
  
   out_clear:
@@ -1280,9 +1447,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
         bdev->bd_contains = NULL;
   out_unlock_bdev:
         mutex_unlock(&bdev->bd_mutex);
- out_unlock_kernel:
-       unlock_kernel();
-
+ out:
         if (disk)
                 module_put(disk->fops->owner);
         put_disk(disk);
@@ -1299,6 +1464,7 @@ EXPORT_SYMBOL(blkdev_get);
  
  static int blkdev_open(struct inode * inode, struct file * filp)
  {
+       struct block_device *whole = NULL;
         struct block_device *bdev;
         int res;
  
@@ -1321,22 +1487,25 @@ static int blkdev_open(struct inode * inode, struct file * filp)
         if (bdev == NULL)
                 return -ENOMEM;
  
+       if (filp->f_mode & FMODE_EXCL) {
+               whole = bd_start_claiming(bdev, filp);
+               if (IS_ERR(whole)) {
+                       bdput(bdev);
+                       return PTR_ERR(whole);
+               }
+       }
+
         filp->f_mapping = bdev->bd_inode->i_mapping;
  
         res = blkdev_get(bdev, filp->f_mode);
-       if (res)
-               return res;
  
-       if (filp->f_mode & FMODE_EXCL) {
-               res = bd_claim(bdev, filp);
-               if (res)
-                       goto out_blkdev_put;
+       if (whole) {
+               if (res == 0)
+                       bd_finish_claiming(bdev, whole, filp);
+               else
+                       bd_abort_claiming(whole, filp);
         }
  
-       return 0;
-
- out_blkdev_put:
-       blkdev_put(bdev, filp->f_mode);
         return res;
  }
  
@@ -1347,7 +1516,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
         struct block_device *victim = NULL;
  
         mutex_lock_nested(&bdev->bd_mutex, for_part);
-       lock_kernel();
         if (for_part)
                 bdev->bd_part_count--;
  
@@ -1372,7 +1540,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
                         victim = bdev->bd_contains;
                 bdev->bd_contains = NULL;
         }
-       unlock_kernel();
         mutex_unlock(&bdev->bd_mutex);
         bdput(bdev);
         if (victim)
@@ -1545,7 +1712,7 @@ const struct file_operations def_blk_fops = {
         .aio_read       = generic_file_aio_read,
         .aio_write      = blkdev_aio_write,
         .mmap           = generic_file_mmap,
-       .fsync          = block_fsync,
+       .fsync          = blkdev_fsync,
         .unlocked_ioctl = block_ioctl,
  #ifdef CONFIG_COMPAT
         .compat_ioctl   = compat_blkdev_ioctl,
@@ -1620,27 +1787,34 @@ EXPORT_SYMBOL(lookup_bdev);
   */
  struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
  {
-       struct block_device *bdev;
-       int error = 0;
+       struct block_device *bdev, *whole;
+       int error;
  
         bdev = lookup_bdev(path);
         if (IS_ERR(bdev))
                 return bdev;
  
+       whole = bd_start_claiming(bdev, holder);
+       if (IS_ERR(whole)) {
+               bdput(bdev);
+               return whole;
+       }
+
         error = blkdev_get(bdev, mode);
         if (error)
-               return ERR_PTR(error);
+               goto out_abort_claiming;
+
         error = -EACCES;
         if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
-               goto blkdev_put;
-       error = bd_claim(bdev, holder);
-       if (error)
-               goto blkdev_put;
+               goto out_blkdev_put;
  
+       bd_finish_claiming(bdev, whole, holder);
         return bdev;
-       
-blkdev_put:
+
+out_blkdev_put:
         blkdev_put(bdev, mode);
+out_abort_claiming:
+       bd_abort_claiming(whole, holder);
         return ERR_PTR(error);
  }