ufs: don't use lock_ufs() for block pointers tree protection
authorAl Viro <viro@zeniv.linux.org.uk>
Wed, 17 Jun 2015 16:02:56 +0000 (12:02 -0400)
committerAl Viro <viro@zeniv.linux.org.uk>
Mon, 6 Jul 2015 21:39:25 +0000 (17:39 -0400)
* stores to block pointers are under per-inode seqlock (meta_lock) and
mutex (truncate_mutex)
* fetches of block pointers are either under truncate_mutex, or wrapped
into seqretry loop on meta_lock
* all changes of ->i_size are under truncate_mutex and i_mutex
* all changes of ->i_lastfrag are under truncate_mutex

It's similar to what ext2 is doing; the main difference is that unlike
ext2 we can't rely upon the atomicity of stores into block pointers -
on UFS2 they are 64bit.  So we can't cut the corner when switching
a pointer from NULL to non-NULL as we could in ext2_splice_branch()
and need to use meta_lock on all modifications.

We use seqlock where ext2 uses rwlock; ext2 could probably also benefit
from such change...

Another non-trivial difference is that with UFS we *cannot* have reader
grab truncate_mutex in case of race - it has to keep retrying.  That
might be possible to change, but not until we lift tail unpacking
several levels up in call chain.

After that commit we do *NOT* hold fs-wide serialization on accesses
to block pointers anymore.  Moreover, lock_ufs() can become a normal
mutex now - it's only used on statfs, remount and sync_fs and none
of those uses are recursive.  As the matter of fact, *now* it can be
collapsed with ->s_lock, and be eventually replaced with saner
per-cylinder-group spinlocks, but that's a separate story.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/ufs/balloc.c
fs/ufs/inode.c
fs/ufs/super.c
fs/ufs/truncate.c
fs/ufs/ufs.h

index a7106eda50241bfd28ae002986335b4d0cf8f0f6..fb8b54eb77c5dcf89ef4a78d0c4dc88bc1d46abb 100644 (file)
@@ -417,7 +417,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
        if (oldcount == 0) {
                result = ufs_alloc_fragments (inode, cgno, goal, count, err);
                if (result) {
+                       write_seqlock(&UFS_I(inode)->meta_lock);
                        ufs_cpu_to_data_ptr(sb, p, result);
+                       write_sequnlock(&UFS_I(inode)->meta_lock);
                        *err = 0;
                        UFS_I(inode)->i_lastfrag =
                                max(UFS_I(inode)->i_lastfrag, fragment + count);
@@ -473,7 +475,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
                ufs_change_blocknr(inode, fragment - oldcount, oldcount,
                                   uspi->s_sbbase + tmp,
                                   uspi->s_sbbase + result, locked_page);
+               write_seqlock(&UFS_I(inode)->meta_lock);
                ufs_cpu_to_data_ptr(sb, p, result);
+               write_sequnlock(&UFS_I(inode)->meta_lock);
                *err = 0;
                UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
                                                fragment + count);
index a4fc3adfdc4c83b0bff96cb350ae23791bfbc359..100f93c6b309bc48605d7cc295154179b2b7dedd 100644 (file)
@@ -41,8 +41,6 @@
 #include "swab.h"
 #include "util.h"
 
-static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock);
-
 static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4])
 {
        struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
@@ -75,12 +73,53 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
        return n;
 }
 
+typedef struct {
+       void    *p;
+       union {
+               __fs32  key32;
+               __fs64  key64;
+       };
+       struct buffer_head *bh;
+} Indirect;
+
+static inline int grow_chain32(struct ufs_inode_info *ufsi,
+                              struct buffer_head *bh, __fs32 *v,
+                              Indirect *from, Indirect *to)
+{
+       Indirect *p;
+       unsigned seq;
+       to->bh = bh;
+       do {
+               seq = read_seqbegin(&ufsi->meta_lock);
+               to->key32 = *(__fs32 *)(to->p = v);
+               for (p = from; p <= to && p->key32 == *(__fs32 *)p->p; p++)
+                       ;
+       } while (read_seqretry(&ufsi->meta_lock, seq));
+       return (p > to);
+}
+
+static inline int grow_chain64(struct ufs_inode_info *ufsi,
+                              struct buffer_head *bh, __fs64 *v,
+                              Indirect *from, Indirect *to)
+{
+       Indirect *p;
+       unsigned seq;
+       to->bh = bh;
+       do {
+               seq = read_seqbegin(&ufsi->meta_lock);
+               to->key64 = *(__fs64 *)(to->p = v);
+               for (p = from; p <= to && p->key64 == *(__fs64 *)p->p; p++)
+                       ;
+       } while (read_seqretry(&ufsi->meta_lock, seq));
+       return (p > to);
+}
+
 /*
  * Returns the location of the fragment from
  * the beginning of the filesystem.
  */
 
-static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
+static u64 ufs_frag_map(struct inode *inode, sector_t frag)
 {
        struct ufs_inode_info *ufsi = UFS_I(inode);
        struct super_block *sb = inode->i_sb;
@@ -88,12 +127,10 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
        u64 mask = (u64) uspi->s_apbmask>>uspi->s_fpbshift;
        int shift = uspi->s_apbshift-uspi->s_fpbshift;
        sector_t offsets[4], *p;
+       Indirect chain[4], *q = chain;
        int depth = ufs_block_to_path(inode, frag >> uspi->s_fpbshift, offsets);
-       u64  ret = 0L;
-       __fs32 block;
-       __fs64 u2_block = 0L;
        unsigned flags = UFS_SB(sb)->s_flags;
-       u64 temp = 0L;
+       u64 res = 0;
 
        UFSD(": frag = %llu  depth = %d\n", (unsigned long long)frag, depth);
        UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",
@@ -101,59 +138,73 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
                (unsigned long long)mask);
 
        if (depth == 0)
-               return 0;
+               goto no_block;
 
+again:
        p = offsets;
 
-       if (needs_lock)
-               lock_ufs(sb);
        if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
                goto ufs2;
 
-       block = ufsi->i_u1.i_data[*p++];
-       if (!block)
-               goto out;
+       if (!grow_chain32(ufsi, NULL, &ufsi->i_u1.i_data[*p++], chain, q))
+               goto changed;
+       if (!q->key32)
+               goto no_block;
        while (--depth) {
+               __fs32 *ptr;
                struct buffer_head *bh;
                sector_t n = *p++;
 
-               bh = sb_bread(sb, uspi->s_sbbase + fs32_to_cpu(sb, block)+(n>>shift));
+               bh = sb_bread(sb, uspi->s_sbbase +
+                                 fs32_to_cpu(sb, q->key32) + (n>>shift));
                if (!bh)
-                       goto out;
-               block = ((__fs32 *) bh->b_data)[n & mask];
-               brelse (bh);
-               if (!block)
-                       goto out;
+                       goto no_block;
+               ptr = (__fs32 *)bh->b_data + (n & mask);
+               if (!grow_chain32(ufsi, bh, ptr, chain, ++q))
+                       goto changed;
+               if (!q->key32)
+                       goto no_block;
        }
-       ret = (u64) (uspi->s_sbbase + fs32_to_cpu(sb, block) + (frag & uspi->s_fpbmask));
-       goto out;
-ufs2:
-       u2_block = ufsi->i_u1.u2_i_data[*p++];
-       if (!u2_block)
-               goto out;
+       res = fs32_to_cpu(sb, q->key32);
+       goto found;
 
+ufs2:
+       if (!grow_chain64(ufsi, NULL, &ufsi->i_u1.u2_i_data[*p++], chain, q))
+               goto changed;
+       if (!q->key64)
+               goto no_block;
 
        while (--depth) {
+               __fs64 *ptr;
                struct buffer_head *bh;
                sector_t n = *p++;
 
-
-               temp = (u64)(uspi->s_sbbase) + fs64_to_cpu(sb, u2_block);
-               bh = sb_bread(sb, temp +(u64) (n>>shift));
+               bh = sb_bread(sb, uspi->s_sbbase +
+                                 fs64_to_cpu(sb, q->key64) + (n>>shift));
                if (!bh)
-                       goto out;
-               u2_block = ((__fs64 *)bh->b_data)[n & mask];
-               brelse(bh);
-               if (!u2_block)
-                       goto out;
+                       goto no_block;
+               ptr = (__fs64 *)bh->b_data + (n & mask);
+               if (!grow_chain64(ufsi, bh, ptr, chain, ++q))
+                       goto changed;
+               if (!q->key64)
+                       goto no_block;
+       }
+       res = fs64_to_cpu(sb, q->key64);
+found:
+       res += uspi->s_sbbase + (frag & uspi->s_fpbmask);
+no_block:
+       while (q > chain) {
+               brelse(q->bh);
+               q--;
        }
-       temp = (u64)uspi->s_sbbase + fs64_to_cpu(sb, u2_block);
-       ret = temp + (u64) (frag & uspi->s_fpbmask);
+       return res;
 
-out:
-       if (needs_lock)
-               unlock_ufs(sb);
-       return ret;
+changed:
+       while (q > chain) {
+               brelse(q->bh);
+               q--;
+       }
+       goto again;
 }
 
 /**
@@ -421,10 +472,9 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
        int ret, err, new;
        unsigned long ptr,phys;
        u64 phys64 = 0;
-       bool needs_lock = (sbi->mutex_owner != current);
        
        if (!create) {
-               phys64 = ufs_frag_map(inode, fragment, needs_lock);
+               phys64 = ufs_frag_map(inode, fragment);
                UFSD("phys64 = %llu\n", (unsigned long long)phys64);
                if (phys64)
                        map_bh(bh_result, sb, phys64);
@@ -438,8 +488,7 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
        ret = 0;
        bh = NULL;
 
-       if (needs_lock)
-               lock_ufs(sb);
+       mutex_lock(&UFS_I(inode)->truncate_mutex);
 
        UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
        if (fragment >
@@ -501,8 +550,7 @@ out:
                set_buffer_new(bh_result);
        map_bh(bh_result, sb, phys);
 abort:
-       if (needs_lock)
-               unlock_ufs(sb);
+       mutex_unlock(&UFS_I(inode)->truncate_mutex);
 
        return err;
 
index 250579a80d90bd379caee1b7aeaf252dda97c34d..15cd3338340c53cd8892146fd19af239853215f2 100644 (file)
@@ -1429,6 +1429,8 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
                return NULL;
 
        ei->vfs_inode.i_version = 1;
+       seqlock_init(&ei->meta_lock);
+       mutex_init(&ei->truncate_mutex);
        return &ei->vfs_inode;
 }
 
index 9908a6045d7ae96149155243d01b26374be79396..ad34b7f4b4997d69a00ceb72608a7fb406c11bd3 100644 (file)
@@ -128,7 +128,9 @@ next1:
                tmp = ufs_data_ptr_to_cpu(sb, p);
                if (!tmp)
                        continue;
+               write_seqlock(&ufsi->meta_lock);
                ufs_data_ptr_clear(uspi, p);
+               write_sequnlock(&ufsi->meta_lock);
 
                if (free_count == 0) {
                        frag_to_free = tmp;
@@ -157,7 +159,9 @@ next1:
        if (!tmp )
                ufs_panic(sb, "ufs_truncate_direct", "internal error");
        frag4 = ufs_fragnum (frag4);
+       write_seqlock(&ufsi->meta_lock);
        ufs_data_ptr_clear(uspi, p);
+       write_sequnlock(&ufsi->meta_lock);
 
        ufs_free_fragments (inode, tmp, frag4);
        mark_inode_dirty(inode);
@@ -199,7 +203,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
                return 1;
        }
        if (!ind_ubh) {
+               write_seqlock(&UFS_I(inode)->meta_lock);
                ufs_data_ptr_clear(uspi, p);
+               write_sequnlock(&UFS_I(inode)->meta_lock);
                return 0;
        }
 
@@ -210,7 +216,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
                if (!tmp)
                        continue;
 
+               write_seqlock(&UFS_I(inode)->meta_lock);
                ufs_data_ptr_clear(uspi, ind);
+               write_sequnlock(&UFS_I(inode)->meta_lock);
                ubh_mark_buffer_dirty(ind_ubh);
                if (free_count == 0) {
                        frag_to_free = tmp;
@@ -235,7 +243,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
                        break;
        if (i >= uspi->s_apb) {
                tmp = ufs_data_ptr_to_cpu(sb, p);
+               write_seqlock(&UFS_I(inode)->meta_lock);
                ufs_data_ptr_clear(uspi, p);
+               write_sequnlock(&UFS_I(inode)->meta_lock);
 
                ubh_bforget(ind_ubh);
                ufs_free_blocks (inode, tmp, uspi->s_fpb);
@@ -278,7 +288,9 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
                return 1;
        }
        if (!dind_bh) {
+               write_seqlock(&UFS_I(inode)->meta_lock);
                ufs_data_ptr_clear(uspi, p);
+               write_sequnlock(&UFS_I(inode)->meta_lock);
                return 0;
        }
 
@@ -297,7 +309,9 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
                        break;
        if (i >= uspi->s_apb) {
                tmp = ufs_data_ptr_to_cpu(sb, p);
+               write_seqlock(&UFS_I(inode)->meta_lock);
                ufs_data_ptr_clear(uspi, p);
+               write_sequnlock(&UFS_I(inode)->meta_lock);
 
                ubh_bforget(dind_bh);
                ufs_free_blocks(inode, tmp, uspi->s_fpb);
@@ -339,7 +353,9 @@ static int ufs_trunc_tindirect(struct inode *inode)
                return 1;
        }
        if (!tind_bh) {
+               write_seqlock(&ufsi->meta_lock);
                ufs_data_ptr_clear(uspi, p);
+               write_sequnlock(&ufsi->meta_lock);
                return 0;
        }
 
@@ -355,7 +371,9 @@ static int ufs_trunc_tindirect(struct inode *inode)
                        break;
        if (i >= uspi->s_apb) {
                tmp = ufs_data_ptr_to_cpu(sb, p);
+               write_seqlock(&ufsi->meta_lock);
                ufs_data_ptr_clear(uspi, p);
+               write_sequnlock(&ufsi->meta_lock);
 
                ubh_bforget(tind_bh);
                ufs_free_blocks(inode, tmp, uspi->s_fpb);
@@ -447,7 +465,7 @@ static void __ufs_truncate_blocks(struct inode *inode)
        struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
        int retry;
 
-       lock_ufs(sb);
+       mutex_lock(&ufsi->truncate_mutex);
        while (1) {
                retry = ufs_trunc_direct(inode);
                retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK,
@@ -465,7 +483,7 @@ static void __ufs_truncate_blocks(struct inode *inode)
        }
 
        ufsi->i_lastfrag = DIRECT_FRAGMENT;
-       unlock_ufs(sb);
+       mutex_unlock(&ufsi->truncate_mutex);
 }
 
 int ufs_truncate(struct inode *inode, loff_t size)
index 43fcab381de1bd2a203f52c6927ac6bee17cdcb1..ea28b73a8b7458e2a0d5f3e225c095fe5f67f83f 100644 (file)
@@ -46,6 +46,8 @@ struct ufs_inode_info {
        __u32   i_oeftflag;
        __u16   i_osync;
        __u64   i_lastfrag;
+       seqlock_t meta_lock;
+       struct mutex    truncate_mutex;
        __u32   i_dir_start_lookup;
        struct inode vfs_inode;
 };