X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=fs%2Flocks.c;h=8eddae23e10bed47430704cfd897b8e3a491f88b;hb=d776fdb776681cd013be513ba70ffa51d934f19d;hp=cb424a4fed71a42d237b72d16de8318bbbeb0fbd;hpb=91e14b294f3fe4d8b28516b21ceef187ea9b1bdf;p=firefly-linux-kernel-4.4.55.git diff --git a/fs/locks.c b/fs/locks.c index cb424a4fed71..8eddae23e10b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -126,12 +126,19 @@ #include #include #include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include #include #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) -#define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) +#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) +#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) static bool lease_breaking(struct file_lock *fl) { @@ -150,33 +157,97 @@ static int target_leasetype(struct file_lock *fl) int leases_enable = 1; int lease_break_time = 45; -#define for_each_lock(inode, lockp) \ - for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) +/* + * The global file_lock_list is only used for displaying /proc/locks, so we + * keep a list on each CPU, with each list protected by its own spinlock via + * the file_lock_lglock. Note that alterations to the list also require that + * the relevant flc_lock is held. + */ +DEFINE_STATIC_LGLOCK(file_lock_lglock); +static DEFINE_PER_CPU(struct hlist_head, file_lock_list); -static LIST_HEAD(file_lock_list); -static LIST_HEAD(blocked_list); -static DEFINE_SPINLOCK(file_lock_lock); +/* + * The blocked_hash is used to find POSIX lock loops for deadlock detection. + * It is protected by blocked_lock_lock. + * + * We hash locks by lockowner in order to optimize searching for the lock a + * particular lockowner is waiting on. + * + * FIXME: make this value scale via some heuristic? We generally will want more + * buckets when we have more lockowners holding locks, but that's a little + * difficult to determine without knowing what the workload will look like. + */ +#define BLOCKED_HASH_BITS 7 +static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); /* - * Protects the two list heads above, plus the inode->i_flock list + * This lock protects the blocked_hash. Generally, if you're accessing it, you + * want to be holding this lock. + * + * In addition, it also protects the fl->fl_block list, and the fl->fl_next + * pointer for file_lock structures that are acting as lock requests (in + * contrast to those that are acting as records of acquired locks). + * + * Note that when we acquire this lock in order to change the above fields, + * we often hold the flc_lock as well. In certain cases, when reading the fields + * protected by this lock, we can skip acquiring it iff we already hold the + * flc_lock. + * + * In particular, adding an entry to the fl_block list requires that you hold + * both the flc_lock and the blocked_lock_lock (acquired in that order). + * Deleting an entry from the list however only requires the file_lock_lock. */ -void lock_flocks(void) +static DEFINE_SPINLOCK(blocked_lock_lock); + +static struct kmem_cache *flctx_cache __read_mostly; +static struct kmem_cache *filelock_cache __read_mostly; + +static struct file_lock_context * +locks_get_lock_context(struct inode *inode, int type) { - spin_lock(&file_lock_lock); + struct file_lock_context *ctx; + + /* paired with cmpxchg() below */ + ctx = smp_load_acquire(&inode->i_flctx); + if (likely(ctx) || type == F_UNLCK) + goto out; + + ctx = kmem_cache_alloc(flctx_cache, GFP_KERNEL); + if (!ctx) + goto out; + + spin_lock_init(&ctx->flc_lock); + INIT_LIST_HEAD(&ctx->flc_flock); + INIT_LIST_HEAD(&ctx->flc_posix); + INIT_LIST_HEAD(&ctx->flc_lease); + + /* + * Assign the pointer if it's not already assigned. If it is, then + * free the context we just allocated. + */ + if (cmpxchg(&inode->i_flctx, NULL, ctx)) { + kmem_cache_free(flctx_cache, ctx); + ctx = smp_load_acquire(&inode->i_flctx); + } +out: + return ctx; } -EXPORT_SYMBOL_GPL(lock_flocks); -void unlock_flocks(void) +void +locks_free_lock_context(struct file_lock_context *ctx) { - spin_unlock(&file_lock_lock); + if (ctx) { + WARN_ON_ONCE(!list_empty(&ctx->flc_flock)); + WARN_ON_ONCE(!list_empty(&ctx->flc_posix)); + WARN_ON_ONCE(!list_empty(&ctx->flc_lease)); + kmem_cache_free(flctx_cache, ctx); + } } -EXPORT_SYMBOL_GPL(unlock_flocks); - -static struct kmem_cache *filelock_cache __read_mostly; static void locks_init_lock_heads(struct file_lock *fl) { - INIT_LIST_HEAD(&fl->fl_link); + INIT_HLIST_NODE(&fl->fl_link); + INIT_LIST_HEAD(&fl->fl_list); INIT_LIST_HEAD(&fl->fl_block); init_waitqueue_head(&fl->fl_wait); } @@ -200,8 +271,14 @@ void locks_release_private(struct file_lock *fl) fl->fl_ops->fl_release_private(fl); fl->fl_ops = NULL; } - fl->fl_lmops = NULL; + if (fl->fl_lmops) { + if (fl->fl_lmops->lm_put_owner) { + fl->fl_lmops->lm_put_owner(fl->fl_owner); + fl->fl_owner = NULL; + } + fl->fl_lmops = NULL; + } } EXPORT_SYMBOL_GPL(locks_release_private); @@ -209,14 +286,27 @@ EXPORT_SYMBOL_GPL(locks_release_private); void locks_free_lock(struct file_lock *fl) { BUG_ON(waitqueue_active(&fl->fl_wait)); + BUG_ON(!list_empty(&fl->fl_list)); BUG_ON(!list_empty(&fl->fl_block)); - BUG_ON(!list_empty(&fl->fl_link)); + BUG_ON(!hlist_unhashed(&fl->fl_link)); locks_release_private(fl); kmem_cache_free(filelock_cache, fl); } EXPORT_SYMBOL(locks_free_lock); +static void +locks_dispose_list(struct list_head *dispose) +{ + struct file_lock *fl; + + while (!list_empty(dispose)) { + fl = list_first_entry(dispose, struct file_lock, fl_list); + list_del_init(&fl->fl_list); + locks_free_lock(fl); + } +} + void locks_init_lock(struct file_lock *fl) { memset(fl, 0, sizeof(struct file_lock)); @@ -225,21 +315,10 @@ void locks_init_lock(struct file_lock *fl) EXPORT_SYMBOL(locks_init_lock); -static void locks_copy_private(struct file_lock *new, struct file_lock *fl) -{ - if (fl->fl_ops) { - if (fl->fl_ops->fl_copy_lock) - fl->fl_ops->fl_copy_lock(new, fl); - new->fl_ops = fl->fl_ops; - } - if (fl->fl_lmops) - new->fl_lmops = fl->fl_lmops; -} - /* * Initialize a new lock from an existing file_lock structure. */ -void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) +void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { new->fl_owner = fl->fl_owner; new->fl_pid = fl->fl_pid; @@ -248,21 +327,30 @@ void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) new->fl_type = fl->fl_type; new->fl_start = fl->fl_start; new->fl_end = fl->fl_end; + new->fl_lmops = fl->fl_lmops; new->fl_ops = NULL; - new->fl_lmops = NULL; + + if (fl->fl_lmops) { + if (fl->fl_lmops->lm_get_owner) + fl->fl_lmops->lm_get_owner(fl->fl_owner); + } } -EXPORT_SYMBOL(__locks_copy_lock); +EXPORT_SYMBOL(locks_copy_conflock); void locks_copy_lock(struct file_lock *new, struct file_lock *fl) { - locks_release_private(new); + /* "new" must be a freshly-initialized lock */ + WARN_ON_ONCE(new->fl_ops); + + locks_copy_conflock(new, fl); - __locks_copy_lock(new, fl); new->fl_file = fl->fl_file; new->fl_ops = fl->fl_ops; - new->fl_lmops = fl->fl_lmops; - locks_copy_private(new, fl); + if (fl->fl_ops) { + if (fl->fl_ops->fl_copy_lock) + fl->fl_ops->fl_copy_lock(new, fl); + } } EXPORT_SYMBOL(locks_copy_lock); @@ -282,26 +370,27 @@ static inline int flock_translate_cmd(int cmd) { } /* Fill in a file_lock structure with an appropriate FLOCK lock. */ -static int flock_make_lock(struct file *filp, struct file_lock **lock, - unsigned int cmd) +static struct file_lock * +flock_make_lock(struct file *filp, unsigned int cmd) { struct file_lock *fl; int type = flock_translate_cmd(cmd); + if (type < 0) - return type; + return ERR_PTR(type); fl = locks_alloc_lock(); if (fl == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); fl->fl_file = filp; + fl->fl_owner = filp; fl->fl_pid = current->tgid; fl->fl_flags = FL_FLOCK; fl->fl_type = type; fl->fl_end = OFFSET_MAX; - *lock = fl; - return 0; + return fl; } static int assign_type(struct file_lock *fl, long type) @@ -318,48 +407,43 @@ static int assign_type(struct file_lock *fl, long type) return 0; } -/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX - * style lock. - */ -static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, - struct flock *l) +static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, + struct flock64 *l) { - off_t start, end; - switch (l->l_whence) { case SEEK_SET: - start = 0; + fl->fl_start = 0; break; case SEEK_CUR: - start = filp->f_pos; + fl->fl_start = filp->f_pos; break; case SEEK_END: - start = i_size_read(file_inode(filp)); + fl->fl_start = i_size_read(file_inode(filp)); break; default: return -EINVAL; } + if (l->l_start > OFFSET_MAX - fl->fl_start) + return -EOVERFLOW; + fl->fl_start += l->l_start; + if (fl->fl_start < 0) + return -EINVAL; /* POSIX-1996 leaves the case l->l_len < 0 undefined; POSIX-2001 defines it. */ - start += l->l_start; - if (start < 0) - return -EINVAL; - fl->fl_end = OFFSET_MAX; if (l->l_len > 0) { - end = start + l->l_len - 1; - fl->fl_end = end; + if (l->l_len - 1 > OFFSET_MAX - fl->fl_start) + return -EOVERFLOW; + fl->fl_end = fl->fl_start + l->l_len - 1; + } else if (l->l_len < 0) { - end = start - 1; - fl->fl_end = end; - start += l->l_len; - if (start < 0) + if (fl->fl_start + l->l_len < 0) return -EINVAL; - } - fl->fl_start = start; /* we record the absolute position */ - if (fl->fl_end < fl->fl_start) - return -EOVERFLOW; - + fl->fl_end = fl->fl_start - 1; + fl->fl_start += l->l_len; + } else + fl->fl_end = OFFSET_MAX; + fl->fl_owner = current->files; fl->fl_pid = current->tgid; fl->fl_file = filp; @@ -370,62 +454,51 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, return assign_type(fl, l->l_type); } -#if BITS_PER_LONG == 32 -static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, - struct flock64 *l) +/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX + * style lock. + */ +static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, + struct flock *l) { - loff_t start; - - switch (l->l_whence) { - case SEEK_SET: - start = 0; - break; - case SEEK_CUR: - start = filp->f_pos; - break; - case SEEK_END: - start = i_size_read(file_inode(filp)); - break; - default: - return -EINVAL; - } - - start += l->l_start; - if (start < 0) - return -EINVAL; - fl->fl_end = OFFSET_MAX; - if (l->l_len > 0) { - fl->fl_end = start + l->l_len - 1; - } else if (l->l_len < 0) { - fl->fl_end = start - 1; - start += l->l_len; - if (start < 0) - return -EINVAL; - } - fl->fl_start = start; /* we record the absolute position */ - if (fl->fl_end < fl->fl_start) - return -EOVERFLOW; - - fl->fl_owner = current->files; - fl->fl_pid = current->tgid; - fl->fl_file = filp; - fl->fl_flags = FL_POSIX; - fl->fl_ops = NULL; - fl->fl_lmops = NULL; + struct flock64 ll = { + .l_type = l->l_type, + .l_whence = l->l_whence, + .l_start = l->l_start, + .l_len = l->l_len, + }; - return assign_type(fl, l->l_type); + return flock64_to_posix_lock(filp, fl, &ll); } -#endif /* default lease lock manager operations */ -static void lease_break_callback(struct file_lock *fl) +static bool +lease_break_callback(struct file_lock *fl) { kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG); + return false; +} + +static void +lease_setup(struct file_lock *fl, void **priv) +{ + struct file *filp = fl->fl_file; + struct fasync_struct *fa = *priv; + + /* + * fasync_insert_entry() returns the old entry if any. If there was no + * old entry, then it used "priv" and inserted it into the fasync list. + * Clear the pointer to indicate that it shouldn't be freed. + */ + if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa)) + *priv = NULL; + + __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); } static const struct lock_manager_operations lease_manager_ops = { .lm_break = lease_break_callback, .lm_change = lease_modify, + .lm_setup = lease_setup, }; /* @@ -436,7 +509,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl) if (assign_type(fl, type) != 0) return -EINVAL; - fl->fl_owner = current->files; + fl->fl_owner = filp; fl->fl_pid = current->tgid; fl->fl_file = filp; @@ -484,47 +557,118 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) return fl1->fl_owner == fl2->fl_owner; } +/* Must be called with the flc_lock held! */ +static void locks_insert_global_locks(struct file_lock *fl) +{ + lg_local_lock(&file_lock_lglock); + fl->fl_link_cpu = smp_processor_id(); + hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list)); + lg_local_unlock(&file_lock_lglock); +} + +/* Must be called with the flc_lock held! */ +static void locks_delete_global_locks(struct file_lock *fl) +{ + /* + * Avoid taking lock if already unhashed. This is safe since this check + * is done while holding the flc_lock, and new insertions into the list + * also require that it be held. + */ + if (hlist_unhashed(&fl->fl_link)) + return; + lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu); + hlist_del_init(&fl->fl_link); + lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu); +} + +static unsigned long +posix_owner_key(struct file_lock *fl) +{ + if (fl->fl_lmops && fl->fl_lmops->lm_owner_key) + return fl->fl_lmops->lm_owner_key(fl); + return (unsigned long)fl->fl_owner; +} + +static void locks_insert_global_blocked(struct file_lock *waiter) +{ + lockdep_assert_held(&blocked_lock_lock); + + hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); +} + +static void locks_delete_global_blocked(struct file_lock *waiter) +{ + lockdep_assert_held(&blocked_lock_lock); + + hash_del(&waiter->fl_link); +} + /* Remove waiter from blocker's block list. * When blocker ends up pointing to itself then the list is empty. + * + * Must be called with blocked_lock_lock held. */ static void __locks_delete_block(struct file_lock *waiter) { + locks_delete_global_blocked(waiter); list_del_init(&waiter->fl_block); - list_del_init(&waiter->fl_link); waiter->fl_next = NULL; } -/* - */ -void locks_delete_block(struct file_lock *waiter) +static void locks_delete_block(struct file_lock *waiter) { - lock_flocks(); + spin_lock(&blocked_lock_lock); __locks_delete_block(waiter); - unlock_flocks(); + spin_unlock(&blocked_lock_lock); } -EXPORT_SYMBOL(locks_delete_block); /* Insert waiter into blocker's block list. * We use a circular list so that processes can be easily woken up in * the order they blocked. The documentation doesn't require this but * it seems like the reasonable thing to do. + * + * Must be called with both the flc_lock and blocked_lock_lock held. The + * fl_block list itself is protected by the blocked_lock_lock, but by ensuring + * that the flc_lock is also held on insertions we can avoid taking the + * blocked_lock_lock in some cases when we see that the fl_block list is empty. */ -static void locks_insert_block(struct file_lock *blocker, - struct file_lock *waiter) +static void __locks_insert_block(struct file_lock *blocker, + struct file_lock *waiter) { BUG_ON(!list_empty(&waiter->fl_block)); - list_add_tail(&waiter->fl_block, &blocker->fl_block); waiter->fl_next = blocker; - if (IS_POSIX(blocker)) - list_add(&waiter->fl_link, &blocked_list); + list_add_tail(&waiter->fl_block, &blocker->fl_block); + if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) + locks_insert_global_blocked(waiter); +} + +/* Must be called with flc_lock held. */ +static void locks_insert_block(struct file_lock *blocker, + struct file_lock *waiter) +{ + spin_lock(&blocked_lock_lock); + __locks_insert_block(blocker, waiter); + spin_unlock(&blocked_lock_lock); } -/* Wake up processes blocked waiting for blocker. - * If told to wait then schedule the processes until the block list - * is empty, otherwise empty the block list ourselves. +/* + * Wake up processes blocked waiting for blocker. + * + * Must be called with the inode->flc_lock held! */ static void locks_wake_up_blocks(struct file_lock *blocker) { + /* + * Avoid taking global lock if list is empty. This is safe since new + * blocked requests are only added to the list under the flc_lock, and + * the flc_lock is always held here. Note that removal from the fl_block + * list does not require the flc_lock, so we must recheck list_empty() + * after acquiring the blocked_lock_lock. + */ + if (list_empty(&blocker->fl_block)) + return; + + spin_lock(&blocked_lock_lock); while (!list_empty(&blocker->fl_block)) { struct file_lock *waiter; @@ -536,43 +680,37 @@ static void locks_wake_up_blocks(struct file_lock *blocker) else wake_up(&waiter->fl_wait); } + spin_unlock(&blocked_lock_lock); } -/* Insert file lock fl into an inode's lock list at the position indicated - * by pos. At the same time add the lock to the global file lock list. - */ -static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) +static void +locks_insert_lock_ctx(struct file_lock *fl, struct list_head *before) { - list_add(&fl->fl_link, &file_lock_list); - fl->fl_nspid = get_pid(task_tgid(current)); - - /* insert into file's list */ - fl->fl_next = *pos; - *pos = fl; + list_add_tail(&fl->fl_list, before); + locks_insert_global_locks(fl); } -/* - * Delete a lock and then free it. - * Wake up processes that are blocked waiting for this lock, - * notify the FS that the lock has been cleared and - * finally free the lock. - */ -static void locks_delete_lock(struct file_lock **thisfl_p) +static void +locks_unlink_lock_ctx(struct file_lock *fl) { - struct file_lock *fl = *thisfl_p; - - *thisfl_p = fl->fl_next; - fl->fl_next = NULL; - list_del_init(&fl->fl_link); - + locks_delete_global_locks(fl); + list_del_init(&fl->fl_list); if (fl->fl_nspid) { put_pid(fl->fl_nspid); fl->fl_nspid = NULL; } - locks_wake_up_blocks(fl); - locks_free_lock(fl); +} + +static void +locks_delete_lock_ctx(struct file_lock *fl, struct list_head *dispose) +{ + locks_unlink_lock_ctx(fl); + if (dispose) + list_add(&fl->fl_list, dispose); + else + locks_free_lock(fl); } /* Determine if lock sys_fl blocks lock caller_fl. Common functionality @@ -595,7 +733,7 @@ static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *s /* POSIX locks owned by the same process do not conflict with * each other. */ - if (!IS_POSIX(sys_fl) || posix_same_owner(caller_fl, sys_fl)) + if (posix_same_owner(caller_fl, sys_fl)) return (0); /* Check whether they overlap */ @@ -613,7 +751,7 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *s /* FLOCK locks referring to the same filp do not conflict with * each other. */ - if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file)) + if (caller_fl->fl_file == sys_fl->fl_file) return (0); if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) return 0; @@ -625,21 +763,27 @@ void posix_test_lock(struct file *filp, struct file_lock *fl) { struct file_lock *cfl; + struct file_lock_context *ctx; + struct inode *inode = file_inode(filp); - lock_flocks(); - for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { - if (!IS_POSIX(cfl)) - continue; - if (posix_locks_conflict(fl, cfl)) - break; - } - if (cfl) { - __locks_copy_lock(fl, cfl); - if (cfl->fl_nspid) - fl->fl_pid = pid_vnr(cfl->fl_nspid); - } else + ctx = smp_load_acquire(&inode->i_flctx); + if (!ctx || list_empty_careful(&ctx->flc_posix)) { fl->fl_type = F_UNLCK; - unlock_flocks(); + return; + } + + spin_lock(&ctx->flc_lock); + list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { + if (posix_locks_conflict(fl, cfl)) { + locks_copy_conflock(fl, cfl); + if (cfl->fl_nspid) + fl->fl_pid = pid_vnr(cfl->fl_nspid); + goto out; + } + } + fl->fl_type = F_UNLCK; +out: + spin_unlock(&ctx->flc_lock); return; } EXPORT_SYMBOL(posix_test_lock); @@ -665,8 +809,16 @@ EXPORT_SYMBOL(posix_test_lock); * Note: the above assumption may not be true when handling lock * requests from a broken NFS client. It may also fail in the presence * of tasks (such as posix threads) sharing the same open file table. - * * To handle those cases, we just bail out after a few iterations. + * + * For FL_OFDLCK locks, the owner is the filp, not the files_struct. + * Because the owner is not even nominally tied to a thread of + * execution, the deadlock detection below can't reasonably work well. Just + * skip it for those. + * + * In principle, we could do a more limited deadlock detection on FL_OFDLCK + * locks that just checks for the case where two tasks are attempting to + * upgrade from read to write locks on the same inode. */ #define MAX_DEADLK_ITERATIONS 10 @@ -676,18 +828,28 @@ static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) { struct file_lock *fl; - list_for_each_entry(fl, &blocked_list, fl_link) { + hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) { if (posix_same_owner(fl, block_fl)) return fl->fl_next; } return NULL; } +/* Must be called with the blocked_lock_lock held! */ static int posix_locks_deadlock(struct file_lock *caller_fl, struct file_lock *block_fl) { int i = 0; + lockdep_assert_held(&blocked_lock_lock); + + /* + * This deadlock detector can't reasonably detect deadlocks with + * FL_OFDLCK locks, since they aren't owned by a process, per-se. + */ + if (IS_OFDLCK(caller_fl)) + return 0; + while ((block_fl = what_owner_is_waiting_for(block_fl))) { if (i++ > MAX_DEADLK_ITERATIONS) return 0; @@ -704,13 +866,21 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, * whether or not a lock was successfully freed by testing the return * value for -ENOENT. */ -static int flock_lock_file(struct file *filp, struct file_lock *request) +static int flock_lock_inode(struct inode *inode, struct file_lock *request) { struct file_lock *new_fl = NULL; - struct file_lock **before; - struct inode * inode = file_inode(filp); + struct file_lock *fl; + struct file_lock_context *ctx; int error = 0; - int found = 0; + bool found = false; + LIST_HEAD(dispose); + + ctx = locks_get_lock_context(inode, request->fl_type); + if (!ctx) { + if (request->fl_type != F_UNLCK) + return -ENOMEM; + return (request->fl_flags & FL_EXISTS) ? -ENOENT : 0; + } if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { new_fl = locks_alloc_lock(); @@ -718,22 +888,17 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) return -ENOMEM; } - lock_flocks(); + spin_lock(&ctx->flc_lock); if (request->fl_flags & FL_ACCESS) goto find_conflict; - for_each_lock(inode, before) { - struct file_lock *fl = *before; - if (IS_POSIX(fl)) - break; - if (IS_LEASE(fl)) - continue; - if (filp != fl->fl_file) + list_for_each_entry(fl, &ctx->flc_flock, fl_list) { + if (request->fl_file != fl->fl_file) continue; if (request->fl_type == fl->fl_type) goto out; - found = 1; - locks_delete_lock(before); + found = true; + locks_delete_lock_ctx(fl, &dispose); break; } @@ -743,23 +908,8 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) goto out; } - /* - * If a higher-priority process was blocked on the old file lock, - * give it the opportunity to lock the file. - */ - if (found) { - unlock_flocks(); - cond_resched(); - lock_flocks(); - } - find_conflict: - for_each_lock(inode, before) { - struct file_lock *fl = *before; - if (IS_POSIX(fl)) - break; - if (IS_LEASE(fl)) - continue; + list_for_each_entry(fl, &ctx->flc_flock, fl_list) { if (!flock_locks_conflict(request, fl)) continue; error = -EAGAIN; @@ -772,26 +922,33 @@ find_conflict: if (request->fl_flags & FL_ACCESS) goto out; locks_copy_lock(new_fl, request); - locks_insert_lock(before, new_fl); + locks_insert_lock_ctx(new_fl, &ctx->flc_flock); new_fl = NULL; error = 0; out: - unlock_flocks(); + spin_unlock(&ctx->flc_lock); if (new_fl) locks_free_lock(new_fl); + locks_dispose_list(&dispose); return error; } static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) { - struct file_lock *fl; + struct file_lock *fl, *tmp; struct file_lock *new_fl = NULL; struct file_lock *new_fl2 = NULL; struct file_lock *left = NULL; struct file_lock *right = NULL; - struct file_lock **before; - int error, added = 0; + struct file_lock_context *ctx; + int error; + bool added = false; + LIST_HEAD(dispose); + + ctx = locks_get_lock_context(inode, request->fl_type); + if (!ctx) + return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM; /* * We may need two file_lock structures for this operation, @@ -806,24 +963,32 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str new_fl2 = locks_alloc_lock(); } - lock_flocks(); + spin_lock(&ctx->flc_lock); + /* + * New lock request. Walk all POSIX locks and look for conflicts. If + * there are any, either return error or put the request on the + * blocker's list of waiters and the global blocked_hash. + */ if (request->fl_type != F_UNLCK) { - for_each_lock(inode, before) { - fl = *before; - if (!IS_POSIX(fl)) - continue; + list_for_each_entry(fl, &ctx->flc_posix, fl_list) { if (!posix_locks_conflict(request, fl)) continue; if (conflock) - __locks_copy_lock(conflock, fl); + locks_copy_conflock(conflock, fl); error = -EAGAIN; if (!(request->fl_flags & FL_SLEEP)) goto out; + /* + * Deadlock detection and insertion into the blocked + * locks list must be done while holding the same lock! + */ error = -EDEADLK; - if (posix_locks_deadlock(request, fl)) - goto out; - error = FILE_LOCK_DEFERRED; - locks_insert_block(fl, request); + spin_lock(&blocked_lock_lock); + if (likely(!posix_locks_deadlock(request, fl))) { + error = FILE_LOCK_DEFERRED; + __locks_insert_block(fl, request); + } + spin_unlock(&blocked_lock_lock); goto out; } } @@ -833,29 +998,25 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str if (request->fl_flags & FL_ACCESS) goto out; - /* - * Find the first old lock with the same owner as the new lock. - */ - - before = &inode->i_flock; - - /* First skip locks owned by other processes. */ - while ((fl = *before) && (!IS_POSIX(fl) || - !posix_same_owner(request, fl))) { - before = &fl->fl_next; + /* Find the first old lock with the same owner as the new lock */ + list_for_each_entry(fl, &ctx->flc_posix, fl_list) { + if (posix_same_owner(request, fl)) + break; } - /* Process locks with this owner. */ - while ((fl = *before) && posix_same_owner(request, fl)) { - /* Detect adjacent or overlapping regions (if same lock type) - */ + /* Process locks with this owner. */ + list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) { + if (!posix_same_owner(request, fl)) + break; + + /* Detect adjacent or overlapping regions (if same lock type) */ if (request->fl_type == fl->fl_type) { /* In all comparisons of start vs end, use * "start - 1" rather than "end + 1". If end * is OFFSET_MAX, end + 1 will become negative. */ if (fl->fl_end < request->fl_start - 1) - goto next_lock; + continue; /* If the next lock in the list has entirely bigger * addresses than the new one, insert the lock here. */ @@ -876,22 +1037,21 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str else request->fl_end = fl->fl_end; if (added) { - locks_delete_lock(before); + locks_delete_lock_ctx(fl, &dispose); continue; } request = fl; - added = 1; - } - else { + added = true; + } else { /* Processing for different lock types is a bit * more complex. */ if (fl->fl_end < request->fl_start) - goto next_lock; + continue; if (fl->fl_start > request->fl_end) break; if (request->fl_type == F_UNLCK) - added = 1; + added = true; if (fl->fl_start < request->fl_start) left = fl; /* If the next lock in the list has a higher end @@ -906,35 +1066,33 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str * one (This may happen several times). */ if (added) { - locks_delete_lock(before); + locks_delete_lock_ctx(fl, &dispose); continue; } - /* Replace the old lock with the new one. - * Wake up anybody waiting for the old one, - * as the change in lock type might satisfy - * their needs. + /* + * Replace the old lock with new_fl, and + * remove the old one. It's safe to do the + * insert here since we know that we won't be + * using new_fl later, and that the lock is + * just replacing an existing lock. */ - locks_wake_up_blocks(fl); - fl->fl_start = request->fl_start; - fl->fl_end = request->fl_end; - fl->fl_type = request->fl_type; - locks_release_private(fl); - locks_copy_private(fl, request); - request = fl; - added = 1; + error = -ENOLCK; + if (!new_fl) + goto out; + locks_copy_lock(new_fl, request); + request = new_fl; + new_fl = NULL; + locks_insert_lock_ctx(request, &fl->fl_list); + locks_delete_lock_ctx(fl, &dispose); + added = true; } } - /* Go on to next lock. - */ - next_lock: - before = &fl->fl_next; } /* - * The above code only modifies existing locks in case of - * merging or replacing. If new lock(s) need to be inserted - * all modifications are done bellow this, so it's safe yet to - * bail out. + * The above code only modifies existing locks in case of merging or + * replacing. If new lock(s) need to be inserted all modifications are + * done below this, so it's safe yet to bail out. */ error = -ENOLCK; /* "no luck" */ if (right && left == right && !new_fl2) @@ -953,7 +1111,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str goto out; } locks_copy_lock(new_fl, request); - locks_insert_lock(before, new_fl); + locks_insert_lock_ctx(new_fl, &fl->fl_list); + fl = new_fl; new_fl = NULL; } if (right) { @@ -964,7 +1123,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str left = new_fl2; new_fl2 = NULL; locks_copy_lock(left, right); - locks_insert_lock(before, left); + locks_insert_lock_ctx(left, &fl->fl_list); } right->fl_start = request->fl_end + 1; locks_wake_up_blocks(right); @@ -974,7 +1133,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str locks_wake_up_blocks(left); } out: - unlock_flocks(); + spin_unlock(&ctx->flc_lock); /* * Free any unused locks. */ @@ -982,6 +1141,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str locks_free_lock(new_fl); if (new_fl2) locks_free_lock(new_fl2); + locks_dispose_list(&dispose); return error; } @@ -1007,20 +1167,18 @@ int posix_lock_file(struct file *filp, struct file_lock *fl, EXPORT_SYMBOL(posix_lock_file); /** - * posix_lock_file_wait - Apply a POSIX-style lock to a file - * @filp: The file to apply the lock to + * posix_lock_inode_wait - Apply a POSIX-style lock to a file + * @inode: inode of file to which lock request should be applied * @fl: The lock to be applied * - * Add a POSIX style lock to a file. - * We merge adjacent & overlapping locks whenever possible. - * POSIX locks are sorted by owner task, then by starting address + * Apply a POSIX style lock request to an inode. */ -int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep (); for (;;) { - error = posix_lock_file(filp, fl, NULL); + error = __posix_lock_file(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1032,32 +1190,39 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl) } return error; } -EXPORT_SYMBOL(posix_lock_file_wait); /** * locks_mandatory_locked - Check for an active lock - * @inode: the file to check + * @file: the file to check * * Searches the inode's list of locks to find any POSIX locks which conflict. * This function is called from locks_verify_locked() only. */ -int locks_mandatory_locked(struct inode *inode) +int locks_mandatory_locked(struct file *file) { - fl_owner_t owner = current->files; + int ret; + struct inode *inode = file_inode(file); + struct file_lock_context *ctx; struct file_lock *fl; + ctx = smp_load_acquire(&inode->i_flctx); + if (!ctx || list_empty_careful(&ctx->flc_posix)) + return 0; + /* * Search the lock list for this inode for any POSIX locks. */ - lock_flocks(); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!IS_POSIX(fl)) - continue; - if (fl->fl_owner != owner) + spin_lock(&ctx->flc_lock); + ret = 0; + list_for_each_entry(fl, &ctx->flc_posix, fl_list) { + if (fl->fl_owner != current->files && + fl->fl_owner != file) { + ret = -EAGAIN; break; + } } - unlock_flocks(); - return fl ? -EAGAIN : 0; + spin_unlock(&ctx->flc_lock); + return ret; } /** @@ -1079,19 +1244,30 @@ int locks_mandatory_area(int read_write, struct inode *inode, { struct file_lock fl; int error; + bool sleep = false; locks_init_lock(&fl); - fl.fl_owner = current->files; fl.fl_pid = current->tgid; fl.fl_file = filp; fl.fl_flags = FL_POSIX | FL_ACCESS; if (filp && !(filp->f_flags & O_NONBLOCK)) - fl.fl_flags |= FL_SLEEP; + sleep = true; fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; fl.fl_start = offset; fl.fl_end = offset + count - 1; for (;;) { + if (filp) { + fl.fl_owner = filp; + fl.fl_flags &= ~FL_SLEEP; + error = __posix_lock_file(inode, &fl, NULL); + if (!error) + break; + } + + if (sleep) + fl.fl_flags |= FL_SLEEP; + fl.fl_owner = current->files; error = __posix_lock_file(inode, &fl, NULL); if (error != FILE_LOCK_DEFERRED) break; @@ -1126,9 +1302,8 @@ static void lease_clear_pending(struct file_lock *fl, int arg) } /* We already had a lease on this file; just change its type */ -int lease_modify(struct file_lock **before, int arg) +int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose) { - struct file_lock *fl = *before; int error = assign_type(fl, arg); if (error) @@ -1145,11 +1320,10 @@ int lease_modify(struct file_lock **before, int arg) printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); fl->fl_fasync = NULL; } - locks_delete_lock(before); + locks_delete_lock_ctx(fl, dispose); } return 0; } - EXPORT_SYMBOL(lease_modify); static bool past_time(unsigned long then) @@ -1160,60 +1334,87 @@ static bool past_time(unsigned long then) return time_after(jiffies, then); } -static void time_out_leases(struct inode *inode) +static void time_out_leases(struct inode *inode, struct list_head *dispose) { - struct file_lock **before; - struct file_lock *fl; + struct file_lock_context *ctx = inode->i_flctx; + struct file_lock *fl, *tmp; + + lockdep_assert_held(&ctx->flc_lock); - before = &inode->i_flock; - while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) { + list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) { + trace_time_out_leases(inode, fl); if (past_time(fl->fl_downgrade_time)) - lease_modify(before, F_RDLCK); + lease_modify(fl, F_RDLCK, dispose); if (past_time(fl->fl_break_time)) - lease_modify(before, F_UNLCK); - if (fl == *before) /* lease_modify may have freed fl */ - before = &fl->fl_next; + lease_modify(fl, F_UNLCK, dispose); + } +} + +static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) +{ + if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) + return false; + if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) + return false; + return locks_conflict(breaker, lease); +} + +static bool +any_leases_conflict(struct inode *inode, struct file_lock *breaker) +{ + struct file_lock_context *ctx = inode->i_flctx; + struct file_lock *fl; + + lockdep_assert_held(&ctx->flc_lock); + + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + if (leases_conflict(fl, breaker)) + return true; } + return false; } /** * __break_lease - revoke all outstanding leases on file * @inode: the inode of the file to return - * @mode: the open mode (read or write) + * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR: + * break all leases + * @type: FL_LEASE: break leases and delegations; FL_DELEG: break + * only delegations * * break_lease (inlined for speed) has checked there already is at least * some kind of lock (maybe a lease) on this file. Leases are broken on * a call to open() or truncate(). This function can sleep unless you * specified %O_NONBLOCK to your open(). */ -int __break_lease(struct inode *inode, unsigned int mode) +int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { int error = 0; - struct file_lock *new_fl, *flock; - struct file_lock *fl; + struct file_lock_context *ctx; + struct file_lock *new_fl, *fl, *tmp; unsigned long break_time; - int i_have_this_lease = 0; int want_write = (mode & O_ACCMODE) != O_RDONLY; + LIST_HEAD(dispose); new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); if (IS_ERR(new_fl)) return PTR_ERR(new_fl); + new_fl->fl_flags = type; - lock_flocks(); + /* typically we will check that ctx is non-NULL before calling */ + ctx = smp_load_acquire(&inode->i_flctx); + if (!ctx) { + WARN_ON_ONCE(1); + return error; + } - time_out_leases(inode); + spin_lock(&ctx->flc_lock); - flock = inode->i_flock; - if ((flock == NULL) || !IS_LEASE(flock)) - goto out; + time_out_leases(inode, &dispose); - if (!locks_conflict(flock, new_fl)) + if (!any_leases_conflict(inode, new_fl)) goto out; - for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) - if (fl->fl_owner == current->files) - i_have_this_lease = 1; - break_time = 0; if (lease_break_time > 0) { break_time = jiffies + lease_break_time * HZ; @@ -1221,56 +1422,63 @@ int __break_lease(struct inode *inode, unsigned int mode) break_time++; /* so that 0 means no break time */ } - for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { + list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) { + if (!leases_conflict(fl, new_fl)) + continue; if (want_write) { if (fl->fl_flags & FL_UNLOCK_PENDING) continue; fl->fl_flags |= FL_UNLOCK_PENDING; fl->fl_break_time = break_time; } else { - if (lease_breaking(flock)) + if (lease_breaking(fl)) continue; fl->fl_flags |= FL_DOWNGRADE_PENDING; fl->fl_downgrade_time = break_time; } - fl->fl_lmops->lm_break(fl); + if (fl->fl_lmops->lm_break(fl)) + locks_delete_lock_ctx(fl, &dispose); } - if (i_have_this_lease || (mode & O_NONBLOCK)) { + if (list_empty(&ctx->flc_lease)) + goto out; + + if (mode & O_NONBLOCK) { + trace_break_lease_noblock(inode, new_fl); error = -EWOULDBLOCK; goto out; } restart: - break_time = flock->fl_break_time; - if (break_time != 0) { + fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list); + break_time = fl->fl_break_time; + if (break_time != 0) break_time -= jiffies; - if (break_time == 0) - break_time++; - } - locks_insert_block(flock, new_fl); - unlock_flocks(); + if (break_time == 0) + break_time++; + locks_insert_block(fl, new_fl); + trace_break_lease_block(inode, new_fl); + spin_unlock(&ctx->flc_lock); + locks_dispose_list(&dispose); error = wait_event_interruptible_timeout(new_fl->fl_wait, !new_fl->fl_next, break_time); - lock_flocks(); - __locks_delete_block(new_fl); + spin_lock(&ctx->flc_lock); + trace_break_lease_unblock(inode, new_fl); + locks_delete_block(new_fl); if (error >= 0) { - if (error == 0) - time_out_leases(inode); /* * Wait for the next conflicting lease that has not been * broken yet */ - for (flock = inode->i_flock; flock && IS_LEASE(flock); - flock = flock->fl_next) { - if (locks_conflict(new_fl, flock)) - goto restart; - } + if (error == 0) + time_out_leases(inode, &dispose); + if (any_leases_conflict(inode, new_fl)) + goto restart; error = 0; } - out: - unlock_flocks(); + spin_unlock(&ctx->flc_lock); + locks_dispose_list(&dispose); locks_free_lock(new_fl); return error; } @@ -1288,8 +1496,23 @@ EXPORT_SYMBOL(__break_lease); */ void lease_get_mtime(struct inode *inode, struct timespec *time) { - struct file_lock *flock = inode->i_flock; - if (flock && IS_LEASE(flock) && (flock->fl_type == F_WRLCK)) + bool has_lease = false; + struct file_lock_context *ctx; + struct file_lock *fl; + + ctx = smp_load_acquire(&inode->i_flctx); + if (ctx && !list_empty_careful(&ctx->flc_lease)) { + spin_lock(&ctx->flc_lock); + if (!list_empty(&ctx->flc_lease)) { + fl = list_first_entry(&ctx->flc_lease, + struct file_lock, fl_list); + if (fl->fl_type == F_WRLCK) + has_lease = true; + } + spin_unlock(&ctx->flc_lock); + } + + if (has_lease) *time = current_fs_time(inode->i_sb); else *time = inode->i_mtime; @@ -1323,36 +1546,98 @@ EXPORT_SYMBOL(lease_get_mtime); int fcntl_getlease(struct file *filp) { struct file_lock *fl; + struct inode *inode = file_inode(filp); + struct file_lock_context *ctx; int type = F_UNLCK; - - lock_flocks(); - time_out_leases(file_inode(filp)); - for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); - fl = fl->fl_next) { - if (fl->fl_file == filp) { + LIST_HEAD(dispose); + + ctx = smp_load_acquire(&inode->i_flctx); + if (ctx && !list_empty_careful(&ctx->flc_lease)) { + spin_lock(&ctx->flc_lock); + time_out_leases(file_inode(filp), &dispose); + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + if (fl->fl_file != filp) + continue; type = target_leasetype(fl); break; } + spin_unlock(&ctx->flc_lock); + locks_dispose_list(&dispose); } - unlock_flocks(); return type; } -int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) +/** + * check_conflicting_open - see if the given dentry points to a file that has + * an existing open that would conflict with the + * desired lease. + * @dentry: dentry to check + * @arg: type of lease that we're trying to acquire + * @flags: current lock flags + * + * Check to see if there's an existing open fd on this file that would + * conflict with the lease we're trying to set. + */ +static int +check_conflicting_open(const struct dentry *dentry, const long arg, int flags) +{ + int ret = 0; + struct inode *inode = dentry->d_inode; + + if (flags & FL_LAYOUT) + return 0; + + if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) + return -EAGAIN; + + if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || + (atomic_read(&inode->i_count) > 1))) + ret = -EAGAIN; + + return ret; +} + +static int +generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) { - struct file_lock *fl, **before, **my_before = NULL, *lease; + struct file_lock *fl, *my_fl = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); + struct file_lock_context *ctx; + bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; + LIST_HEAD(dispose); lease = *flp; + trace_generic_add_lease(inode, lease); - error = -EAGAIN; - if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) - goto out; - if ((arg == F_WRLCK) - && ((dentry->d_count > 1) - || (atomic_read(&inode->i_count) > 1))) + /* Note that arg is never F_UNLCK here */ + ctx = locks_get_lock_context(inode, arg); + if (!ctx) + return -ENOMEM; + + /* + * In the delegation case we need mutual exclusion with + * a number of operations that take the i_mutex. We trylock + * because delegations are an optional optimization, and if + * there's some chance of a conflict--we'd rather not + * bother, maybe that's a sign this just isn't a good file to + * hand out a delegation on. + */ + if (is_deleg && !mutex_trylock(&inode->i_mutex)) + return -EAGAIN; + + if (is_deleg && arg == F_WRLCK) { + /* Write delegations are not currently supported: */ + mutex_unlock(&inode->i_mutex); + WARN_ON_ONCE(1); + return -EINVAL; + } + + spin_lock(&ctx->flc_lock); + time_out_leases(inode, &dispose); + error = check_conflicting_open(dentry, arg, lease->fl_flags); + if (error) goto out; /* @@ -1364,13 +1649,13 @@ int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) * except for this filp. */ error = -EAGAIN; - for (before = &inode->i_flock; - ((fl = *before) != NULL) && IS_LEASE(fl); - before = &fl->fl_next) { - if (fl->fl_file == filp) { - my_before = before; + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + if (fl->fl_file == filp && + fl->fl_owner == lease->fl_owner) { + my_fl = fl; continue; } + /* * No exclusive leases if someone else has a lease on * this file: @@ -1385,55 +1670,93 @@ int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) goto out; } - if (my_before != NULL) { - error = lease->fl_lmops->lm_change(my_before, arg); - if (!error) - *flp = *my_before; - goto out; + if (my_fl != NULL) { + lease = my_fl; + error = lease->fl_lmops->lm_change(lease, arg, &dispose); + if (error) + goto out; + goto out_setup; } error = -EINVAL; if (!leases_enable) goto out; - locks_insert_lock(before, lease); - return 0; + locks_insert_lock_ctx(lease, &ctx->flc_lease); + /* + * The check in break_lease() is lockless. It's possible for another + * open to race in after we did the earlier check for a conflicting + * open but before the lease was inserted. Check again for a + * conflicting open and cancel the lease if there is one. + * + * We also add a barrier here to ensure that the insertion of the lock + * precedes these checks. + */ + smp_mb(); + error = check_conflicting_open(dentry, arg, lease->fl_flags); + if (error) { + locks_unlink_lock_ctx(lease); + goto out; + } +out_setup: + if (lease->fl_lmops->lm_setup) + lease->fl_lmops->lm_setup(lease, priv); out: + spin_unlock(&ctx->flc_lock); + locks_dispose_list(&dispose); + if (is_deleg) + mutex_unlock(&inode->i_mutex); + if (!error && !my_fl) + *flp = NULL; return error; } -int generic_delete_lease(struct file *filp, struct file_lock **flp) +static int generic_delete_lease(struct file *filp, void *owner) { - struct file_lock *fl, **before; - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + int error = -EAGAIN; + struct file_lock *fl, *victim = NULL; + struct inode *inode = file_inode(filp); + struct file_lock_context *ctx; + LIST_HEAD(dispose); - for (before = &inode->i_flock; - ((fl = *before) != NULL) && IS_LEASE(fl); - before = &fl->fl_next) { - if (fl->fl_file != filp) - continue; - return (*flp)->fl_lmops->lm_change(before, F_UNLCK); + ctx = smp_load_acquire(&inode->i_flctx); + if (!ctx) { + trace_generic_delete_lease(inode, NULL); + return error; + } + + spin_lock(&ctx->flc_lock); + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + if (fl->fl_file == filp && + fl->fl_owner == owner) { + victim = fl; + break; + } } - return -EAGAIN; + trace_generic_delete_lease(inode, victim); + if (victim) + error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); + spin_unlock(&ctx->flc_lock); + locks_dispose_list(&dispose); + return error; } /** * generic_setlease - sets a lease on an open file - * @filp: file pointer - * @arg: type of lease to obtain - * @flp: input - file_lock to use, output - file_lock inserted + * @filp: file pointer + * @arg: type of lease to obtain + * @flp: input - file_lock to use, output - file_lock inserted + * @priv: private data for lm_setup (may be NULL if lm_setup + * doesn't require it) * * The (input) flp->fl_lmops->lm_break function is required * by break_lease(). - * - * Called with file_lock_lock held. */ -int generic_setlease(struct file *filp, long arg, struct file_lock **flp) +int generic_setlease(struct file *filp, long arg, struct file_lock **flp, + void **priv) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); int error; if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE)) @@ -1444,81 +1767,53 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) if (error) return error; - time_out_leases(inode); - - BUG_ON(!(*flp)->fl_lmops->lm_break); - switch (arg) { case F_UNLCK: - return generic_delete_lease(filp, flp); + return generic_delete_lease(filp, *priv); case F_RDLCK: case F_WRLCK: - return generic_add_lease(filp, arg, flp); + if (!(*flp)->fl_lmops->lm_break) { + WARN_ON_ONCE(1); + return -ENOLCK; + } + + return generic_add_lease(filp, arg, flp, priv); default: return -EINVAL; } } EXPORT_SYMBOL(generic_setlease); -static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) -{ - if (filp->f_op && filp->f_op->setlease) - return filp->f_op->setlease(filp, arg, lease); - else - return generic_setlease(filp, arg, lease); -} - /** - * vfs_setlease - sets a lease on an open file - * @filp: file pointer - * @arg: type of lease to obtain - * @lease: file_lock to use - * - * Call this to establish a lease on the file. - * The (*lease)->fl_lmops->lm_break operation must be set; if not, - * break_lease will oops! - * - * This will call the filesystem's setlease file method, if - * defined. Note that there is no getlease method; instead, the - * filesystem setlease method should call back to setlease() to - * add a lease to the inode's lease list, where fcntl_getlease() can - * find it. Since fcntl_getlease() only reports whether the current - * task holds a lease, a cluster filesystem need only do this for - * leases held by processes on this node. - * - * There is also no break_lease method; filesystems that - * handle their own leases should break leases themselves from the - * filesystem's open, create, and (on truncate) setattr methods. - * - * Warning: the only current setlease methods exist only to disable - * leases in certain cases. More vfs changes may be required to - * allow a full filesystem lease implementation. + * vfs_setlease - sets a lease on an open file + * @filp: file pointer + * @arg: type of lease to obtain + * @lease: file_lock to use when adding a lease + * @priv: private info for lm_setup when adding a lease (may be + * NULL if lm_setup doesn't require it) + * + * Call this to establish a lease on the file. The "lease" argument is not + * used for F_UNLCK requests and may be NULL. For commands that set or alter + * an existing lease, the (*lease)->fl_lmops->lm_break operation must be set; + * if not, this function will return -ENOLCK (and generate a scary-looking + * stack trace). + * + * The "priv" pointer is passed directly to the lm_setup function as-is. It + * may be NULL if the lm_setup operation doesn't require it. */ - -int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) +int +vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv) { - int error; - - lock_flocks(); - error = __vfs_setlease(filp, arg, lease); - unlock_flocks(); - - return error; + if (filp->f_op->setlease) + return filp->f_op->setlease(filp, arg, lease, priv); + else + return generic_setlease(filp, arg, lease, priv); } EXPORT_SYMBOL_GPL(vfs_setlease); -static int do_fcntl_delete_lease(struct file *filp) -{ - struct file_lock fl, *flp = &fl; - - lease_init(filp, F_UNLCK, flp); - - return vfs_setlease(filp, F_UNLCK, &flp); -} - static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) { - struct file_lock *fl, *ret; + struct file_lock *fl; struct fasync_struct *new; int error; @@ -1531,30 +1826,11 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) locks_free_lock(fl); return -ENOMEM; } - ret = fl; - lock_flocks(); - error = __vfs_setlease(filp, arg, &ret); - if (error) { - unlock_flocks(); - locks_free_lock(fl); - goto out_free_fasync; - } - if (ret != fl) - locks_free_lock(fl); + new->fa_fd = fd; - /* - * fasync_insert_entry() returns the old entry if any. - * If there was no old entry, then it used 'new' and - * inserted it into the fasync list. Clear new so that - * we don't release it here. - */ - if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new)) - new = NULL; - - error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); - unlock_flocks(); - -out_free_fasync: + error = vfs_setlease(filp, arg, &fl, (void **)&new); + if (fl) + locks_free_lock(fl); if (new) fasync_free(new); return error; @@ -1573,23 +1849,23 @@ out_free_fasync: int fcntl_setlease(unsigned int fd, struct file *filp, long arg) { if (arg == F_UNLCK) - return do_fcntl_delete_lease(filp); + return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp); return do_fcntl_add_lease(fd, filp, arg); } /** - * flock_lock_file_wait - Apply a FLOCK-style lock to a file - * @filp: The file to apply the lock to + * flock_lock_inode_wait - Apply a FLOCK-style lock to a file + * @inode: inode of the file to apply to * @fl: The lock to be applied * - * Add a FLOCK style lock to a file. + * Apply a FLOCK style lock request to an inode. */ -int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep(); for (;;) { - error = flock_lock_file(filp, fl); + error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1602,7 +1878,29 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) return error; } -EXPORT_SYMBOL(flock_lock_file_wait); +/** + * locks_lock_inode_wait - Apply a lock to an inode + * @inode: inode of the file to apply to + * @fl: The lock to be applied + * + * Apply a POSIX or FLOCK style lock request to an inode. + */ +int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) +{ + int res = 0; + switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { + case FL_POSIX: + res = posix_lock_inode_wait(inode, fl); + break; + case FL_FLOCK: + res = flock_lock_inode_wait(inode, fl); + break; + default: + BUG(); + } + return res; +} +EXPORT_SYMBOL(locks_lock_inode_wait); /** * sys_flock: - flock() system call. @@ -1642,9 +1940,12 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) !(f.file->f_mode & (FMODE_READ|FMODE_WRITE))) goto out_putf; - error = flock_make_lock(f.file, &lock, cmd); - if (error) + lock = flock_make_lock(f.file, cmd); + if (IS_ERR(lock)) { + error = PTR_ERR(lock); goto out_putf; + } + if (can_sleep) lock->fl_flags |= FL_SLEEP; @@ -1652,12 +1953,12 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) if (error) goto out_free; - if (f.file->f_op && f.file->f_op->flock) + if (f.file->f_op->flock) error = f.file->f_op->flock(f.file, (can_sleep) ? F_SETLKW : F_SETLK, lock); else - error = flock_lock_file_wait(f.file, lock); + error = locks_lock_file_wait(f.file, lock); out_free: locks_free_lock(lock); @@ -1678,7 +1979,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) */ int vfs_test_lock(struct file *filp, struct file_lock *fl) { - if (filp->f_op && filp->f_op->lock) + if (filp->f_op->lock) return filp->f_op->lock(filp, F_GETLK, fl); posix_test_lock(filp, fl); return 0; @@ -1687,7 +1988,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock); static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) { - flock->l_pid = fl->fl_pid; + flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid; #if BITS_PER_LONG == 32 /* * Make sure we can represent the posix lock via @@ -1709,7 +2010,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) #if BITS_PER_LONG == 32 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) { - flock->l_pid = fl->fl_pid; + flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid; flock->l_start = fl->fl_start; flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; @@ -1721,7 +2022,7 @@ static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ -int fcntl_getlk(struct file *filp, struct flock __user *l) +int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) { struct file_lock file_lock; struct flock flock; @@ -1738,6 +2039,16 @@ int fcntl_getlk(struct file *filp, struct flock __user *l) if (error) goto out; + if (cmd == F_OFD_GETLK) { + error = -EINVAL; + if (flock.l_pid != 0) + goto out; + + cmd = F_GETLK; + file_lock.fl_flags |= FL_OFDLCK; + file_lock.fl_owner = filp; + } + error = vfs_test_lock(filp, &file_lock); if (error) goto out; @@ -1746,11 +2057,13 @@ int fcntl_getlk(struct file *filp, struct flock __user *l) if (file_lock.fl_type != F_UNLCK) { error = posix_lock_to_flock(&flock, &file_lock); if (error) - goto out; + goto rel_priv; } error = -EFAULT; if (!copy_to_user(l, &flock, sizeof(flock))) error = 0; +rel_priv: + locks_release_private(&file_lock); out: return error; } @@ -1790,7 +2103,7 @@ out: */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) { - if (filp->f_op && filp->f_op->lock) + if (filp->f_op->lock) return filp->f_op->lock(filp, cmd, fl); else return posix_lock_file(filp, fl, conf); @@ -1821,6 +2134,22 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd, return error; } +/* Ensure that fl->fl_file has compatible f_mode for F_SETLK calls */ +static int +check_fmode_for_setlk(struct file_lock *fl) +{ + switch (fl->fl_type) { + case F_RDLCK: + if (!(fl->fl_file->f_mode & FMODE_READ)) + return -EBADF; + break; + case F_WRLCK: + if (!(fl->fl_file->f_mode & FMODE_WRITE)) + return -EBADF; + } + return 0; +} + /* Apply the lock described by l to an open file descriptor. * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ @@ -1853,29 +2182,39 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, goto out; } -again: error = flock_to_posix_lock(filp, file_lock, &flock); if (error) goto out; - if (cmd == F_SETLKW) { - file_lock->fl_flags |= FL_SLEEP; - } - - error = -EBADF; - switch (flock.l_type) { - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - goto out; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) + + error = check_fmode_for_setlk(file_lock); + if (error) + goto out; + + /* + * If the cmd is requesting file-private locks, then set the + * FL_OFDLCK flag and override the owner. + */ + switch (cmd) { + case F_OFD_SETLK: + error = -EINVAL; + if (flock.l_pid != 0) goto out; + + cmd = F_SETLK; + file_lock->fl_flags |= FL_OFDLCK; + file_lock->fl_owner = filp; break; - case F_UNLCK: - break; - default: + case F_OFD_SETLKW: error = -EINVAL; - goto out; + if (flock.l_pid != 0) + goto out; + + cmd = F_SETLKW; + file_lock->fl_flags |= FL_OFDLCK; + file_lock->fl_owner = filp; + /* Fallthrough */ + case F_SETLKW: + file_lock->fl_flags |= FL_SLEEP; } error = do_lock_file_wait(filp, cmd, file_lock); @@ -1884,19 +2223,22 @@ again: * Attempt to detect a close/fcntl race and recover by * releasing the lock that was just acquired. */ - /* - * we need that spin_lock here - it prevents reordering between - * update of inode->i_flock and check for it done in close(). - * rcu_read_lock() wouldn't do. - */ - spin_lock(¤t->files->file_lock); - f = fcheck(fd); - spin_unlock(¤t->files->file_lock); - if (!error && f != filp && flock.l_type != F_UNLCK) { - flock.l_type = F_UNLCK; - goto again; + if (!error && file_lock->fl_type != F_UNLCK) { + /* + * We need that spin_lock here - it prevents reordering between + * update of i_flctx->flc_posix and check for it done in + * close(). rcu_read_lock() wouldn't do. + */ + spin_lock(¤t->files->file_lock); + f = fcheck(fd); + spin_unlock(¤t->files->file_lock); + if (f != filp) { + file_lock->fl_type = F_UNLCK; + error = do_lock_file_wait(filp, cmd, file_lock); + WARN_ON_ONCE(error); + error = -EBADF; + } } - out: locks_free_lock(file_lock); return error; @@ -1906,7 +2248,7 @@ out: /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ -int fcntl_getlk64(struct file *filp, struct flock64 __user *l) +int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) { struct file_lock file_lock; struct flock64 flock; @@ -1923,6 +2265,16 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) if (error) goto out; + if (cmd == F_OFD_GETLK) { + error = -EINVAL; + if (flock.l_pid != 0) + goto out; + + cmd = F_GETLK64; + file_lock.fl_flags |= FL_OFDLCK; + file_lock.fl_owner = filp; + } + error = vfs_test_lock(filp, &file_lock); if (error) goto out; @@ -1934,7 +2286,8 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) error = -EFAULT; if (!copy_to_user(l, &flock, sizeof(flock))) error = 0; - + + locks_release_private(&file_lock); out: return error; } @@ -1971,29 +2324,39 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, goto out; } -again: error = flock64_to_posix_lock(filp, file_lock, &flock); if (error) goto out; - if (cmd == F_SETLKW64) { - file_lock->fl_flags |= FL_SLEEP; - } - - error = -EBADF; - switch (flock.l_type) { - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - goto out; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) + + error = check_fmode_for_setlk(file_lock); + if (error) + goto out; + + /* + * If the cmd is requesting file-private locks, then set the + * FL_OFDLCK flag and override the owner. + */ + switch (cmd) { + case F_OFD_SETLK: + error = -EINVAL; + if (flock.l_pid != 0) goto out; + + cmd = F_SETLK64; + file_lock->fl_flags |= FL_OFDLCK; + file_lock->fl_owner = filp; break; - case F_UNLCK: - break; - default: + case F_OFD_SETLKW: error = -EINVAL; - goto out; + if (flock.l_pid != 0) + goto out; + + cmd = F_SETLKW64; + file_lock->fl_flags |= FL_OFDLCK; + file_lock->fl_owner = filp; + /* Fallthrough */ + case F_SETLKW64: + file_lock->fl_flags |= FL_SLEEP; } error = do_lock_file_wait(filp, cmd, file_lock); @@ -2002,14 +2365,22 @@ again: * Attempt to detect a close/fcntl race and recover by * releasing the lock that was just acquired. */ - spin_lock(¤t->files->file_lock); - f = fcheck(fd); - spin_unlock(¤t->files->file_lock); - if (!error && f != filp && flock.l_type != F_UNLCK) { - flock.l_type = F_UNLCK; - goto again; + if (!error && file_lock->fl_type != F_UNLCK) { + /* + * We need that spin_lock here - it prevents reordering between + * update of i_flctx->flc_posix and check for it done in + * close(). rcu_read_lock() wouldn't do. + */ + spin_lock(¤t->files->file_lock); + f = fcheck(fd); + spin_unlock(¤t->files->file_lock); + if (f != filp) { + file_lock->fl_type = F_UNLCK; + error = do_lock_file_wait(filp, cmd, file_lock); + WARN_ON_ONCE(error); + error = -EBADF; + } } - out: locks_free_lock(file_lock); return error; @@ -2024,13 +2395,15 @@ out: void locks_remove_posix(struct file *filp, fl_owner_t owner) { struct file_lock lock; + struct file_lock_context *ctx; /* * If there are no locks held on this file, we don't need to call * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. */ - if (!file_inode(filp)->i_flock) + ctx = smp_load_acquire(&file_inode(filp)->i_flctx); + if (!ctx || list_empty(&ctx->flc_posix)) return; lock.fl_type = F_UNLCK; @@ -2051,73 +2424,90 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) EXPORT_SYMBOL(locks_remove_posix); +/* The i_flctx must be valid when calling into here */ +static void +locks_remove_flock(struct file *filp, struct file_lock_context *flctx) +{ + struct file_lock fl = { + .fl_owner = filp, + .fl_pid = current->tgid, + .fl_file = filp, + .fl_flags = FL_FLOCK, + .fl_type = F_UNLCK, + .fl_end = OFFSET_MAX, + }; + struct inode *inode = file_inode(filp); + + if (list_empty(&flctx->flc_flock)) + return; + + if (filp->f_op->flock) + filp->f_op->flock(filp, F_SETLKW, &fl); + else + flock_lock_inode(inode, &fl); + + if (fl.fl_ops && fl.fl_ops->fl_release_private) + fl.fl_ops->fl_release_private(&fl); +} + +/* The i_flctx must be valid when calling into here */ +static void +locks_remove_lease(struct file *filp, struct file_lock_context *ctx) +{ + struct file_lock *fl, *tmp; + LIST_HEAD(dispose); + + if (list_empty(&ctx->flc_lease)) + return; + + spin_lock(&ctx->flc_lock); + list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) + if (filp == fl->fl_file) + lease_modify(fl, F_UNLCK, &dispose); + spin_unlock(&ctx->flc_lock); + locks_dispose_list(&dispose); +} + /* * This function is called on the last close of an open file. */ -void locks_remove_flock(struct file *filp) +void locks_remove_file(struct file *filp) { - struct inode * inode = file_inode(filp); - struct file_lock *fl; - struct file_lock **before; + struct file_lock_context *ctx; - if (!inode->i_flock) + ctx = smp_load_acquire(&file_inode(filp)->i_flctx); + if (!ctx) return; - if (filp->f_op && filp->f_op->flock) { - struct file_lock fl = { - .fl_pid = current->tgid, - .fl_file = filp, - .fl_flags = FL_FLOCK, - .fl_type = F_UNLCK, - .fl_end = OFFSET_MAX, - }; - filp->f_op->flock(filp, F_SETLKW, &fl); - if (fl.fl_ops && fl.fl_ops->fl_release_private) - fl.fl_ops->fl_release_private(&fl); - } + /* remove any OFD locks */ + locks_remove_posix(filp, filp); - lock_flocks(); - before = &inode->i_flock; + /* remove flock locks */ + locks_remove_flock(filp, ctx); - while ((fl = *before) != NULL) { - if (fl->fl_file == filp) { - if (IS_FLOCK(fl)) { - locks_delete_lock(before); - continue; - } - if (IS_LEASE(fl)) { - lease_modify(before, F_UNLCK); - continue; - } - /* What? */ - BUG(); - } - before = &fl->fl_next; - } - unlock_flocks(); + /* remove any leases */ + locks_remove_lease(filp, ctx); } /** * posix_unblock_lock - stop waiting for a file lock - * @filp: how the file was opened * @waiter: the lock which was waiting * * lockd needs to block waiting for locks. */ int -posix_unblock_lock(struct file *filp, struct file_lock *waiter) +posix_unblock_lock(struct file_lock *waiter) { int status = 0; - lock_flocks(); + spin_lock(&blocked_lock_lock); if (waiter->fl_next) __locks_delete_block(waiter); else status = -ENOENT; - unlock_flocks(); + spin_unlock(&blocked_lock_lock); return status; } - EXPORT_SYMBOL(posix_unblock_lock); /** @@ -2129,7 +2519,7 @@ EXPORT_SYMBOL(posix_unblock_lock); */ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) { - if (filp->f_op && filp->f_op->lock) + if (filp->f_op->lock) return filp->f_op->lock(filp, F_CANCELLK, fl); return 0; } @@ -2140,6 +2530,11 @@ EXPORT_SYMBOL_GPL(vfs_cancel_lock); #include #include +struct locks_iterator { + int li_cpu; + loff_t li_pos; +}; + static void lock_get_status(struct seq_file *f, struct file_lock *fl, loff_t id, char *pfx) { @@ -2156,26 +2551,36 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, seq_printf(f, "%lld:%s ", id, pfx); if (IS_POSIX(fl)) { - seq_printf(f, "%6s %s ", - (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", + if (fl->fl_flags & FL_ACCESS) + seq_puts(f, "ACCESS"); + else if (IS_OFDLCK(fl)) + seq_puts(f, "OFDLCK"); + else + seq_puts(f, "POSIX "); + + seq_printf(f, " %s ", (inode == NULL) ? "*NOINODE*" : mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); } else if (IS_FLOCK(fl)) { if (fl->fl_type & LOCK_MAND) { - seq_printf(f, "FLOCK MSNFS "); + seq_puts(f, "FLOCK MSNFS "); } else { - seq_printf(f, "FLOCK ADVISORY "); + seq_puts(f, "FLOCK ADVISORY "); } } else if (IS_LEASE(fl)) { - seq_printf(f, "LEASE "); + if (fl->fl_flags & FL_DELEG) + seq_puts(f, "DELEG "); + else + seq_puts(f, "LEASE "); + if (lease_breaking(fl)) - seq_printf(f, "BREAKING "); + seq_puts(f, "BREAKING "); else if (fl->fl_file) - seq_printf(f, "ACTIVE "); + seq_puts(f, "ACTIVE "); else - seq_printf(f, "BREAKER "); + seq_puts(f, "BREAKER "); } else { - seq_printf(f, "UNKNOWN UNKNOWN "); + seq_puts(f, "UNKNOWN UNKNOWN "); } if (fl->fl_type & LOCK_MAND) { seq_printf(f, "%s ", @@ -2189,15 +2594,10 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, : (fl->fl_type == F_WRLCK) ? "WRITE" : "READ "); } if (inode) { -#ifdef WE_CAN_BREAK_LSLK_NOW - seq_printf(f, "%d %s:%ld ", fl_pid, - inode->i_sb->s_id, inode->i_ino); -#else - /* userspace relies on this representation of dev_t ;-( */ + /* userspace relies on this representation of dev_t */ seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); -#endif } else { seq_printf(f, "%d :0 ", fl_pid); } @@ -2207,43 +2607,87 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, else seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end); } else { - seq_printf(f, "0 EOF\n"); + seq_puts(f, "0 EOF\n"); } } static int locks_show(struct seq_file *f, void *v) { + struct locks_iterator *iter = f->private; struct file_lock *fl, *bfl; - fl = list_entry(v, struct file_lock, fl_link); + fl = hlist_entry(v, struct file_lock, fl_link); - lock_get_status(f, fl, *((loff_t *)f->private), ""); + lock_get_status(f, fl, iter->li_pos, ""); list_for_each_entry(bfl, &fl->fl_block, fl_block) - lock_get_status(f, bfl, *((loff_t *)f->private), " ->"); + lock_get_status(f, bfl, iter->li_pos, " ->"); return 0; } +static void __show_fd_locks(struct seq_file *f, + struct list_head *head, int *id, + struct file *filp, struct files_struct *files) +{ + struct file_lock *fl; + + list_for_each_entry(fl, head, fl_list) { + + if (filp != fl->fl_file) + continue; + if (fl->fl_owner != files && + fl->fl_owner != filp) + continue; + + (*id)++; + seq_puts(f, "lock:\t"); + lock_get_status(f, fl, *id, ""); + } +} + +void show_fd_locks(struct seq_file *f, + struct file *filp, struct files_struct *files) +{ + struct inode *inode = file_inode(filp); + struct file_lock_context *ctx; + int id = 0; + + ctx = smp_load_acquire(&inode->i_flctx); + if (!ctx) + return; + + spin_lock(&ctx->flc_lock); + __show_fd_locks(f, &ctx->flc_flock, &id, filp, files); + __show_fd_locks(f, &ctx->flc_posix, &id, filp, files); + __show_fd_locks(f, &ctx->flc_lease, &id, filp, files); + spin_unlock(&ctx->flc_lock); +} + static void *locks_start(struct seq_file *f, loff_t *pos) + __acquires(&blocked_lock_lock) { - loff_t *p = f->private; + struct locks_iterator *iter = f->private; - lock_flocks(); - *p = (*pos + 1); - return seq_list_start(&file_lock_list, *pos); + iter->li_pos = *pos + 1; + lg_global_lock(&file_lock_lglock); + spin_lock(&blocked_lock_lock); + return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos); } static void *locks_next(struct seq_file *f, void *v, loff_t *pos) { - loff_t *p = f->private; - ++*p; - return seq_list_next(v, &file_lock_list, pos); + struct locks_iterator *iter = f->private; + + ++iter->li_pos; + return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos); } static void locks_stop(struct seq_file *f, void *v) + __releases(&blocked_lock_lock) { - unlock_flocks(); + spin_unlock(&blocked_lock_lock); + lg_global_unlock(&file_lock_lglock); } static const struct seq_operations locks_seq_operations = { @@ -2255,7 +2699,8 @@ static const struct seq_operations locks_seq_operations = { static int locks_open(struct inode *inode, struct file *filp) { - return seq_open_private(filp, &locks_seq_operations, sizeof(loff_t)); + return seq_open_private(filp, &locks_seq_operations, + sizeof(struct locks_iterator)); } static const struct file_operations proc_locks_operations = { @@ -2273,89 +2718,21 @@ static int __init proc_locks_init(void) module_init(proc_locks_init); #endif -/** - * lock_may_read - checks that the region is free of locks - * @inode: the inode that is being read - * @start: the first byte to read - * @len: the number of bytes to read - * - * Emulates Windows locking requirements. Whole-file - * mandatory locks (share modes) can prohibit a read and - * byte-range POSIX locks can prohibit a read if they overlap. - * - * N.B. this function is only ever called - * from knfsd and ownership of locks is never checked. - */ -int lock_may_read(struct inode *inode, loff_t start, unsigned long len) -{ - struct file_lock *fl; - int result = 1; - lock_flocks(); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (IS_POSIX(fl)) { - if (fl->fl_type == F_RDLCK) - continue; - if ((fl->fl_end < start) || (fl->fl_start > (start + len))) - continue; - } else if (IS_FLOCK(fl)) { - if (!(fl->fl_type & LOCK_MAND)) - continue; - if (fl->fl_type & LOCK_READ) - continue; - } else - continue; - result = 0; - break; - } - unlock_flocks(); - return result; -} - -EXPORT_SYMBOL(lock_may_read); - -/** - * lock_may_write - checks that the region is free of locks - * @inode: the inode that is being written - * @start: the first byte to write - * @len: the number of bytes to write - * - * Emulates Windows locking requirements. Whole-file - * mandatory locks (share modes) can prohibit a write and - * byte-range POSIX locks can prohibit a write if they overlap. - * - * N.B. this function is only ever called - * from knfsd and ownership of locks is never checked. - */ -int lock_may_write(struct inode *inode, loff_t start, unsigned long len) +static int __init filelock_init(void) { - struct file_lock *fl; - int result = 1; - lock_flocks(); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (IS_POSIX(fl)) { - if ((fl->fl_end < start) || (fl->fl_start > (start + len))) - continue; - } else if (IS_FLOCK(fl)) { - if (!(fl->fl_type & LOCK_MAND)) - continue; - if (fl->fl_type & LOCK_WRITE) - continue; - } else - continue; - result = 0; - break; - } - unlock_flocks(); - return result; -} + int i; -EXPORT_SYMBOL(lock_may_write); + flctx_cache = kmem_cache_create("file_lock_ctx", + sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL); -static int __init filelock_init(void) -{ filelock_cache = kmem_cache_create("file_lock_cache", sizeof(struct file_lock), 0, SLAB_PANIC, NULL); + lg_lock_init(&file_lock_lglock, "file_lock_lglock"); + + for_each_possible_cpu(i) + INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i)); + return 0; }