[firefly-linux-kernel-4.4.55.git] / mm/truncate.c
index 2d6151fc8f083629a8c4e95ddd157afbb76b8772..76e35ad971025ce5eb3781543537d1bf3b947b8d 100644
 #include <linux/rmap.h>
 #include "internal.h"
 
+static void clear_exceptional_entry(struct address_space *mapping,
+                                   pgoff_t index, void *entry)
+{
+       struct radix_tree_node *node;
+       void **slot;
+
+       /* Handled by shmem itself */
+       if (shmem_mapping(mapping))
+               return;
+
+       spin_lock_irq(&mapping->tree_lock);
+       /*
+        * Regular page slots are stabilized by the page lock even
+        * without the tree itself locked.  These unlocked entries
+        * need verification under the tree lock.
+        */
+       if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
+               goto unlock;
+       if (*slot != entry)
+               goto unlock;
+       radix_tree_replace_slot(slot, NULL);
+       mapping->nrshadows--;
+       if (!node)
+               goto unlock;
+       workingset_node_shadows_dec(node);
+       /*
+        * Don't track node without shadow entries.
+        *
+        * Avoid acquiring the list_lru lock if already untracked.
+        * The list_empty() test is safe as node->private_list is
+        * protected by mapping->tree_lock.
+        */
+       if (!workingset_node_shadows(node) &&
+           !list_empty(&node->private_list))
+               list_lru_del(&workingset_shadow_nodes, &node->private_list);
+       __radix_tree_delete_node(&mapping->page_tree, node);
+unlock:
+       spin_unlock_irq(&mapping->tree_lock);
+}
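
For reference, shadow entries are "exceptional" radix tree entries: they
carry a tag bit in the pointer value itself, so they can never be mistaken
for a (word-aligned) struct page pointer. A minimal sketch of the test,
mirroring radix_tree_exceptional_entry() from <linux/radix-tree.h> (the
helper name here is illustrative):

	/* RADIX_TREE_EXCEPTIONAL_ENTRY is bit 1 of the entry value */
	static inline int is_shadow_entry(void *entry)
	{
		return (unsigned long)entry & RADIX_TREE_EXCEPTIONAL_ENTRY;
	}

This is also why clear_exceptional_entry() re-checks *slot under
tree_lock: unlike page entries, shadow entries have no page lock to
stabilize them against concurrent modification.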
 
 /**
  * do_invalidatepage - invalidate part or all of a page
  * @page: the page which is affected
- * @offset: the index of the truncation point
+ * @offset: start of the range to invalidate
+ * @length: length of the range to invalidate
  *
  * do_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * do_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point.  Because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
  */
-void do_invalidatepage(struct page *page, unsigned long offset)
+void do_invalidatepage(struct page *page, unsigned int offset,
+                      unsigned int length)
 {
-       void (*invalidatepage)(struct page *, unsigned long);
+       void (*invalidatepage)(struct page *, unsigned int, unsigned int);
+
        invalidatepage = page->mapping->a_ops->invalidatepage;
 #ifdef CONFIG_BLOCK
        if (!invalidatepage)
                invalidatepage = block_invalidatepage;
 #endif
        if (invalidatepage)
-               (*invalidatepage)(page, offset);
-}
-
-static inline void truncate_partial_page(struct page *page, unsigned partial)
-{
-       zero_user_segment(page, partial, PAGE_CACHE_SIZE);
-       cleancache_invalidate_page(page->mapping, page);
-       if (page_has_private(page))
-               do_invalidatepage(page, partial);
+               (*invalidatepage)(page, offset, length);
 }
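
With the widened signature, a filesystem that keeps no special per-page
state can keep delegating to block_invalidatepage(), which now takes the
same (offset, length) pair. A hedged sketch (the aops name is
illustrative):

	static const struct address_space_operations demo_aops = {
		/* invalidate buffers in [offset, offset + length) only */
		.invalidatepage	= block_invalidatepage,
	};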
 
-/*
- * This cancels just the dirty bit on the kernel page itself, it
- * does NOT actually remove dirty bits on any mmap's that may be
- * around. It also leaves the page tagged dirty, so any sync
- * activity will still find it on the dirty lists, and in particular,
- * clear_page_dirty_for_io() will still look at the dirty bits in
- * the VM.
- *
- * Doing this should *normally* only ever be done when a page
- * is truncated, and is not actually mapped anywhere at all. However,
- * fs/buffer.c does this when it notices that somebody has cleaned
- * out all the buffers on a page without actually doing it through
- * the VM. Can you say "ext3 is horribly ugly"? Tought you could.
- */
-void cancel_dirty_page(struct page *page, unsigned int account_size)
-{
-       if (TestClearPageDirty(page)) {
-               struct address_space *mapping = page->mapping;
-               if (mapping && mapping_cap_account_dirty(mapping)) {
-                       dec_zone_page_state(page, NR_FILE_DIRTY);
-                       dec_bdi_stat(mapping->backing_dev_info,
-                                       BDI_RECLAIMABLE);
-                       if (account_size)
-                               task_io_account_cancelled_write(account_size);
-               }
-       }
-}
-EXPORT_SYMBOL(cancel_dirty_page);
-
 /*
  * If truncate cannot remove the fs-private metadata from the page, the page
  * becomes orphaned.  It will be left on the LRU and may even be mapped into
@@ -104,10 +109,14 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
                return -EIO;
 
        if (page_has_private(page))
-               do_invalidatepage(page, 0);
-
-       cancel_dirty_page(page, PAGE_CACHE_SIZE);
+               do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
 
+       /*
+        * Some filesystems seem to re-dirty the page even after
+        * the VM has canceled the dirty bit (eg ext3 journaling).
+        * Hence the dirty accounting check is placed after invalidation.
+        */
+       cancel_dirty_page(page);
        ClearPageMappedToDisk(page);
        delete_from_page_cache(page);
        return 0;
@@ -186,11 +195,11 @@ int invalidate_inode_page(struct page *page)
  * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
  * @mapping: mapping to truncate
  * @lstart: offset from which to truncate
- * @lend: offset to which to truncate
+ * @lend: offset to which to truncate (inclusive)
  *
  * Truncate the page cache, removing the pages that are between
- * specified offsets (and zeroing out partial page
- * (if lstart is not page aligned)).
+ * specified offsets (and zeroing out partial pages
+ * if lstart or lend + 1 is not page aligned).
  *
  * Truncate takes two passes - the first pass is nonblocking.  It will not
  * block on page locks and it will not block on writeback.  The second pass
@@ -201,37 +210,66 @@ int invalidate_inode_page(struct page *page)
  * We pass down the cache-hot hint to the page freeing code.  Even if the
  * mapping is large, it is probably the case that the final pages are the most
  * recently touched, and freeing happens in ascending file offset order.
+ *
+ * Note that since ->invalidatepage() accepts a range to invalidate,
+ * truncate_inode_pages_range is able to handle cases where lend + 1 is
+ * not properly page aligned.
  */
 void truncate_inode_pages_range(struct address_space *mapping,
                                loff_t lstart, loff_t lend)
 {
-       const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
-       const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
-       struct pagevec pvec;
-       pgoff_t index;
-       pgoff_t end;
-       int i;
+       pgoff_t         start;          /* inclusive */
+       pgoff_t         end;            /* exclusive */
+       unsigned int    partial_start;  /* inclusive */
+       unsigned int    partial_end;    /* exclusive */
+       struct pagevec  pvec;
+       pgoff_t         indices[PAGEVEC_SIZE];
+       pgoff_t         index;
+       int             i;
 
        cleancache_invalidate_inode(mapping);
-       if (mapping->nrpages == 0)
+       if (mapping->nrpages == 0 && mapping->nrshadows == 0)
                return;
 
-       BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
-       end = (lend >> PAGE_CACHE_SHIFT);
+       /* Offsets within partial pages */
+       partial_start = lstart & (PAGE_CACHE_SIZE - 1);
+       partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
+
+       /*
+        * 'start' and 'end' always cover the range of pages to be fully
+        * truncated. Partial pages are covered with 'partial_start' at the
+        * start of the range and 'partial_end' at the end of the range.
+        * Note that 'end' is exclusive while 'lend' is inclusive.
+        */
+       start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+       if (lend == -1)
+               /*
+                * lend == -1 indicates end-of-file so we have to set 'end'
+                * to the highest possible pgoff_t and since the type is
+                * unsigned we're using -1.
+                */
+               end = -1;
+       else
+               end = (lend + 1) >> PAGE_CACHE_SHIFT;
 
        pagevec_init(&pvec, 0);
        index = start;
-       while (index <= end && pagevec_lookup(&pvec, mapping, index,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
-               mem_cgroup_uncharge_start();
+       while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
+                       min(end - index, (pgoff_t)PAGEVEC_SIZE),
+                       indices)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
                        /* We rely upon deletion not changing page->index */
-                       index = page->index;
-                       if (index > end)
+                       index = indices[i];
+                       if (index >= end)
                                break;
 
+                       if (radix_tree_exceptional_entry(page)) {
+                               clear_exceptional_entry(mapping, index, page);
+                               continue;
+                       }
+
                        if (!trylock_page(page))
                                continue;
                        WARN_ON(page->index != index);
@@ -242,44 +280,84 @@ void truncate_inode_pages_range(struct address_space *mapping,
                        truncate_inode_page(mapping, page);
                        unlock_page(page);
                }
+               pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
-               mem_cgroup_uncharge_end();
                cond_resched();
                index++;
        }
 
-       if (partial) {
+       if (partial_start) {
                struct page *page = find_lock_page(mapping, start - 1);
                if (page) {
+                       unsigned int top = PAGE_CACHE_SIZE;
+                       if (start > end) {
+                               /* Truncation within a single page */
+                               top = partial_end;
+                               partial_end = 0;
+                       }
                        wait_on_page_writeback(page);
-                       truncate_partial_page(page, partial);
+                       zero_user_segment(page, partial_start, top);
+                       cleancache_invalidate_page(mapping, page);
+                       if (page_has_private(page))
+                               do_invalidatepage(page, partial_start,
+                                                 top - partial_start);
                        unlock_page(page);
                        page_cache_release(page);
                }
        }
+       if (partial_end) {
+               struct page *page = find_lock_page(mapping, end);
+               if (page) {
+                       wait_on_page_writeback(page);
+                       zero_user_segment(page, 0, partial_end);
+                       cleancache_invalidate_page(mapping, page);
+                       if (page_has_private(page))
+                               do_invalidatepage(page, 0,
+                                                 partial_end);
+                       unlock_page(page);
+                       page_cache_release(page);
+               }
+       }
+       /*
+        * If the truncation happened within a single page no pages
+        * will be released, just zeroed, so we can bail out now.
+        */
+       if (start >= end)
+               return;
 
        index = start;
        for ( ; ; ) {
                cond_resched();
-               if (!pagevec_lookup(&pvec, mapping, index,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+               if (!pagevec_lookup_entries(&pvec, mapping, index,
+                       min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
+                       /* If all gone from start onwards, we're done */
                        if (index == start)
                                break;
+                       /* Otherwise restart to make sure all gone */
                        index = start;
                        continue;
                }
-               if (index == start && pvec.pages[0]->index > end) {
+               if (index == start && indices[0] >= end) {
+                       /* All gone out of hole to be punched, we're done */
+                       pagevec_remove_exceptionals(&pvec);
                        pagevec_release(&pvec);
                        break;
                }
-               mem_cgroup_uncharge_start();
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
                        /* We rely upon deletion not changing page->index */
-                       index = page->index;
-                       if (index > end)
+                       index = indices[i];
+                       if (index >= end) {
+                               /* Restart punch to make sure all gone */
+                               index = start - 1;
                                break;
+                       }
+
+                       if (radix_tree_exceptional_entry(page)) {
+                               clear_exceptional_entry(mapping, index, page);
+                               continue;
+                       }
 
                        lock_page(page);
                        WARN_ON(page->index != index);
@@ -287,8 +365,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
                        truncate_inode_page(mapping, page);
                        unlock_page(page);
                }
+               pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
-               mem_cgroup_uncharge_end();
                index++;
        }
        cleancache_invalidate_inode(mapping);
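
To make the partial-page arithmetic above concrete, a worked example
(illustrative values, assuming PAGE_CACHE_SIZE == 4096):

	/*
	 * truncate_inode_pages_range(mapping, 5000, 12999):
	 *   partial_start = 5000 & 4095        ==  904
	 *   partial_end   = (12999 + 1) & 4095 ==  712
	 *   start = (5000 + 4095) >> 12        ==    2  (inclusive)
	 *   end   = (12999 + 1) >> 12          ==    3  (exclusive)
	 *
	 * Page 2 is truncated outright; page 1 is zeroed from byte 904
	 * to the page end, page 3 is zeroed below byte 712, and both
	 * partial pages stay in the page cache.
	 */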
@@ -313,6 +391,53 @@ void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
 }
 EXPORT_SYMBOL(truncate_inode_pages);
 
+/**
+ * truncate_inode_pages_final - truncate *all* pages before inode dies
+ * @mapping: mapping to truncate
+ *
+ * Called under (and serialized by) inode->i_mutex.
+ *
+ * Filesystems have to use this in the .evict_inode path to inform the
+ * VM that this is the final truncate and the inode is going away.
+ */
+void truncate_inode_pages_final(struct address_space *mapping)
+{
+       unsigned long nrshadows;
+       unsigned long nrpages;
+
+       /*
+        * Page reclaim can not participate in regular inode lifetime
+        * management (can't call iput()) and thus can race with the
+        * inode teardown.  Tell it when the address space is exiting,
+        * so that it does not install eviction information after the
+        * final truncate has begun.
+        */
+       mapping_set_exiting(mapping);
+
+       /*
+        * When reclaim installs eviction entries, it increases
+        * nrshadows first, then decreases nrpages.  Make sure we see
+        * this in the right order or we might miss an entry.
+        */
+       nrpages = mapping->nrpages;
+       smp_rmb();
+       nrshadows = mapping->nrshadows;
+
+       if (nrpages || nrshadows) {
+               /*
+                * As truncation uses a lockless tree lookup, cycle
+                * the tree lock to make sure any ongoing tree
+                * modification that does not see AS_EXITING is
+                * completed before starting the final truncate.
+                */
+               spin_lock_irq(&mapping->tree_lock);
+               spin_unlock_irq(&mapping->tree_lock);
+
+               truncate_inode_pages(mapping, 0);
+       }
+}
+EXPORT_SYMBOL(truncate_inode_pages_final);
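
A hedged usage sketch (the function name is illustrative): filesystems
call this from their ->evict_inode so that reclaim's shadow-entry
bookkeeping cannot race with the inode teardown:

	static void demo_evict_inode(struct inode *inode)
	{
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);	/* tell the VFS the inode is clean */
	}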
+
 /**
  * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
  * @mapping: the address_space which holds the pages to invalidate
@@ -329,32 +454,30 @@ EXPORT_SYMBOL(truncate_inode_pages);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
                pgoff_t start, pgoff_t end)
 {
+       pgoff_t indices[PAGEVEC_SIZE];
        struct pagevec pvec;
        pgoff_t index = start;
        unsigned long ret;
        unsigned long count = 0;
        int i;
 
-       /*
-        * Note: this function may get called on a shmem/tmpfs mapping:
-        * pagevec_lookup() might then return 0 prematurely (because it
-        * got a gangful of swap entries); but it's hardly worth worrying
-        * about - it can rarely have anything to free from such a mapping
-        * (most pages are dirty), and already skips over any difficulties.
-        */
-
        pagevec_init(&pvec, 0);
-       while (index <= end && pagevec_lookup(&pvec, mapping, index,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
-               mem_cgroup_uncharge_start();
+       while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
+                       min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+                       indices)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
                        /* We rely upon deletion not changing page->index */
-                       index = page->index;
+                       index = indices[i];
                        if (index > end)
                                break;
 
+                       if (radix_tree_exceptional_entry(page)) {
+                               clear_exceptional_entry(mapping, index, page);
+                               continue;
+                       }
+
                        if (!trylock_page(page))
                                continue;
                        WARN_ON(page->index != index);
@@ -365,11 +488,11 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
                         * of interest and try to speed up its reclaim.
                         */
                        if (!ret)
-                               deactivate_page(page);
+                               deactivate_file_page(page);
                        count += ret;
                }
+               pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
-               mem_cgroup_uncharge_end();
                cond_resched();
                index++;
        }
@@ -387,20 +510,24 @@ EXPORT_SYMBOL(invalidate_mapping_pages);
 static int
 invalidate_complete_page2(struct address_space *mapping, struct page *page)
 {
+       struct mem_cgroup *memcg;
+       unsigned long flags;
+
        if (page->mapping != mapping)
                return 0;
 
        if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
                return 0;
 
-       spin_lock_irq(&mapping->tree_lock);
+       memcg = mem_cgroup_begin_page_stat(page);
+       spin_lock_irqsave(&mapping->tree_lock, flags);
        if (PageDirty(page))
                goto failed;
 
        BUG_ON(page_has_private(page));
-       __delete_from_page_cache(page);
-       spin_unlock_irq(&mapping->tree_lock);
-       mem_cgroup_uncharge_cache_page(page);
+       __delete_from_page_cache(page, NULL, memcg);
+       spin_unlock_irqrestore(&mapping->tree_lock, flags);
+       mem_cgroup_end_page_stat(memcg);
 
        if (mapping->a_ops->freepage)
                mapping->a_ops->freepage(page);
@@ -408,7 +535,8 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
        page_cache_release(page);       /* pagecache ref */
        return 1;
 failed:
-       spin_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irqrestore(&mapping->tree_lock, flags);
+       mem_cgroup_end_page_stat(memcg);
        return 0;
 }
 
@@ -435,6 +563,7 @@ static int do_launder_page(struct address_space *mapping, struct page *page)
 int invalidate_inode_pages2_range(struct address_space *mapping,
                                  pgoff_t start, pgoff_t end)
 {
+       pgoff_t indices[PAGEVEC_SIZE];
        struct pagevec pvec;
        pgoff_t index;
        int i;
@@ -445,17 +574,22 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
        cleancache_invalidate_inode(mapping);
        pagevec_init(&pvec, 0);
        index = start;
-       while (index <= end && pagevec_lookup(&pvec, mapping, index,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
-               mem_cgroup_uncharge_start();
+       while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
+                       min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+                       indices)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
                        /* We rely upon deletion not changing page->index */
-                       index = page->index;
+                       index = indices[i];
                        if (index > end)
                                break;
 
+                       if (radix_tree_exceptional_entry(page)) {
+                               clear_exceptional_entry(mapping, index, page);
+                               continue;
+                       }
+
                        lock_page(page);
                        WARN_ON(page->index != index);
                        if (page->mapping != mapping) {
@@ -493,8 +627,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
                                ret = ret2;
                        unlock_page(page);
                }
+               pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
-               mem_cgroup_uncharge_end();
                cond_resched();
                index++;
        }
@@ -521,7 +655,6 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
 /**
  * truncate_pagecache - unmap and remove pagecache that has been truncated
  * @inode: inode
- * @oldsize: old file size
  * @newsize: new file size
  *
  * inode's new i_size must already be written before truncate_pagecache
@@ -534,7 +667,7 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
  * situations such as writepage being called for a page that has already
  * had its underlying blocks deallocated.
  */
-void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize)
+void truncate_pagecache(struct inode *inode, loff_t newsize)
 {
        struct address_space *mapping = inode->i_mapping;
        loff_t holebegin = round_up(newsize, PAGE_SIZE);
@@ -563,8 +696,9 @@ EXPORT_SYMBOL(truncate_pagecache);
 * necessary) to @newsize. It will typically be called from the filesystem's
  * setattr function when ATTR_SIZE is passed in.
  *
- * Must be called with inode_mutex held and before all filesystem specific
- * block truncation has been performed.
+ * Must be called with a lock serializing truncates and writes (generally
+ * i_mutex but e.g. xfs uses a different lock) and before all filesystem
+ * specific block truncation has been performed.
  */
 void truncate_setsize(struct inode *inode, loff_t newsize)
 {
@@ -573,7 +707,7 @@ void truncate_setsize(struct inode *inode, loff_t newsize)
        i_size_write(inode, newsize);
        if (newsize > oldsize)
                pagecache_isize_extended(inode, oldsize, newsize);
-       truncate_pagecache(inode, oldsize, newsize);
+       truncate_pagecache(inode, newsize);
 }
 EXPORT_SYMBOL(truncate_setsize);
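
A hedged sketch of the typical ->setattr caller (names are illustrative,
mirroring what simple_setattr() does):

	static int demo_setattr(struct dentry *dentry, struct iattr *attr)
	{
		struct inode *inode = d_inode(dentry);
		int error = inode_change_ok(inode, attr);

		if (error)
			return error;
		if (attr->ia_valid & ATTR_SIZE)
			truncate_setsize(inode, attr->ia_size);
		setattr_copy(inode, attr);
		mark_inode_dirty(inode);
		return 0;
	}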
 
@@ -650,10 +784,8 @@ void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
         * This rounding is currently just for example: unmap_mapping_range
         * expands its hole outwards, whereas we want it to contract the hole
         * inwards.  However, existing callers of truncate_pagecache_range are
-        * doing their own page rounding first; and truncate_inode_pages_range
-        * currently BUGs if lend is not pagealigned-1 (it handles partial
-        * page at start of hole, but not partial page at end of hole).  Note
-        * unmap_mapping_range allows holelen 0 for all, and we allow lend -1.
+        * doing their own page rounding first.  Note that unmap_mapping_range
+        * allows holelen 0 for all, and we allow lend -1 for end of file.
         */
 
        /*