Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 12 Jan 2012 02:52:23 +0000 (18:52 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 12 Jan 2012 02:52:23 +0000 (18:52 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Jan 2012 02:52:23 +0000 (18:52 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Jan 2012 02:52:23 +0000 (18:52 -0800)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt

index c92b1532f05adadf9f8aaeac671434080b444424..a8d389d72405030eec327a6974811f822f56997c 100644 (file)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2395,6 +2395,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
  
         slram=          [HW,MTD]
  
+       slab_max_order= [MM, SLAB]
+                       Determines the maximum allowed order for slabs.
+                       A high setting may cause OOMs due to memory
+                       fragmentation.  Defaults to 1 for systems with
+                       more than 32MB of RAM, 0 otherwise.
+
         slub_debug[=options[,slabs]]    [MM, SLUB]
                         Enabling slub_debug allows one to determine the
                         culprit if slab objects become corrupted. Enabling
diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt

index f464f47bc60dacd77fecc7797dfe658708acb41e..2acdda9601b097958ad4ea732ba784a1ed4fffce 100644 (file)
--- a/Documentation/vm/slub.txt
+++ b/Documentation/vm/slub.txt
@@ -117,7 +117,7 @@ can be influenced by kernel parameters:
  
  slub_min_objects=x             (default 4)
  slub_min_order=x               (default 0)
-slub_max_order=x               (default 1)
+slub_max_order=x               (default 3 (PAGE_ALLOC_COSTLY_ORDER))
  
  slub_min_objects allows to specify how many objects must at least fit
  into one slab in order for the allocation order to be acceptable.
diff --git a/mm/slab.c b/mm/slab.c

index 2acfa0d9094379ae999c1937dd9d4ed475af1837..f0bd7857ab3bed2adf6649e60dda6ad712ef0b92 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -481,11 +481,13 @@ EXPORT_SYMBOL(slab_buffer_size);
  #endif
  
  /*
- * Do not go above this order unless 0 objects fit into the slab.
+ * Do not go above this order unless 0 objects fit into the slab or
+ * overridden on the command line.
   */
-#define        BREAK_GFP_ORDER_HI      1
-#define        BREAK_GFP_ORDER_LO      0
-static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+#define        SLAB_MAX_ORDER_HI       1
+#define        SLAB_MAX_ORDER_LO       0
+static int slab_max_order = SLAB_MAX_ORDER_LO;
+static bool slab_max_order_set __initdata;
  
  /*
   * Functions for storing/retrieving the cachep and or slab from the page
@@ -854,6 +856,17 @@ static int __init noaliencache_setup(char *s)
  }
  __setup("noaliencache", noaliencache_setup);
  
+static int __init slab_max_order_setup(char *str)
+{
+       get_option(&str, &slab_max_order);
+       slab_max_order = slab_max_order < 0 ? 0 :
+                               min(slab_max_order, MAX_ORDER - 1);
+       slab_max_order_set = true;
+
+       return 1;
+}
+__setup("slab_max_order=", slab_max_order_setup);
+
  #ifdef CONFIG_NUMA
  /*
   * Special reaping functions for NUMA systems called from cache_reap().
@@ -1502,10 +1515,11 @@ void __init kmem_cache_init(void)
  
         /*
          * Fragmentation resistance on low memory - only use bigger
-        * page orders on machines with more than 32MB of memory.
+        * page orders on machines with more than 32MB of memory if
+        * not overridden on the command line.
          */
-       if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
-               slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+       if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
+               slab_max_order = SLAB_MAX_ORDER_HI;
  
         /* Bootstrap is tricky, because several objects are allocated
          * from caches that do not exist yet:
@@ -1932,8 +1946,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
                         /* Print header */
                         if (lines == 0) {
                                 printk(KERN_ERR
-                                       "Slab corruption: %s start=%p, len=%d\n",
-                                       cachep->name, realobj, size);
+                                       "Slab corruption (%s): %s start=%p, len=%d\n",
+                                       print_tainted(), cachep->name, realobj, size);
                                 print_objinfo(cachep, objp, 0);
                         }
                         /* Hexdump the affected line */
@@ -2117,7 +2131,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
                  * Large number of objects is good, but very large slabs are
                  * currently bad for the gfp()s.
                  */
-               if (gfporder >= slab_break_gfp_order)
+               if (gfporder >= slab_max_order)
                         break;
  
                 /*
@@ -3042,8 +3056,9 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
         if (entries != cachep->num - slabp->inuse) {
  bad:
                 printk(KERN_ERR "slab: Internal list corruption detected in "
-                               "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
-                       cachep->name, cachep->num, slabp, slabp->inuse);
+                       "cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
+                       cachep->name, cachep->num, slabp, slabp->inuse,
+                       print_tainted());
                 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
                         sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
                         1);
diff --git a/mm/slub.c b/mm/slub.c

index d99acbf14e0179c766332b5f54f4f946b37a5d0f..5d37b5e44140f2cd0884fc1ae7798ec02b0afb12 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -570,7 +570,7 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
         va_end(args);
         printk(KERN_ERR "========================================"
                         "=====================================\n");
-       printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
+       printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
         printk(KERN_ERR "----------------------------------------"
                         "-------------------------------------\n\n");
  }
@@ -1901,11 +1901,14 @@ static void unfreeze_partials(struct kmem_cache *s)
                         }
  
                         if (l != m) {
-                               if (l == M_PARTIAL)
+                               if (l == M_PARTIAL) {
                                         remove_partial(n, page);
-                               else
+                                       stat(s, FREE_REMOVE_PARTIAL);
+                               } else {
                                         add_partial(n, page,
                                                 DEACTIVATE_TO_TAIL);
+                                       stat(s, FREE_ADD_PARTIAL);
+                               }
  
                                 l = m;
                         }
@@ -2123,6 +2126,37 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
         return object;
  }
  
+/*
+ * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist
+ * or deactivate the page.
+ *
+ * The page is still frozen if the return value is not NULL.
+ *
+ * If this function returns NULL then the page has been unfrozen.
+ */
+static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+{
+       struct page new;
+       unsigned long counters;
+       void *freelist;
+
+       do {
+               freelist = page->freelist;
+               counters = page->counters;
+               new.counters = counters;
+               VM_BUG_ON(!new.frozen);
+
+               new.inuse = page->objects;
+               new.frozen = freelist != NULL;
+
+       } while (!cmpxchg_double_slab(s, page,
+               freelist, counters,
+               NULL, new.counters,
+               "get_freelist"));
+
+       return freelist;
+}
+
  /*
   * Slow path. The lockless freelist is empty or we need to perform
   * debugging duties.
@@ -2144,8 +2178,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  {
         void **object;
         unsigned long flags;
-       struct page new;
-       unsigned long counters;
  
         local_irq_save(flags);
  #ifdef CONFIG_PREEMPT
@@ -2166,31 +2198,14 @@ redo:
                 goto new_slab;
         }
  
-       stat(s, ALLOC_SLOWPATH);
-
-       do {
-               object = c->page->freelist;
-               counters = c->page->counters;
-               new.counters = counters;
-               VM_BUG_ON(!new.frozen);
-
-               /*
-                * If there is no object left then we use this loop to
-                * deactivate the slab which is simple since no objects
-                * are left in the slab and therefore we do not need to
-                * put the page back onto the partial list.
-                *
-                * If there are objects left then we retrieve them
-                * and use them to refill the per cpu queue.
-                */
+       /* must check again c->freelist in case of cpu migration or IRQ */
+       object = c->freelist;
+       if (object)
+               goto load_freelist;
  
-               new.inuse = c->page->objects;
-               new.frozen = object != NULL;
+       stat(s, ALLOC_SLOWPATH);
  
-       } while (!__cmpxchg_double_slab(s, c->page,
-                       object, counters,
-                       NULL, new.counters,
-                       "__slab_alloc"));
+       object = get_freelist(s, c->page);
  
         if (!object) {
                 c->page = NULL;
@@ -3028,7 +3043,9 @@ static int kmem_cache_open(struct kmem_cache *s,
          *    per node list when we run out of per cpu objects. We only fetch 50%
          *    to keep some capacity around for frees.
          */
-       if (s->size >= PAGE_SIZE)
+       if (kmem_cache_debug(s))
+               s->cpu_partial = 0;
+       else if (s->size >= PAGE_SIZE)
                 s->cpu_partial = 2;
         else if (s->size >= 1024)
                 s->cpu_partial = 6;
@@ -4637,6 +4654,8 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
         err = strict_strtoul(buf, 10, &objects);
         if (err)
                 return err;
+       if (objects && kmem_cache_debug(s))
+               return -EINVAL;
  
         s->cpu_partial = objects;
         flush_all(s);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 12 Jan 2012 02:52:23 +0000 (18:52 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 12 Jan 2012 02:52:23 +0000 (18:52 -0800)
Documentation/kernel-parameters.txt		patch \| blob \| history
Documentation/vm/slub.txt		patch \| blob \| history
mm/slab.c		patch \| blob \| history
mm/slub.c		patch \| blob \| history