[XFS] Improve xfsbufd delayed write submission patterns, after blktrace
authorNathan Scott <nathans@sgi.com>
Thu, 28 Sep 2006 00:52:15 +0000 (10:52 +1000)
committerTim Shimmin <tes@sgi.com>
Thu, 28 Sep 2006 00:52:15 +0000 (10:52 +1000)
analysis.

Under a sequential create+allocate workload, blktrace reported backward
writes being issued by xfsbufd, and frequent inappropriate queue unplugs.
We now insert at the tail when moving from the delwri lists to the temp
lists, which maintains correct ordering, and we avoid unplugging queues
deep in the submit paths when we'd shortly do it at a higher level anyway.
blktrace now reports much healthier write patterns from xfsbufd for this
workload (and likely many others).

SGI-PV: 954310
SGI-Modid: xfs-linux-melb:xfs-kern:26396a

Signed-off-by: Nathan Scott <nathans@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
fs/xfs/linux-2.6/xfs_buf.c

index 2af528dcfb0428ca93f2a5dced879e400682b876..f13503508c4618cf4c302e56ccd7756c1f5272b3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -1681,6 +1681,7 @@ xfsbufd(
        xfs_buf_t               *bp, *n;
        struct list_head        *dwq = &target->bt_delwrite_queue;
        spinlock_t              *dwlk = &target->bt_delwrite_lock;
+       int                     count;
 
        current->flags |= PF_MEMALLOC;
 
@@ -1696,6 +1697,7 @@ xfsbufd(
                schedule_timeout_interruptible(
                        xfs_buf_timer_centisecs * msecs_to_jiffies(10));
 
+               count = 0;
                age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
                spin_lock(dwlk);
                list_for_each_entry_safe(bp, n, dwq, b_list) {
@@ -1711,9 +1713,11 @@ xfsbufd(
                                        break;
                                }
 
-                               bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
+                               bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
+                                                _XBF_RUN_QUEUES);
                                bp->b_flags |= XBF_WRITE;
-                               list_move(&bp->b_list, &tmp);
+                               list_move_tail(&bp->b_list, &tmp);
+                               count++;
                        }
                }
                spin_unlock(dwlk);
@@ -1724,12 +1728,12 @@ xfsbufd(
 
                        list_del_init(&bp->b_list);
                        xfs_buf_iostrategy(bp);
-
-                       blk_run_address_space(target->bt_mapping);
                }
 
                if (as_list_len > 0)
                        purge_addresses();
+               if (count)
+                       blk_run_address_space(target->bt_mapping);
 
                clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
        } while (!kthread_should_stop());
@@ -1767,7 +1771,7 @@ xfs_flush_buftarg(
                        continue;
                }
 
-               list_move(&bp->b_list, &tmp);
+               list_move_tail(&bp->b_list, &tmp);
        }
        spin_unlock(dwlk);
 
@@ -1776,7 +1780,7 @@ xfs_flush_buftarg(
         */
        list_for_each_entry_safe(bp, n, &tmp, b_list) {
                xfs_buf_lock(bp);
-               bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
+               bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|_XBF_RUN_QUEUES);
                bp->b_flags |= XBF_WRITE;
                if (wait)
                        bp->b_flags &= ~XBF_ASYNC;
@@ -1786,6 +1790,9 @@ xfs_flush_buftarg(
                xfs_buf_iostrategy(bp);
        }
 
+       if (wait)
+               blk_run_address_space(target->bt_mapping);
+
        /*
         * Remaining list items must be flushed before returning
         */
@@ -1797,9 +1804,6 @@ xfs_flush_buftarg(
                xfs_buf_relse(bp);
        }
 
-       if (wait)
-               blk_run_address_space(target->bt_mapping);
-
        return pincount;
 }