Merge branch 'for-linus' into for-2.6.40/core
authorJens Axboe <jaxboe@fusionio.com>
Fri, 20 May 2011 18:36:16 +0000 (20:36 +0200)
committerJens Axboe <jaxboe@fusionio.com>
Fri, 20 May 2011 18:36:16 +0000 (20:36 +0200)
This patch merges in a fix that missed 2.6.39 final.

Conflicts:
block/blk.h

56 files changed:
Documentation/ABI/testing/sysfs-block
Makefile
arch/mips/ar7/gpio.c
arch/mips/include/asm/dma-mapping.h
arch/mips/kernel/traps.c
arch/mips/rb532/gpio.c
arch/powerpc/platforms/83xx/suspend.c
arch/powerpc/sysdev/fsl_msi.c
arch/sparc/kernel/pci_sabre.c
arch/sparc/kernel/pci_schizo.c
block/blk-cgroup.c
block/blk-cgroup.h
block/blk-core.c
block/blk-exec.c
block/blk-flush.c
block/blk-lib.c
block/blk-settings.c
block/blk-sysfs.c
block/blk-throttle.c
block/blk.h
block/cfq-iosched.c
block/elevator.c
drivers/ata/libata-scsi.c
drivers/atm/fore200e.c
drivers/block/paride/pcd.c
drivers/cdrom/viocd.c
drivers/char/hw_random/n2-drv.c
drivers/char/ipmi/ipmi_si_intf.c
drivers/char/xilinx_hwicap/xilinx_hwicap.c
drivers/edac/ppc4xx_edac.c
drivers/i2c/busses/i2c-mpc.c
drivers/ide/ide-cd.c
drivers/mmc/host/sdhci-of-core.c
drivers/mtd/maps/physmap_of.c
drivers/net/can/mscan/mpc5xxx_can.c
drivers/net/fs_enet/fs_enet-main.c
drivers/net/fs_enet/mii-fec.c
drivers/net/sunhme.c
drivers/scsi/qlogicpti.c
drivers/scsi/sr.c
drivers/tty/serial/of_serial.c
drivers/usb/gadget/fsl_qe_udc.c
drivers/watchdog/mpc8xxx_wdt.c
fs/block_dev.c
fs/configfs/dir.c
fs/ocfs2/cluster/heartbeat.c
fs/ocfs2/dir.c
fs/ocfs2/dlm/dlmdomain.c
fs/ocfs2/dlm/dlmmaster.c
fs/ocfs2/file.c
fs/ocfs2/journal.c
fs/partitions/check.c
include/linux/blkdev.h
include/linux/device.h
include/linux/genhd.h
include/linux/of_device.h

index 4873c759d535a7549d5eecf62a7125b29d6c2dea..c1eb41cb9876083d3df79a6a995b692762acd21b 100644 (file)
@@ -142,3 +142,67 @@ Description:
                with the previous I/O request are enabled. When set to 2,
                all merge tries are disabled. The default value is 0 -
                which enables all types of merge tries.
+
+What:          /sys/block/<disk>/discard_alignment
+Date:          May 2011
+Contact:       Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+               Devices that support discard functionality may
+               internally allocate space in units that are bigger than
+               the exported logical block size. The discard_alignment
+               parameter indicates how many bytes the beginning of the
+               device is offset from the internal allocation unit's
+               natural alignment.
+
+What:          /sys/block/<disk>/<partition>/discard_alignment
+Date:          May 2011
+Contact:       Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+               Devices that support discard functionality may
+               internally allocate space in units that are bigger than
+               the exported logical block size. The discard_alignment
+               parameter indicates how many bytes the beginning of the
+               partition is offset from the internal allocation unit's
+               natural alignment.
+
+What:          /sys/block/<disk>/queue/discard_granularity
+Date:          May 2011
+Contact:       Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+               Devices that support discard functionality may
+               internally allocate space using units that are bigger
+               than the logical block size. The discard_granularity
+               parameter indicates the size of the internal allocation
+               unit in bytes if reported by the device. Otherwise the
+               discard_granularity will be set to match the device's
+               physical block size. A discard_granularity of 0 means
+               that the device does not support discard functionality.
+
+What:          /sys/block/<disk>/queue/discard_max_bytes
+Date:          May 2011
+Contact:       Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+               Devices that support discard functionality may have
+               internal limits on the number of bytes that can be
+               trimmed or unmapped in a single operation. Some storage
+               protocols also have inherent limits on the number of
+               blocks that can be described in a single command. The
+               discard_max_bytes parameter is set by the device driver
+               to the maximum number of bytes that can be discarded in
+               a single operation. Discard requests issued to the
+               device must not exceed this limit. A discard_max_bytes
+               value of 0 means that the device does not support
+               discard functionality.
+
+What:          /sys/block/<disk>/queue/discard_zeroes_data
+Date:          May 2011
+Contact:       Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+               Devices that support discard functionality may return
+               stale or random data when a previously discarded block
+               is read back. This can cause problems if the filesystem
+               expects discarded blocks to be explicitly cleared. If a
+               device reports that it deterministically returns zeroes
+               when a discarded area is read the discard_zeroes_data
+               parameter will be set to one. Otherwise it will be 0 and
+               the result of reading a discarded area is undefined.
index 41ea6fbec55a1edcdcd697510afcc417578b057c..123d858dae03826a29f562b4792c4d5161878e0b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 39
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Flesh-Eating Bats with Fangs
 
 # *DOCUMENTATION*
index 425dfa5d6e12c69380b160edde7edf58477860af..bb571bcdb8f2f06d5d2cb8b2d787591f35b4e66d 100644 (file)
@@ -325,9 +325,7 @@ int __init ar7_gpio_init(void)
                size = 0x1f;
        }
 
-       gpch->regs = ioremap_nocache(AR7_REGS_GPIO,
-                                       AR7_REGS_GPIO + 0x10);
-
+       gpch->regs = ioremap_nocache(AR7_REGS_GPIO, size);
        if (!gpch->regs) {
                printk(KERN_ERR "%s: failed to ioremap regs\n",
                                        gpch->chip.label);
index 655f849bd08d8767e81f6846ce205ce40e487980..7aa37ddfca4ba59b25603183a1d5bca23099fa9c 100644 (file)
@@ -5,7 +5,9 @@
 #include <asm/cache.h>
 #include <asm-generic/dma-coherent.h>
 
+#ifndef CONFIG_SGI_IP27        /* Kludge to fix 2.6.39 build for IP27 */
 #include <dma-coherence.h>
+#endif
 
 extern struct dma_map_ops *mips_dma_map_ops;
 
index 71350f7f2d8857507cc8da104c3dcee9a6a0a072..e9b3af27d844b5db837dc641bdd9c87ba326f9ba 100644 (file)
@@ -374,7 +374,8 @@ void __noreturn die(const char *str, struct pt_regs *regs)
        unsigned long dvpret = dvpe();
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-       notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV);
+       if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP)
+               sig = 0;
 
        console_verbose();
        spin_lock_irq(&die_lock);
@@ -383,9 +384,6 @@ void __noreturn die(const char *str, struct pt_regs *regs)
        mips_mt_regdump(dvpret);
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-       if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP)
-               sig = 0;
-
        printk("%s[#%d]:\n", str, ++die_counter);
        show_registers(regs);
        add_taint(TAINT_DIE);
index 37de05d595e725ebdc59513181d3f43709ce1cb4..6c47dfeb7be326bf2dc4c5a7a1cbb1984b87efec 100644 (file)
@@ -185,7 +185,7 @@ int __init rb532_gpio_init(void)
        struct resource *r;
 
        r = rb532_gpio_reg0_res;
-       rb532_gpio_chip->regbase = ioremap_nocache(r->start, r->end - r->start);
+       rb532_gpio_chip->regbase = ioremap_nocache(r->start, resource_size(r));
 
        if (!rb532_gpio_chip->regbase) {
                printk(KERN_ERR "rb532: cannot remap GPIO register 0\n");
index 188272934cfb3c6818754dae0260216b5c4ccb65..104faa8aa23c783da70b2dd97dac842ff8198c3c 100644 (file)
@@ -318,17 +318,20 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = {
        .end = mpc83xx_suspend_end,
 };
 
+static struct of_device_id pmc_match[];
 static int pmc_probe(struct platform_device *ofdev)
 {
+       const struct of_device_id *match;
        struct device_node *np = ofdev->dev.of_node;
        struct resource res;
        struct pmc_type *type;
        int ret = 0;
 
-       if (!ofdev->dev.of_match)
+       match = of_match_device(pmc_match, &ofdev->dev);
+       if (!match)
                return -EINVAL;
 
-       type = ofdev->dev.of_match->data;
+       type = match->data;
 
        if (!of_device_is_available(np))
                return -ENODEV;
index d5679dc1e20faddf7d5a20df6bc1374aee74e264..01cd2f089512149b7817199b7915677c6fa86b24 100644 (file)
@@ -304,8 +304,10 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi,
        return 0;
 }
 
+static const struct of_device_id fsl_of_msi_ids[];
 static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 {
+       const struct of_device_id *match;
        struct fsl_msi *msi;
        struct resource res;
        int err, i, j, irq_index, count;
@@ -316,9 +318,10 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev)
        u32 offset;
        static const u32 all_avail[] = { 0, NR_MSI_IRQS };
 
-       if (!dev->dev.of_match)
+       match = of_match_device(fsl_of_msi_ids, &dev->dev);
+       if (!match)
                return -EINVAL;
-       features = dev->dev.of_match->data;
+       features = match->data;
 
        printk(KERN_DEBUG "Setting up Freescale MSI support\n");
 
index 948068a083fca3eab756401d6fb30e00e295e352..d1840dbdaa2fef0179e55c06b24830246e397fc0 100644 (file)
@@ -452,8 +452,10 @@ static void __devinit sabre_pbm_init(struct pci_pbm_info *pbm,
        sabre_scan_bus(pbm, &op->dev);
 }
 
+static const struct of_device_id sabre_match[];
 static int __devinit sabre_probe(struct platform_device *op)
 {
+       const struct of_device_id *match;
        const struct linux_prom64_registers *pr_regs;
        struct device_node *dp = op->dev.of_node;
        struct pci_pbm_info *pbm;
@@ -463,7 +465,8 @@ static int __devinit sabre_probe(struct platform_device *op)
        const u32 *vdma;
        u64 clear_irq;
 
-       hummingbird_p = op->dev.of_match && (op->dev.of_match->data != NULL);
+       match = of_match_device(sabre_match, &op->dev);
+       hummingbird_p = match && (match->data != NULL);
        if (!hummingbird_p) {
                struct device_node *cpu_dp;
 
index fecfcb2063c805a11760e515d549b40d4c322b3a..283fbc329a4397c9d8ff9b159611fe1bf62d4e8b 100644 (file)
@@ -1458,11 +1458,15 @@ out_err:
        return err;
 }
 
+static const struct of_device_id schizo_match[];
 static int __devinit schizo_probe(struct platform_device *op)
 {
-       if (!op->dev.of_match)
+       const struct of_device_id *match;
+
+       match = of_match_device(schizo_match, &op->dev);
+       if (!match)
                return -EINVAL;
-       return __schizo_init(op, (unsigned long) op->dev.of_match->data);
+       return __schizo_init(op, (unsigned long)match->data);
 }
 
 /* The ordering of this table is very important.  Some Tomatillo
index 471fdcc5df85e3135dfefac29aa81a889d0ed567..e41cc6f2ccc1536d637b7971537d339144fb6777 100644 (file)
@@ -385,25 +385,40 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time,
 
        spin_lock_irqsave(&blkg->stats_lock, flags);
        blkg->stats.time += time;
+#ifdef CONFIG_DEBUG_BLK_CGROUP
        blkg->stats.unaccounted_time += unaccounted_time;
+#endif
        spin_unlock_irqrestore(&blkg->stats_lock, flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
 
+/*
+ * should be called under rcu read lock or queue lock to make sure blkg pointer
+ * is valid.
+ */
 void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
                                uint64_t bytes, bool direction, bool sync)
 {
-       struct blkio_group_stats *stats;
+       struct blkio_group_stats_cpu *stats_cpu;
        unsigned long flags;
 
-       spin_lock_irqsave(&blkg->stats_lock, flags);
-       stats = &blkg->stats;
-       stats->sectors += bytes >> 9;
-       blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction,
-                       sync);
-       blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
-                       direction, sync);
-       spin_unlock_irqrestore(&blkg->stats_lock, flags);
+       /*
+        * Disabling interrupts to provide mutual exclusion between two
+        * writes on same cpu. It probably is not needed for 64bit. Not
+        * optimizing that case yet.
+        */
+       local_irq_save(flags);
+
+       stats_cpu = this_cpu_ptr(blkg->stats_cpu);
+
+       u64_stats_update_begin(&stats_cpu->syncp);
+       stats_cpu->sectors += bytes >> 9;
+       blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED],
+                       1, direction, sync);
+       blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES],
+                       bytes, direction, sync);
+       u64_stats_update_end(&stats_cpu->syncp);
+       local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
 
@@ -438,6 +453,20 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
 
+/*
+ * This function allocates the per cpu stats for blkio_group. Should be called
+ * from sleepable context as alloc_per_cpu() requires that.
+ */
+int blkio_alloc_blkg_stats(struct blkio_group *blkg)
+{
+       /* Allocate memory for per cpu stats */
+       blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
+       if (!blkg->stats_cpu)
+               return -ENOMEM;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(blkio_alloc_blkg_stats);
+
 void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
                struct blkio_group *blkg, void *key, dev_t dev,
                enum blkio_policy_id plid)
@@ -508,6 +537,30 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
 }
 EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
 
+static void blkio_reset_stats_cpu(struct blkio_group *blkg)
+{
+       struct blkio_group_stats_cpu *stats_cpu;
+       int i, j, k;
+       /*
+        * Note: On 64 bit arch this should not be an issue. This has the
+        * possibility of returning some inconsistent value on 32bit arch
+        * as 64bit update on 32bit is non atomic. Taking care of this
+        * corner case makes code very complicated, like sending IPIs to
+        * cpus, taking care of stats of offline cpus etc.
+        *
+        * reset stats is anyway more of a debug feature and this sounds a
+        * corner case. So I am not complicating the code yet until and
+        * unless this becomes a real issue.
+        */
+       for_each_possible_cpu(i) {
+               stats_cpu = per_cpu_ptr(blkg->stats_cpu, i);
+               stats_cpu->sectors = 0;
+               for(j = 0; j < BLKIO_STAT_CPU_NR; j++)
+                       for (k = 0; k < BLKIO_STAT_TOTAL; k++)
+                               stats_cpu->stat_arr_cpu[j][k] = 0;
+       }
+}
+
 static int
 blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 {
@@ -552,7 +605,11 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
                }
 #endif
                spin_unlock(&blkg->stats_lock);
+
+               /* Reset Per cpu stats which don't take blkg->stats_lock */
+               blkio_reset_stats_cpu(blkg);
        }
+
        spin_unlock_irq(&blkcg->lock);
        return 0;
 }
@@ -598,6 +655,59 @@ static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
        return val;
 }
 
+
+static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg,
+                       enum stat_type_cpu type, enum stat_sub_type sub_type)
+{
+       int cpu;
+       struct blkio_group_stats_cpu *stats_cpu;
+       u64 val = 0, tval;
+
+       for_each_possible_cpu(cpu) {
+               unsigned int start;
+               stats_cpu  = per_cpu_ptr(blkg->stats_cpu, cpu);
+
+               do {
+                       start = u64_stats_fetch_begin(&stats_cpu->syncp);
+                       if (type == BLKIO_STAT_CPU_SECTORS)
+                               tval = stats_cpu->sectors;
+                       else
+                               tval = stats_cpu->stat_arr_cpu[type][sub_type];
+               } while(u64_stats_fetch_retry(&stats_cpu->syncp, start));
+
+               val += tval;
+       }
+
+       return val;
+}
+
+static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg,
+               struct cgroup_map_cb *cb, dev_t dev, enum stat_type_cpu type)
+{
+       uint64_t disk_total, val;
+       char key_str[MAX_KEY_LEN];
+       enum stat_sub_type sub_type;
+
+       if (type == BLKIO_STAT_CPU_SECTORS) {
+               val = blkio_read_stat_cpu(blkg, type, 0);
+               return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, val, cb, dev);
+       }
+
+       for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
+                       sub_type++) {
+               blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
+               val = blkio_read_stat_cpu(blkg, type, sub_type);
+               cb->fill(cb, key_str, val);
+       }
+
+       disk_total = blkio_read_stat_cpu(blkg, type, BLKIO_STAT_READ) +
+                       blkio_read_stat_cpu(blkg, type, BLKIO_STAT_WRITE);
+
+       blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
+       cb->fill(cb, key_str, disk_total);
+       return disk_total;
+}
+
 /* This should be called with blkg->stats_lock held */
 static uint64_t blkio_get_stat(struct blkio_group *blkg,
                struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
@@ -609,9 +719,6 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
        if (type == BLKIO_STAT_TIME)
                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
                                        blkg->stats.time, cb, dev);
-       if (type == BLKIO_STAT_SECTORS)
-               return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
-                                       blkg->stats.sectors, cb, dev);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
        if (type == BLKIO_STAT_UNACCOUNTED_TIME)
                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
@@ -1075,8 +1182,8 @@ static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
 }
 
 static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
-               struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type,
-               bool show_total)
+               struct cftype *cft, struct cgroup_map_cb *cb,
+               enum stat_type type, bool show_total, bool pcpu)
 {
        struct blkio_group *blkg;
        struct hlist_node *n;
@@ -1087,10 +1194,15 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
                if (blkg->dev) {
                        if (!cftype_blkg_same_policy(cft, blkg))
                                continue;
-                       spin_lock_irq(&blkg->stats_lock);
-                       cgroup_total += blkio_get_stat(blkg, cb, blkg->dev,
-                                               type);
-                       spin_unlock_irq(&blkg->stats_lock);
+                       if (pcpu)
+                               cgroup_total += blkio_get_stat_cpu(blkg, cb,
+                                               blkg->dev, type);
+                       else {
+                               spin_lock_irq(&blkg->stats_lock);
+                               cgroup_total += blkio_get_stat(blkg, cb,
+                                               blkg->dev, type);
+                               spin_unlock_irq(&blkg->stats_lock);
+                       }
                }
        }
        if (show_total)
@@ -1114,47 +1226,47 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
                switch(name) {
                case BLKIO_PROP_time:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_TIME, 0);
+                                               BLKIO_STAT_TIME, 0, 0);
                case BLKIO_PROP_sectors:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_SECTORS, 0);
+                                               BLKIO_STAT_CPU_SECTORS, 0, 1);
                case BLKIO_PROP_io_service_bytes:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_SERVICE_BYTES, 1);
+                                       BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
                case BLKIO_PROP_io_serviced:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_SERVICED, 1);
+                                               BLKIO_STAT_CPU_SERVICED, 1, 1);
                case BLKIO_PROP_io_service_time:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_SERVICE_TIME, 1);
+                                               BLKIO_STAT_SERVICE_TIME, 1, 0);
                case BLKIO_PROP_io_wait_time:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_WAIT_TIME, 1);
+                                               BLKIO_STAT_WAIT_TIME, 1, 0);
                case BLKIO_PROP_io_merged:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_MERGED, 1);
+                                               BLKIO_STAT_MERGED, 1, 0);
                case BLKIO_PROP_io_queued:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_QUEUED, 1);
+                                               BLKIO_STAT_QUEUED, 1, 0);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
                case BLKIO_PROP_unaccounted_time:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_UNACCOUNTED_TIME, 0);
+                                       BLKIO_STAT_UNACCOUNTED_TIME, 0, 0);
                case BLKIO_PROP_dequeue:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_DEQUEUE, 0);
+                                               BLKIO_STAT_DEQUEUE, 0, 0);
                case BLKIO_PROP_avg_queue_size:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_AVG_QUEUE_SIZE, 0);
+                                       BLKIO_STAT_AVG_QUEUE_SIZE, 0, 0);
                case BLKIO_PROP_group_wait_time:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_GROUP_WAIT_TIME, 0);
+                                       BLKIO_STAT_GROUP_WAIT_TIME, 0, 0);
                case BLKIO_PROP_idle_time:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_IDLE_TIME, 0);
+                                               BLKIO_STAT_IDLE_TIME, 0, 0);
                case BLKIO_PROP_empty_time:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_EMPTY_TIME, 0);
+                                               BLKIO_STAT_EMPTY_TIME, 0, 0);
 #endif
                default:
                        BUG();
@@ -1164,10 +1276,10 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
                switch(name){
                case BLKIO_THROTL_io_service_bytes:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_SERVICE_BYTES, 1);
+                                               BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
                case BLKIO_THROTL_io_serviced:
                        return blkio_read_blkg_stats(blkcg, cft, cb,
-                                               BLKIO_STAT_SERVICED, 1);
+                                               BLKIO_STAT_CPU_SERVICED, 1, 1);
                default:
                        BUG();
                }
index c774930cc20659de992ccebc21d35ffd1e80e234..262226798093be1cbb54adee8911a259452c96f2 100644 (file)
@@ -14,6 +14,7 @@
  */
 
 #include <linux/cgroup.h>
+#include <linux/u64_stats_sync.h>
 
 enum blkio_policy_id {
        BLKIO_POLICY_PROP = 0,          /* Proportional Bandwidth division */
@@ -36,10 +37,6 @@ enum stat_type {
         * request completion for IOs doen by this cgroup. This may not be
         * accurate when NCQ is turned on. */
        BLKIO_STAT_SERVICE_TIME = 0,
-       /* Total bytes transferred */
-       BLKIO_STAT_SERVICE_BYTES,
-       /* Total IOs serviced, post merge */
-       BLKIO_STAT_SERVICED,
        /* Total time spent waiting in scheduler queue in ns */
        BLKIO_STAT_WAIT_TIME,
        /* Number of IOs merged */
@@ -48,10 +45,9 @@ enum stat_type {
        BLKIO_STAT_QUEUED,
        /* All the single valued stats go below this */
        BLKIO_STAT_TIME,
-       BLKIO_STAT_SECTORS,
+#ifdef CONFIG_DEBUG_BLK_CGROUP
        /* Time not charged to this cgroup */
        BLKIO_STAT_UNACCOUNTED_TIME,
-#ifdef CONFIG_DEBUG_BLK_CGROUP
        BLKIO_STAT_AVG_QUEUE_SIZE,
        BLKIO_STAT_IDLE_TIME,
        BLKIO_STAT_EMPTY_TIME,
@@ -60,6 +56,16 @@ enum stat_type {
 #endif
 };
 
+/* Per cpu stats */
+enum stat_type_cpu {
+       BLKIO_STAT_CPU_SECTORS,
+       /* Total bytes transferred */
+       BLKIO_STAT_CPU_SERVICE_BYTES,
+       /* Total IOs serviced, post merge */
+       BLKIO_STAT_CPU_SERVICED,
+       BLKIO_STAT_CPU_NR
+};
+
 enum stat_sub_type {
        BLKIO_STAT_READ = 0,
        BLKIO_STAT_WRITE,
@@ -116,11 +122,11 @@ struct blkio_cgroup {
 struct blkio_group_stats {
        /* total disk time and nr sectors dispatched by this group */
        uint64_t time;
-       uint64_t sectors;
-       /* Time not charged to this cgroup */
-       uint64_t unaccounted_time;
        uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
 #ifdef CONFIG_DEBUG_BLK_CGROUP
+       /* Time not charged to this cgroup */
+       uint64_t unaccounted_time;
+
        /* Sum of number of IOs queued across all samples */
        uint64_t avg_queue_size_sum;
        /* Count of samples taken for average */
@@ -145,6 +151,13 @@ struct blkio_group_stats {
 #endif
 };
 
+/* Per cpu blkio group stats */
+struct blkio_group_stats_cpu {
+       uint64_t sectors;
+       uint64_t stat_arr_cpu[BLKIO_STAT_CPU_NR][BLKIO_STAT_TOTAL];
+       struct u64_stats_sync syncp;
+};
+
 struct blkio_group {
        /* An rcu protected unique identifier for the group */
        void *key;
@@ -160,6 +173,8 @@ struct blkio_group {
        /* Need to serialize the stats in the case of reset/update */
        spinlock_t stats_lock;
        struct blkio_group_stats stats;
+       /* Per cpu stats pointer */
+       struct blkio_group_stats_cpu __percpu *stats_cpu;
 };
 
 struct blkio_policy_node {
@@ -295,6 +310,7 @@ extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
 extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
        struct blkio_group *blkg, void *key, dev_t dev,
        enum blkio_policy_id plid);
+extern int blkio_alloc_blkg_stats(struct blkio_group *blkg);
 extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
 extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
                                                void *key);
@@ -322,6 +338,8 @@ static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
                struct blkio_group *blkg, void *key, dev_t dev,
                enum blkio_policy_id plid) {}
 
+static inline int blkio_alloc_blkg_stats(struct blkio_group *blkg) { return 0; }
+
 static inline int
 blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
 
index 3fe00a14822a203cacc0529ad0610cf5ce7dae06..9e8e297374b9cf693c5c6a165ac07f93014bacb7 100644 (file)
@@ -1550,7 +1550,8 @@ static inline void __generic_make_request(struct bio *bio)
                        goto end_io;
                }
 
-               blk_throtl_bio(q, &bio);
+               if (blk_throtl_bio(q, &bio))
+                       goto end_io;
 
                /*
                 * If bio = NULL, bio has been throttled and will be submitted
index 81e31819a597bb0c6ed6dd4f781eb037c986a243..8a0e7ec056e7a3bd6db31f19ec72202ae97c581e 100644 (file)
@@ -56,7 +56,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
        spin_lock_irq(q->queue_lock);
        __elv_add_request(q, rq, where);
        __blk_run_queue(q);
-       /* the queue is stopped so it won't be plugged+unplugged */
+       /* the queue is stopped so it won't be run */
        if (rq->cmd_type == REQ_TYPE_PM_RESUME)
                q->request_fn(q);
        spin_unlock_irq(q->queue_lock);
index 6c9b5e189e624888860e5185d1d9b3479b3e49a4..bb21e4c36f70ca437162356edd87d2ce3a265a46 100644 (file)
@@ -212,13 +212,19 @@ static void flush_end_io(struct request *flush_rq, int error)
        }
 
        /*
-        * Moving a request silently to empty queue_head may stall the
-        * queue.  Kick the queue in those cases.  This function is called
-        * from request completion path and calling directly into
-        * request_fn may confuse the driver.  Always use kblockd.
+        * Kick the queue to avoid stall for two cases:
+        * 1. Moving a request silently to empty queue_head may stall the
+        * queue.
+        * 2. When flush request is running in non-queueable queue, the
+        * queue is hold. Restart the queue after flush request is finished
+        * to avoid stall.
+        * This function is called from request completion path and calling
+        * directly into request_fn may confuse the driver.  Always use
+        * kblockd.
         */
-       if (queued)
+       if (queued || q->flush_queue_delayed)
                blk_run_queue_async(q);
+       q->flush_queue_delayed = 0;
 }
 
 /**
index 25de73e4759b82d624c2a89b545baf6ea859f624..78e627e2581d5a6c80ec12d9267c57c0468ecf5f 100644 (file)
@@ -9,17 +9,20 @@
 
 #include "blk.h"
 
-static void blkdev_discard_end_io(struct bio *bio, int err)
-{
-       if (err) {
-               if (err == -EOPNOTSUPP)
-                       set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
-       }
+struct bio_batch {
+       atomic_t                done;
+       unsigned long           flags;
+       struct completion       *wait;
+};
 
-       if (bio->bi_private)
-               complete(bio->bi_private);
+static void bio_batch_end_io(struct bio *bio, int err)
+{
+       struct bio_batch *bb = bio->bi_private;
 
+       if (err && (err != -EOPNOTSUPP))
+               clear_bit(BIO_UPTODATE, &bb->flags);
+       if (atomic_dec_and_test(&bb->done))
+               complete(bb->wait);
        bio_put(bio);
 }
 
@@ -41,6 +44,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
        struct request_queue *q = bdev_get_queue(bdev);
        int type = REQ_WRITE | REQ_DISCARD;
        unsigned int max_discard_sectors;
+       struct bio_batch bb;
        struct bio *bio;
        int ret = 0;
 
@@ -67,7 +71,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                type |= REQ_SECURE;
        }
 
-       while (nr_sects && !ret) {
+       atomic_set(&bb.done, 1);
+       bb.flags = 1 << BIO_UPTODATE;
+       bb.wait = &wait;
+
+       while (nr_sects) {
                bio = bio_alloc(gfp_mask, 1);
                if (!bio) {
                        ret = -ENOMEM;
@@ -75,9 +83,9 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                }
 
                bio->bi_sector = sector;
-               bio->bi_end_io = blkdev_discard_end_io;
+               bio->bi_end_io = bio_batch_end_io;
                bio->bi_bdev = bdev;
-               bio->bi_private = &wait;
+               bio->bi_private = &bb;
 
                if (nr_sects > max_discard_sectors) {
                        bio->bi_size = max_discard_sectors << 9;
@@ -88,45 +96,21 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                        nr_sects = 0;
                }
 
-               bio_get(bio);
+               atomic_inc(&bb.done);
                submit_bio(type, bio);
+       }
 
+       /* Wait for bios in-flight */
+       if (!atomic_dec_and_test(&bb.done))
                wait_for_completion(&wait);
 
-               if (bio_flagged(bio, BIO_EOPNOTSUPP))
-                       ret = -EOPNOTSUPP;
-               else if (!bio_flagged(bio, BIO_UPTODATE))
-                       ret = -EIO;
-               bio_put(bio);
-       }
+       if (!test_bit(BIO_UPTODATE, &bb.flags))
+               ret = -EIO;
 
        return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
 
-struct bio_batch
-{
-       atomic_t                done;
-       unsigned long           flags;
-       struct completion       *wait;
-};
-
-static void bio_batch_end_io(struct bio *bio, int err)
-{
-       struct bio_batch *bb = bio->bi_private;
-
-       if (err) {
-               if (err == -EOPNOTSUPP)
-                       set_bit(BIO_EOPNOTSUPP, &bb->flags);
-               else
-                       clear_bit(BIO_UPTODATE, &bb->flags);
-       }
-       if (bb)
-               if (atomic_dec_and_test(&bb->done))
-                       complete(bb->wait);
-       bio_put(bio);
-}
-
 /**
  * blkdev_issue_zeroout - generate number of zero filed write bios
  * @bdev:      blockdev to issue
@@ -151,7 +135,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
        bb.flags = 1 << BIO_UPTODATE;
        bb.wait = &wait;
 
-submit:
        ret = 0;
        while (nr_sects != 0) {
                bio = bio_alloc(gfp_mask,
@@ -168,9 +151,6 @@ submit:
 
                while (nr_sects != 0) {
                        sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
-                       if (sz == 0)
-                               /* bio has maximum size possible */
-                               break;
                        ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
                        nr_sects -= ret >> 9;
                        sector += ret >> 9;
@@ -190,16 +170,6 @@ submit:
                /* One of bios in the batch was completed with error.*/
                ret = -EIO;
 
-       if (ret)
-               goto out;
-
-       if (test_bit(BIO_EOPNOTSUPP, &bb.flags)) {
-               ret = -EOPNOTSUPP;
-               goto out;
-       }
-       if (nr_sects != 0)
-               goto submit;
-out:
        return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_zeroout);
index 1fa7692935976f4e60393e8d7289acf1c56c74c3..fa1eb0449a05f0db45de3ab8e6a01a2fc2a95238 100644 (file)
@@ -120,7 +120,7 @@ void blk_set_default_limits(struct queue_limits *lim)
        lim->discard_granularity = 0;
        lim->discard_alignment = 0;
        lim->discard_misaligned = 0;
-       lim->discard_zeroes_data = -1;
+       lim->discard_zeroes_data = 1;
        lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
        lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
        lim->alignment_offset = 0;
@@ -166,6 +166,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 
        blk_set_default_limits(&q->limits);
        blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
+       q->limits.discard_zeroes_data = 0;
 
        /*
         * by default assume old behaviour and bounce for any highmem page
@@ -790,6 +791,12 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush)
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush);
 
+void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
+{
+       q->flush_not_queueable = !queueable;
+}
+EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
+
 static int __init blk_settings_init(void)
 {
        blk_max_low_pfn = max_low_pfn - 1;
index bd236313f35d59bd08da5a3d375cc29eed6fd86f..d935bd859c87bc1c9a0e39eb61438583f91690f9 100644 (file)
@@ -152,7 +152,8 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag
 
 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
 {
-       return queue_var_show(q->limits.max_discard_sectors << 9, page);
+       return sprintf(page, "%llu\n",
+                      (unsigned long long)q->limits.max_discard_sectors << 9);
 }
 
 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
index 252a81a306f7bef0179f953ab1535a4a06a40798..a62be8d0dc1b34cfdf832947856669a8ee449a84 100644 (file)
@@ -78,6 +78,8 @@ struct throtl_grp {
 
        /* Some throttle limits got updated for the group */
        int limits_changed;
+
+       struct rcu_head rcu_head;
 };
 
 struct throtl_data
@@ -88,7 +90,7 @@ struct throtl_data
        /* service tree for active throtl groups */
        struct throtl_rb_root tg_service_tree;
 
-       struct throtl_grp root_tg;
+       struct throtl_grp *root_tg;
        struct request_queue *queue;
 
        /* Total Number of queued bios on READ and WRITE lists */
@@ -151,56 +153,44 @@ static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg)
        return tg;
 }
 
-static void throtl_put_tg(struct throtl_grp *tg)
+static void throtl_free_tg(struct rcu_head *head)
 {
-       BUG_ON(atomic_read(&tg->ref) <= 0);
-       if (!atomic_dec_and_test(&tg->ref))
-               return;
+       struct throtl_grp *tg;
+
+       tg = container_of(head, struct throtl_grp, rcu_head);
+       free_percpu(tg->blkg.stats_cpu);
        kfree(tg);
 }
 
-static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
-                       struct blkio_cgroup *blkcg)
+static void throtl_put_tg(struct throtl_grp *tg)
 {
-       struct throtl_grp *tg = NULL;
-       void *key = td;
-       struct backing_dev_info *bdi = &td->queue->backing_dev_info;
-       unsigned int major, minor;
+       BUG_ON(atomic_read(&tg->ref) <= 0);
+       if (!atomic_dec_and_test(&tg->ref))
+               return;
 
        /*
-        * TODO: Speed up blkiocg_lookup_group() by maintaining a radix
-        * tree of blkg (instead of traversing through hash list all
-        * the time.
+        * A group is freed in rcu manner. But having an rcu lock does not
+        * mean that one can access all the fields of blkg and assume these
+        * are valid. For example, don't try to follow throtl_data and
+        * request queue links.
+        *
+        * Having a reference to blkg under an rcu allows acess to only
+        * values local to groups like group stats and group rate limits
         */
+       call_rcu(&tg->rcu_head, throtl_free_tg);
+}
 
-       /*
-        * This is the common case when there are no blkio cgroups.
-        * Avoid lookup in this case
-        */
-       if (blkcg == &blkio_root_cgroup)
-               tg = &td->root_tg;
-       else
-               tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key));
-
-       /* Fill in device details for root group */
-       if (tg && !tg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
-               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-               tg->blkg.dev = MKDEV(major, minor);
-               goto done;
-       }
-
-       if (tg)
-               goto done;
-
-       tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
-       if (!tg)
-               goto done;
-
+static void throtl_init_group(struct throtl_grp *tg)
+{
        INIT_HLIST_NODE(&tg->tg_node);
        RB_CLEAR_NODE(&tg->rb_node);
        bio_list_init(&tg->bio_lists[0]);
        bio_list_init(&tg->bio_lists[1]);
-       td->limits_changed = false;
+       tg->limits_changed = false;
+
+       /* Practically unlimited BW */
+       tg->bps[0] = tg->bps[1] = -1;
+       tg->iops[0] = tg->iops[1] = -1;
 
        /*
         * Take the initial reference that will be released on destroy
@@ -209,33 +199,181 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
         * exit or cgroup deletion path depending on who is exiting first.
         */
        atomic_set(&tg->ref, 1);
+}
+
+/* Should be called with rcu read lock held (needed for blkcg) */
+static void
+throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg)
+{
+       hlist_add_head(&tg->tg_node, &td->tg_list);
+       td->nr_undestroyed_grps++;
+}
+
+static void
+__throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
+{
+       struct backing_dev_info *bdi = &td->queue->backing_dev_info;
+       unsigned int major, minor;
+
+       if (!tg || tg->blkg.dev)
+               return;
+
+       /*
+        * Fill in device details for a group which might not have been
+        * filled at group creation time as queue was being instantiated
+        * and driver had not attached a device yet
+        */
+       if (bdi->dev && dev_name(bdi->dev)) {
+               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+               tg->blkg.dev = MKDEV(major, minor);
+       }
+}
+
+/*
+ * Should be called with without queue lock held. Here queue lock will be
+ * taken rarely. It will be taken only once during life time of a group
+ * if need be
+ */
+static void
+throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
+{
+       if (!tg || tg->blkg.dev)
+               return;
+
+       spin_lock_irq(td->queue->queue_lock);
+       __throtl_tg_fill_dev_details(td, tg);
+       spin_unlock_irq(td->queue->queue_lock);
+}
+
+static void throtl_init_add_tg_lists(struct throtl_data *td,
+                       struct throtl_grp *tg, struct blkio_cgroup *blkcg)
+{
+       __throtl_tg_fill_dev_details(td, tg);
 
        /* Add group onto cgroup list */
-       sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
        blkiocg_add_blkio_group(blkcg, &tg->blkg, (void *)td,
-                               MKDEV(major, minor), BLKIO_POLICY_THROTL);
+                               tg->blkg.dev, BLKIO_POLICY_THROTL);
 
        tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
        tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
        tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
        tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
 
-       hlist_add_head(&tg->tg_node, &td->tg_list);
-       td->nr_undestroyed_grps++;
-done:
+       throtl_add_group_to_td_list(td, tg);
+}
+
+/* Should be called without queue lock and outside of rcu period */
+static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td)
+{
+       struct throtl_grp *tg = NULL;
+       int ret;
+
+       tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
+       if (!tg)
+               return NULL;
+
+       ret = blkio_alloc_blkg_stats(&tg->blkg);
+
+       if (ret) {
+               kfree(tg);
+               return NULL;
+       }
+
+       throtl_init_group(tg);
        return tg;
 }
 
-static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
+static struct
+throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
 {
        struct throtl_grp *tg = NULL;
+       void *key = td;
+
+       /*
+        * This is the common case when there are no blkio cgroups.
+        * Avoid lookup in this case
+        */
+       if (blkcg == &blkio_root_cgroup)
+               tg = td->root_tg;
+       else
+               tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key));
+
+       __throtl_tg_fill_dev_details(td, tg);
+       return tg;
+}
+
+/*
+ * This function returns with queue lock unlocked in case of error, like
+ * request queue is no more
+ */
+static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
+{
+       struct throtl_grp *tg = NULL, *__tg = NULL;
        struct blkio_cgroup *blkcg;
+       struct request_queue *q = td->queue;
 
        rcu_read_lock();
        blkcg = task_blkio_cgroup(current);
-       tg = throtl_find_alloc_tg(td, blkcg);
-       if (!tg)
-               tg = &td->root_tg;
+       tg = throtl_find_tg(td, blkcg);
+       if (tg) {
+               rcu_read_unlock();
+               return tg;
+       }
+
+       /*
+        * Need to allocate a group. Allocation of group also needs allocation
+        * of per cpu stats which in-turn takes a mutex() and can block. Hence
+        * we need to drop rcu lock and queue_lock before we call alloc
+        *
+        * Take the request queue reference to make sure queue does not
+        * go away once we return from allocation.
+        */
+       blk_get_queue(q);
+       rcu_read_unlock();
+       spin_unlock_irq(q->queue_lock);
+
+       tg = throtl_alloc_tg(td);
+       /*
+        * We might have slept in group allocation. Make sure queue is not
+        * dead
+        */
+       if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+               blk_put_queue(q);
+               if (tg)
+                       kfree(tg);
+
+               return ERR_PTR(-ENODEV);
+       }
+       blk_put_queue(q);
+
+       /* Group allocated and queue is still alive. take the lock */
+       spin_lock_irq(q->queue_lock);
+
+       /*
+        * Initialize the new group. After sleeping, read the blkcg again.
+        */
+       rcu_read_lock();
+       blkcg = task_blkio_cgroup(current);
+
+       /*
+        * If some other thread already allocated the group while we were
+        * not holding queue lock, free up the group
+        */
+       __tg = throtl_find_tg(td, blkcg);
+
+       if (__tg) {
+               kfree(tg);
+               rcu_read_unlock();
+               return __tg;
+       }
+
+       /* Group allocation failed. Account the IO to root group */
+       if (!tg) {
+               tg = td->root_tg;
+               return tg;
+       }
+
+       throtl_init_add_tg_lists(td, tg, blkcg);
        rcu_read_unlock();
        return tg;
 }
@@ -544,6 +682,12 @@ static bool tg_with_in_bps_limit(struct throtl_data *td, struct throtl_grp *tg,
        return 0;
 }
 
+static bool tg_no_rule_group(struct throtl_grp *tg, bool rw) {
+       if (tg->bps[rw] == -1 && tg->iops[rw] == -1)
+               return 1;
+       return 0;
+}
+
 /*
  * Returns whether one can dispatch a bio or not. Also returns approx number
  * of jiffies to wait before this bio is with-in IO rate and can be dispatched
@@ -608,10 +752,6 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
        tg->bytes_disp[rw] += bio->bi_size;
        tg->io_disp[rw]++;
 
-       /*
-        * TODO: This will take blkg->stats_lock. Figure out a way
-        * to avoid this cost.
-        */
        blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, sync);
 }
 
@@ -989,15 +1129,51 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
        struct throtl_grp *tg;
        struct bio *bio = *biop;
        bool rw = bio_data_dir(bio), update_disptime = true;
+       struct blkio_cgroup *blkcg;
 
        if (bio->bi_rw & REQ_THROTTLED) {
                bio->bi_rw &= ~REQ_THROTTLED;
                return 0;
        }
 
+       /*
+        * A throtl_grp pointer retrieved under rcu can be used to access
+        * basic fields like stats and io rates. If a group has no rules,
+        * just update the dispatch stats in lockless manner and return.
+        */
+
+       rcu_read_lock();
+       blkcg = task_blkio_cgroup(current);
+       tg = throtl_find_tg(td, blkcg);
+       if (tg) {
+               throtl_tg_fill_dev_details(td, tg);
+
+               if (tg_no_rule_group(tg, rw)) {
+                       blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
+                                       rw, bio->bi_rw & REQ_SYNC);
+                       rcu_read_unlock();
+                       return 0;
+               }
+       }
+       rcu_read_unlock();
+
+       /*
+        * Either group has not been allocated yet or it is not an unlimited
+        * IO group
+        */
+
        spin_lock_irq(q->queue_lock);
        tg = throtl_get_tg(td);
 
+       if (IS_ERR(tg)) {
+               if (PTR_ERR(tg) == -ENODEV) {
+                       /*
+                        * Queue is gone. No queue lock held here.
+                        */
+                       return -ENODEV;
+               }
+       }
+
        if (tg->nr_queued[rw]) {
                /*
                 * There is already another bio queued in same dir. No
@@ -1060,39 +1236,24 @@ int blk_throtl_init(struct request_queue *q)
        INIT_HLIST_HEAD(&td->tg_list);
        td->tg_service_tree = THROTL_RB_ROOT;
        td->limits_changed = false;
+       INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
 
-       /* Init root group */
-       tg = &td->root_tg;
-       INIT_HLIST_NODE(&tg->tg_node);
-       RB_CLEAR_NODE(&tg->rb_node);
-       bio_list_init(&tg->bio_lists[0]);
-       bio_list_init(&tg->bio_lists[1]);
-
-       /* Practically unlimited BW */
-       tg->bps[0] = tg->bps[1] = -1;
-       tg->iops[0] = tg->iops[1] = -1;
-       td->limits_changed = false;
+       /* alloc and Init root group. */
+       td->queue = q;
+       tg = throtl_alloc_tg(td);
 
-       /*
-        * Set root group reference to 2. One reference will be dropped when
-        * all groups on tg_list are being deleted during queue exit. Other
-        * reference will remain there as we don't want to delete this group
-        * as it is statically allocated and gets destroyed when throtl_data
-        * goes away.
-        */
-       atomic_set(&tg->ref, 2);
-       hlist_add_head(&tg->tg_node, &td->tg_list);
-       td->nr_undestroyed_grps++;
+       if (!tg) {
+               kfree(td);
+               return -ENOMEM;
+       }
 
-       INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
+       td->root_tg = tg;
 
        rcu_read_lock();
-       blkiocg_add_blkio_group(&blkio_root_cgroup, &tg->blkg, (void *)td,
-                                       0, BLKIO_POLICY_THROTL);
+       throtl_init_add_tg_lists(td, tg, &blkio_root_cgroup);
        rcu_read_unlock();
 
        /* Attach throtl data to request queue */
-       td->queue = q;
        q->td = td;
        return 0;
 }
index 4df474d37e6d353d08aef657268880fde9ef1c3c..d6586287adc9ccf44e0adc556b552c7b1404ca4b 100644 (file)
@@ -62,6 +62,26 @@ static inline struct request *__elv_next_request(struct request_queue *q)
                        return rq;
                }
 
+               /*
+                * Flush request is running and flush request isn't queueable
+                * in the drive, we can hold the queue till flush request is
+                * finished. Even we don't do this, driver can't dispatch next
+                * requests and will requeue them. And this can improve
+                * throughput too. For example, we have request flush1, write1,
+                * flush 2. flush1 is dispatched, then queue is hold, write1
+                * isn't inserted to queue. After flush1 is finished, flush2
+                * will be dispatched. Since disk cache is already clean,
+                * flush2 will be finished very soon, so looks like flush2 is
+                * folded to flush1.
+                * Since the queue is hold, a flag is set to indicate the queue
+                * should be restarted later. Please see flush_end_io() for
+                * details.
+                */
+               if (q->flush_pending_idx != q->flush_running_idx &&
+                               !queue_flush_queueable(q)) {
+                       q->flush_queue_delayed = 1;
+                       return NULL;
+               }
                if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
                    !q->elevator->ops->elevator_dispatch_fn(q, 0))
                        return NULL;
index ab7a9e6a9b1cb12c1952f5fd725ca2ea0dd20fd1..d646b279c8bbe88faee7a7369f1c26bbe4fc2ab8 100644 (file)
@@ -300,7 +300,9 @@ struct cfq_data {
 
        /* List of cfq groups being managed on this device*/
        struct hlist_head cfqg_list;
-       struct rcu_head rcu;
+
+       /* Number of groups which are on blkcg->blkg_list */
+       unsigned int nr_blkcg_linked_grps;
 };
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -1014,28 +1016,47 @@ void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
        cfqg->needs_update = true;
 }
 
-static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
-               struct blkio_cgroup *blkcg, int create)
+static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
+                       struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
 {
-       struct cfq_group *cfqg = NULL;
-       void *key = cfqd;
-       int i, j;
-       struct cfq_rb_root *st;
        struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
        unsigned int major, minor;
 
-       cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
-       if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+       /*
+        * Add group onto cgroup list. It might happen that bdi->dev is
+        * not initialized yet. Initialize this new group without major
+        * and minor info and this info will be filled in once a new thread
+        * comes for IO.
+        */
+       if (bdi->dev) {
                sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-               cfqg->blkg.dev = MKDEV(major, minor);
-               goto done;
-       }
-       if (cfqg || !create)
-               goto done;
+               cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
+                                       (void *)cfqd, MKDEV(major, minor));
+       } else
+               cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
+                                       (void *)cfqd, 0);
+
+       cfqd->nr_blkcg_linked_grps++;
+       cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+
+       /* Add group on cfqd list */
+       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+}
+
+/*
+ * Should be called from sleepable context. No request queue lock as per
+ * cpu stats are allocated dynamically and alloc_percpu needs to be called
+ * from sleepable context.
+ */
+static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
+{
+       struct cfq_group *cfqg = NULL;
+       int i, j, ret;
+       struct cfq_rb_root *st;
 
        cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
        if (!cfqg)
-               goto done;
+               return NULL;
 
        for_each_cfqg_st(cfqg, i, j, st)
                *st = CFQ_RB_ROOT;
@@ -1049,43 +1070,94 @@ static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
         */
        cfqg->ref = 1;
 
+       ret = blkio_alloc_blkg_stats(&cfqg->blkg);
+       if (ret) {
+               kfree(cfqg);
+               return NULL;
+       }
+
+       return cfqg;
+}
+
+static struct cfq_group *
+cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
+{
+       struct cfq_group *cfqg = NULL;
+       void *key = cfqd;
+       struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
+       unsigned int major, minor;
+
        /*
-        * Add group onto cgroup list. It might happen that bdi->dev is
-        * not initialized yet. Initialize this new group without major
-        * and minor info and this info will be filled in once a new thread
-        * comes for IO. See code above.
+        * This is the common case when there are no blkio cgroups.
+        * Avoid lookup in this case
         */
-       if (bdi->dev) {
-               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-               cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-                                       MKDEV(major, minor));
-       } else
-               cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-                                       0);
-
-       cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+       if (blkcg == &blkio_root_cgroup)
+               cfqg = &cfqd->root_group;
+       else
+               cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
 
-       /* Add group on cfqd list */
-       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+       if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+               cfqg->blkg.dev = MKDEV(major, minor);
+       }
 
-done:
        return cfqg;
 }
 
 /*
- * Search for the cfq group current task belongs to. If create = 1, then also
- * create the cfq group if it does not exist. request_queue lock must be held.
+ * Search for the cfq group current task belongs to. request_queue lock must
+ * be held.
  */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
        struct blkio_cgroup *blkcg;
-       struct cfq_group *cfqg = NULL;
+       struct cfq_group *cfqg = NULL, *__cfqg = NULL;
+       struct request_queue *q = cfqd->queue;
 
        rcu_read_lock();
        blkcg = task_blkio_cgroup(current);
-       cfqg = cfq_find_alloc_cfqg(cfqd, blkcg, create);
-       if (!cfqg && create)
+       cfqg = cfq_find_cfqg(cfqd, blkcg);
+       if (cfqg) {
+               rcu_read_unlock();
+               return cfqg;
+       }
+
+       /*
+        * Need to allocate a group. Allocation of group also needs allocation
+        * of per cpu stats which in-turn takes a mutex() and can block. Hence
+        * we need to drop rcu lock and queue_lock before we call alloc.
+        *
+        * Not taking any queue reference here and assuming that queue is
+        * around by the time we return. CFQ queue allocation code does
+        * the same. It might be racy though.
+        */
+
+       rcu_read_unlock();
+       spin_unlock_irq(q->queue_lock);
+
+       cfqg = cfq_alloc_cfqg(cfqd);
+
+       spin_lock_irq(q->queue_lock);
+
+       rcu_read_lock();
+       blkcg = task_blkio_cgroup(current);
+
+       /*
+        * If some other thread already allocated the group while we were
+        * not holding queue lock, free up the group
+        */
+       __cfqg = cfq_find_cfqg(cfqd, blkcg);
+
+       if (__cfqg) {
+               kfree(cfqg);
+               rcu_read_unlock();
+               return __cfqg;
+       }
+
+       if (!cfqg)
                cfqg = &cfqd->root_group;
+
+       cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
        rcu_read_unlock();
        return cfqg;
 }
@@ -1118,6 +1190,7 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
                return;
        for_each_cfqg_st(cfqg, i, j, st)
                BUG_ON(!RB_EMPTY_ROOT(&st->rb));
+       free_percpu(cfqg->blkg.stats_cpu);
        kfree(cfqg);
 }
 
@@ -1176,7 +1249,7 @@ void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
 }
 
 #else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
        return &cfqd->root_group;
 }
@@ -2911,7 +2984,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
        struct cfq_group *cfqg;
 
 retry:
-       cfqg = cfq_get_cfqg(cfqd, 1);
+       cfqg = cfq_get_cfqg(cfqd);
        cic = cfq_cic_lookup(cfqd, ioc);
        /* cic always exists here */
        cfqq = cic_to_cfqq(cic, is_sync);
@@ -3815,15 +3888,11 @@ static void cfq_put_async_queues(struct cfq_data *cfqd)
                cfq_put_queue(cfqd->async_idle_cfqq);
 }
 
-static void cfq_cfqd_free(struct rcu_head *head)
-{
-       kfree(container_of(head, struct cfq_data, rcu));
-}
-
 static void cfq_exit_queue(struct elevator_queue *e)
 {
        struct cfq_data *cfqd = e->elevator_data;
        struct request_queue *q = cfqd->queue;
+       bool wait = false;
 
        cfq_shutdown_timer_wq(cfqd);
 
@@ -3842,7 +3911,13 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
        cfq_put_async_queues(cfqd);
        cfq_release_cfq_groups(cfqd);
-       cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
+
+       /*
+        * If there are groups which we could not unlink from blkcg list,
+        * wait for a rcu period for them to be freed.
+        */
+       if (cfqd->nr_blkcg_linked_grps)
+               wait = true;
 
        spin_unlock_irq(q->queue_lock);
 
@@ -3852,8 +3927,20 @@ static void cfq_exit_queue(struct elevator_queue *e)
        ida_remove(&cic_index_ida, cfqd->cic_index);
        spin_unlock(&cic_index_lock);
 
-       /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
-       call_rcu(&cfqd->rcu, cfq_cfqd_free);
+       /*
+        * Wait for cfqg->blkg->key accessors to exit their grace periods.
+        * Do this wait only if there are other unlinked groups out
+        * there. This can happen if cgroup deletion path claimed the
+        * responsibility of cleaning up a group before queue cleanup code
+        * get to the group.
+        *
+        * Do not call synchronize_rcu() unconditionally as there are drivers
+        * which create/delete request queue hundreds of times during scan/boot
+        * and synchronize_rcu() can take significant time and slow down boot.
+        */
+       if (wait)
+               synchronize_rcu();
+       kfree(cfqd);
 }
 
 static int cfq_alloc_cic_index(void)
@@ -3909,14 +3996,29 @@ static void *cfq_init_queue(struct request_queue *q)
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
        /*
-        * Take a reference to root group which we never drop. This is just
-        * to make sure that cfq_put_cfqg() does not try to kfree root group
+        * Set root group reference to 2. One reference will be dropped when
+        * all groups on cfqd->cfqg_list are being deleted during queue exit.
+        * Other reference will remain there as we don't want to delete this
+        * group as it is statically allocated and gets destroyed when
+        * throtl_data goes away.
         */
-       cfqg->ref = 1;
+       cfqg->ref = 2;
+
+       if (blkio_alloc_blkg_stats(&cfqg->blkg)) {
+               kfree(cfqg);
+               kfree(cfqd);
+               return NULL;
+       }
+
        rcu_read_lock();
+
        cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
                                        (void *)cfqd, 0);
        rcu_read_unlock();
+       cfqd->nr_blkcg_linked_grps++;
+
+       /* Add group on cfqd->cfqg_list */
+       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
 #endif
        /*
         * Not strictly needed (since RB_ROOT just clears the node and we
index 45ca1e34f58249ff5b3838c0ec89fbc1c6ea2bb7..2a0b653c90fde6df27b5edc88f8421f25c18eca2 100644 (file)
@@ -155,13 +155,8 @@ static struct elevator_type *elevator_get(const char *name)
 
        e = elevator_find(name);
        if (!e) {
-               char elv[ELV_NAME_MAX + strlen("-iosched")];
-
                spin_unlock(&elv_list_lock);
-
-               snprintf(elv, sizeof(elv), "%s-iosched", name);
-
-               request_module("%s", elv);
+               request_module("%s-iosched", name);
                spin_lock(&elv_list_lock);
                e = elevator_find(name);
        }
index e2f57e9e12f0eecb0fd8bbd2603ef14122ff003e..dad2c952e0d255ff4ccabe14578be9d1354f1dd1 100644 (file)
@@ -1089,21 +1089,21 @@ static int atapi_drain_needed(struct request *rq)
 static int ata_scsi_dev_config(struct scsi_device *sdev,
                               struct ata_device *dev)
 {
+       struct request_queue *q = sdev->request_queue;
+
        if (!ata_id_has_unload(dev->id))
                dev->flags |= ATA_DFLAG_NO_UNLOAD;
 
        /* configure max sectors */
-       blk_queue_max_hw_sectors(sdev->request_queue, dev->max_sectors);
+       blk_queue_max_hw_sectors(q, dev->max_sectors);
 
        if (dev->class == ATA_DEV_ATAPI) {
-               struct request_queue *q = sdev->request_queue;
                void *buf;
 
                sdev->sector_size = ATA_SECT_SIZE;
 
                /* set DMA padding */
-               blk_queue_update_dma_pad(sdev->request_queue,
-                                        ATA_DMA_PAD_SZ - 1);
+               blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1);
 
                /* configure draining */
                buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL);
@@ -1131,8 +1131,7 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
                        "sector_size=%u > PAGE_SIZE, PIO may malfunction\n",
                        sdev->sector_size);
 
-       blk_queue_update_dma_alignment(sdev->request_queue,
-                                      sdev->sector_size - 1);
+       blk_queue_update_dma_alignment(q, sdev->sector_size - 1);
 
        if (dev->flags & ATA_DFLAG_AN)
                set_bit(SDEV_EVT_MEDIA_CHANGE, sdev->supported_events);
@@ -1145,6 +1144,8 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
                scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
        }
 
+       blk_queue_flush_queueable(q, false);
+
        dev->sdev = sdev;
        return 0;
 }
index bdd2719f3f68382084fb04170b1c14022e442425..bc9e702186dd64daf5445ea09710e5e8c7ee59eb 100644 (file)
@@ -2643,16 +2643,19 @@ fore200e_init(struct fore200e* fore200e, struct device *parent)
 }
 
 #ifdef CONFIG_SBUS
+static const struct of_device_id fore200e_sba_match[];
 static int __devinit fore200e_sba_probe(struct platform_device *op)
 {
+       const struct of_device_id *match;
        const struct fore200e_bus *bus;
        struct fore200e *fore200e;
        static int index = 0;
        int err;
 
-       if (!op->dev.of_match)
+       match = of_match_device(fore200e_sba_match, &op->dev);
+       if (!match)
                return -EINVAL;
-       bus = op->dev.of_match->data;
+       bus = match->data;
 
        fore200e = kzalloc(sizeof(struct fore200e), GFP_KERNEL);
        if (!fore200e)
index 8690e31d993287e16a68caca0e2125c0c7e6bdb0..a0aabd904a51294605a20e39de391660bff77e90 100644 (file)
@@ -320,6 +320,8 @@ static void pcd_init_units(void)
                disk->first_minor = unit;
                strcpy(disk->disk_name, cd->name);      /* umm... */
                disk->fops = &pcd_bdops;
+               disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+               disk->events = DISK_EVENT_MEDIA_CHANGE;
        }
 }
 
index e427fbe459994a8491945a65fd085201f40ed01f..ae15a4ddaa9b0a3d7ff172e737dbaafd9fe8ac46 100644 (file)
@@ -625,7 +625,9 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
        blk_queue_max_hw_sectors(q, 4096 / 512);
        gendisk->queue = q;
        gendisk->fops = &viocd_fops;
-       gendisk->flags = GENHD_FL_CD|GENHD_FL_REMOVABLE;
+       gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE |
+                        GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+       gendisk->events = DISK_EVENT_MEDIA_CHANGE;
        set_capacity(gendisk, 0);
        gendisk->private_data = d;
        d->viocd_disk = gendisk;
index 43ac61978d8b2661672418901bde4242462a6165..ac6739e085e3cede7ede5588854aeca8503f1c7c 100644 (file)
@@ -619,15 +619,18 @@ static void __devinit n2rng_driver_version(void)
                pr_info("%s", version);
 }
 
+static const struct of_device_id n2rng_match[];
 static int __devinit n2rng_probe(struct platform_device *op)
 {
+       const struct of_device_id *match;
        int victoria_falls;
        int err = -ENOMEM;
        struct n2rng *np;
 
-       if (!op->dev.of_match)
+       match = of_match_device(n2rng_match, &op->dev);
+       if (!match)
                return -EINVAL;
-       victoria_falls = (op->dev.of_match->data != NULL);
+       victoria_falls = (match->data != NULL);
 
        n2rng_driver_version();
        np = kzalloc(sizeof(*np), GFP_KERNEL);
index cc6c9b2546a30eed5dad30ac8a928e19514bc1c9..64c6b85306150723ad8206f11fab7f54d8dd597b 100644 (file)
@@ -2554,9 +2554,11 @@ static struct pci_driver ipmi_pci_driver = {
 };
 #endif /* CONFIG_PCI */
 
+static struct of_device_id ipmi_match[];
 static int __devinit ipmi_probe(struct platform_device *dev)
 {
 #ifdef CONFIG_OF
+       const struct of_device_id *match;
        struct smi_info *info;
        struct resource resource;
        const __be32 *regsize, *regspacing, *regshift;
@@ -2566,7 +2568,8 @@ static int __devinit ipmi_probe(struct platform_device *dev)
 
        dev_info(&dev->dev, "probing via device tree\n");
 
-       if (!dev->dev.of_match)
+       match = of_match_device(ipmi_match, &dev->dev);
+       if (!match)
                return -EINVAL;
 
        ret = of_address_to_resource(np, 0, &resource);
@@ -2601,7 +2604,7 @@ static int __devinit ipmi_probe(struct platform_device *dev)
                return -ENOMEM;
        }
 
-       info->si_type           = (enum si_type) dev->dev.of_match->data;
+       info->si_type           = (enum si_type) match->data;
        info->addr_source       = SI_DEVICETREE;
        info->irq_setup         = std_irq_setup;
 
index d6412c16385f72940da3d501052742a250bdb8ed..39ccdeada79101fe2e910328110843fd569d3188 100644 (file)
@@ -715,13 +715,13 @@ static int __devexit hwicap_remove(struct device *dev)
 }
 
 #ifdef CONFIG_OF
-static int __devinit hwicap_of_probe(struct platform_device *op)
+static int __devinit hwicap_of_probe(struct platform_device *op,
+                                    const struct hwicap_driver_config *config)
 {
        struct resource res;
        const unsigned int *id;
        const char *family;
        int rc;
-       const struct hwicap_driver_config *config = op->dev.of_match->data;
        const struct config_registers *regs;
 
 
@@ -751,20 +751,24 @@ static int __devinit hwicap_of_probe(struct platform_device *op)
                        regs);
 }
 #else
-static inline int hwicap_of_probe(struct platform_device *op)
+static inline int hwicap_of_probe(struct platform_device *op,
+                                 const struct hwicap_driver_config *config)
 {
        return -EINVAL;
 }
 #endif /* CONFIG_OF */
 
+static const struct of_device_id __devinitconst hwicap_of_match[];
 static int __devinit hwicap_drv_probe(struct platform_device *pdev)
 {
+       const struct of_device_id *match;
        struct resource *res;
        const struct config_registers *regs;
        const char *family;
 
-       if (pdev->dev.of_match)
-               return hwicap_of_probe(pdev);
+       match = of_match_device(hwicap_of_match, &pdev->dev);
+       if (match)
+               return hwicap_of_probe(pdev, match->data);
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        if (!res)
index c1f0045ceb8e708ced1a6163ae921b03efe4e9d5..af8e7b1aa290a2e0d682de5d1aefb9e8808c23e1 100644 (file)
@@ -1019,7 +1019,7 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci,
        struct ppc4xx_edac_pdata *pdata = NULL;
        const struct device_node *np = op->dev.of_node;
 
-       if (op->dev.of_match == NULL)
+       if (of_match_device(ppc4xx_edac_match, &op->dev) == NULL)
                return -EINVAL;
 
        /* Initial driver pointers and private data */
index 75b984c519acd18750f5ce2b6002e60c152c5b21..107397a606b48957638a9569c2672ba363a5c57d 100644 (file)
@@ -560,15 +560,18 @@ static struct i2c_adapter mpc_ops = {
        .timeout = HZ,
 };
 
+static const struct of_device_id mpc_i2c_of_match[];
 static int __devinit fsl_i2c_probe(struct platform_device *op)
 {
+       const struct of_device_id *match;
        struct mpc_i2c *i2c;
        const u32 *prop;
        u32 clock = MPC_I2C_CLOCK_LEGACY;
        int result = 0;
        int plen;
 
-       if (!op->dev.of_match)
+       match = of_match_device(mpc_i2c_of_match, &op->dev);
+       if (!match)
                return -EINVAL;
 
        i2c = kzalloc(sizeof(*i2c), GFP_KERNEL);
@@ -605,8 +608,8 @@ static int __devinit fsl_i2c_probe(struct platform_device *op)
                        clock = *prop;
        }
 
-       if (op->dev.of_match->data) {
-               struct mpc_i2c_data *data = op->dev.of_match->data;
+       if (match->data) {
+               struct mpc_i2c_data *data = match->data;
                data->setup(op->dev.of_node, i2c, clock, data->prescaler);
        } else {
                /* Backwards compatibility */
index a5ec5a7cb381bbffac45a13c9e2cdccfd0a7d8b0..6e5123b1d341746341d9c3feda2ad445606c327c 100644 (file)
@@ -1781,7 +1781,8 @@ static int ide_cd_probe(ide_drive_t *drive)
 
        ide_cd_read_toc(drive, &sense);
        g->fops = &idecd_ops;
-       g->flags |= GENHD_FL_REMOVABLE;
+       g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+       g->events = DISK_EVENT_MEDIA_CHANGE;
        add_disk(g);
        return 0;
 
index f9b611fc773e19d9133e79681280164c9e4d8538..60e4186a434592bc2a42b0a365578eed4ed525f3 100644 (file)
@@ -124,8 +124,10 @@ static bool __devinit sdhci_of_wp_inverted(struct device_node *np)
 #endif
 }
 
+static const struct of_device_id sdhci_of_match[];
 static int __devinit sdhci_of_probe(struct platform_device *ofdev)
 {
+       const struct of_device_id *match;
        struct device_node *np = ofdev->dev.of_node;
        struct sdhci_of_data *sdhci_of_data;
        struct sdhci_host *host;
@@ -134,9 +136,10 @@ static int __devinit sdhci_of_probe(struct platform_device *ofdev)
        int size;
        int ret;
 
-       if (!ofdev->dev.of_match)
+       match = of_match_device(sdhci_of_match, &ofdev->dev);
+       if (!match)
                return -EINVAL;
-       sdhci_of_data = ofdev->dev.of_match->data;
+       sdhci_of_data = match->data;
 
        if (!of_device_is_available(np))
                return -ENODEV;
index bd483f0c57e1ae31ca98c1336cd3f4bed248310b..c1d33464aee8926e66afcccc23dbbbacdb718ba8 100644 (file)
@@ -214,11 +214,13 @@ static void __devinit of_free_probes(const char **probes)
 }
 #endif
 
+static struct of_device_id of_flash_match[];
 static int __devinit of_flash_probe(struct platform_device *dev)
 {
 #ifdef CONFIG_MTD_PARTITIONS
        const char **part_probe_types;
 #endif
+       const struct of_device_id *match;
        struct device_node *dp = dev->dev.of_node;
        struct resource res;
        struct of_flash *info;
@@ -232,9 +234,10 @@ static int __devinit of_flash_probe(struct platform_device *dev)
        struct mtd_info **mtd_list = NULL;
        resource_size_t res_size;
 
-       if (!dev->dev.of_match)
+       match = of_match_device(of_flash_match, &dev->dev);
+       if (!match)
                return -EINVAL;
-       probe_type = dev->dev.of_match->data;
+       probe_type = match->data;
 
        reg_tuple_size = (of_n_addr_cells(dp) + of_n_size_cells(dp)) * sizeof(u32);
 
index bd1d811c204f276cbf2aa74f07035cd8ae0875ef..5fedc3375562316c3e13f0a9a7ff4b94854c9c34 100644 (file)
@@ -247,8 +247,10 @@ static u32 __devinit mpc512x_can_get_clock(struct platform_device *ofdev,
 }
 #endif /* CONFIG_PPC_MPC512x */
 
+static struct of_device_id mpc5xxx_can_table[];
 static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev)
 {
+       const struct of_device_id *match;
        struct mpc5xxx_can_data *data;
        struct device_node *np = ofdev->dev.of_node;
        struct net_device *dev;
@@ -258,9 +260,10 @@ static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev)
        int irq, mscan_clksrc = 0;
        int err = -ENOMEM;
 
-       if (!ofdev->dev.of_match)
+       match = of_match_device(mpc5xxx_can_table, &ofdev->dev);
+       if (!match)
                return -EINVAL;
-       data = (struct mpc5xxx_can_data *)ofdev->dev.of_match->data;
+       data = match->data;
 
        base = of_iomap(np, 0);
        if (!base) {
index 24cb953900dde794e60fe3d5921c7e0698f800e8..5131e61c358cd8b9cde0ce4aa7dcfaf36df11249 100644 (file)
@@ -998,8 +998,10 @@ static const struct net_device_ops fs_enet_netdev_ops = {
 #endif
 };
 
+static struct of_device_id fs_enet_match[];
 static int __devinit fs_enet_probe(struct platform_device *ofdev)
 {
+       const struct of_device_id *match;
        struct net_device *ndev;
        struct fs_enet_private *fep;
        struct fs_platform_info *fpi;
@@ -1007,14 +1009,15 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev)
        const u8 *mac_addr;
        int privsize, len, ret = -ENODEV;
 
-       if (!ofdev->dev.of_match)
+       match = of_match_device(fs_enet_match, &ofdev->dev);
+       if (!match)
                return -EINVAL;
 
        fpi = kzalloc(sizeof(*fpi), GFP_KERNEL);
        if (!fpi)
                return -ENOMEM;
 
-       if (!IS_FEC(ofdev->dev.of_match)) {
+       if (!IS_FEC(match)) {
                data = of_get_property(ofdev->dev.of_node, "fsl,cpm-command", &len);
                if (!data || len != 4)
                        goto out_free_fpi;
@@ -1049,7 +1052,7 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev)
        fep->dev = &ofdev->dev;
        fep->ndev = ndev;
        fep->fpi = fpi;
-       fep->ops = ofdev->dev.of_match->data;
+       fep->ops = match->data;
 
        ret = fep->ops->setup_data(ndev);
        if (ret)
index 7e840d373ab36b73fce7676684a1110056ccb4b5..6a2e150e75bb2d774d2afc50f3913f558c92ae77 100644 (file)
@@ -101,17 +101,20 @@ static int fs_enet_fec_mii_reset(struct mii_bus *bus)
        return 0;
 }
 
+static struct of_device_id fs_enet_mdio_fec_match[];
 static int __devinit fs_enet_mdio_probe(struct platform_device *ofdev)
 {
+       const struct of_device_id *match;
        struct resource res;
        struct mii_bus *new_bus;
        struct fec_info *fec;
        int (*get_bus_freq)(struct device_node *);
        int ret = -ENOMEM, clock, speed;
 
-       if (!ofdev->dev.of_match)
+       match = of_match_device(fs_enet_mdio_fec_match, &ofdev->dev);
+       if (!match)
                return -EINVAL;
-       get_bus_freq = ofdev->dev.of_match->data;
+       get_bus_freq = match->data;
 
        new_bus = mdiobus_alloc();
        if (!new_bus)
index eb4f59fb01e90cacc722a732e23ef46181747c61..bff2f7999ff0d36740632c8d35bdcbc067d9a756 100644 (file)
@@ -3237,15 +3237,18 @@ static void happy_meal_pci_exit(void)
 #endif
 
 #ifdef CONFIG_SBUS
+static const struct of_device_id hme_sbus_match[];
 static int __devinit hme_sbus_probe(struct platform_device *op)
 {
+       const struct of_device_id *match;
        struct device_node *dp = op->dev.of_node;
        const char *model = of_get_property(dp, "model", NULL);
        int is_qfe;
 
-       if (!op->dev.of_match)
+       match = of_match_device(hme_sbus_match, &op->dev);
+       if (!match)
                return -EINVAL;
-       is_qfe = (op->dev.of_match->data != NULL);
+       is_qfe = (match->data != NULL);
 
        if (!is_qfe && model && !strcmp(model, "SUNW,sbus-qfe"))
                is_qfe = 1;
index e2d45c91b8e8fa9d99d2d6abc17739aff12e6012..9689d41c788887f4f3fb12e43c87b0c67ae1aa39 100644 (file)
@@ -1292,8 +1292,10 @@ static struct scsi_host_template qpti_template = {
        .use_clustering         = ENABLE_CLUSTERING,
 };
 
+static const struct of_device_id qpti_match[];
 static int __devinit qpti_sbus_probe(struct platform_device *op)
 {
+       const struct of_device_id *match;
        struct scsi_host_template *tpnt;
        struct device_node *dp = op->dev.of_node;
        struct Scsi_Host *host;
@@ -1301,9 +1303,10 @@ static int __devinit qpti_sbus_probe(struct platform_device *op)
        static int nqptis;
        const char *fcode;
 
-       if (!op->dev.of_match)
+       match = of_match_device(qpti_match, &op->dev);
+       if (!match)
                return -EINVAL;
-       tpnt = op->dev.of_match->data;
+       tpnt = match->data;
 
        /* Sometimes Antares cards come up not completely
         * setup, and we get a report of a zero IRQ.
index 95019c747cc17cad69ed22c9091c2228b914c23b..4778e27071689593ec4fdffdd1107018c2f7ebe6 100644 (file)
@@ -636,7 +636,7 @@ static int sr_probe(struct device *dev)
        disk->first_minor = minor;
        sprintf(disk->disk_name, "sr%d", minor);
        disk->fops = &sr_bdops;
-       disk->flags = GENHD_FL_CD;
+       disk->flags = GENHD_FL_CD | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
        disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
 
        blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT);
index 0e8eec516df4d94f0395842a3f4573cd036dbfdc..c911b2419abb8a664f3bfad7d258a94c1363fa5b 100644 (file)
@@ -80,14 +80,17 @@ static int __devinit of_platform_serial_setup(struct platform_device *ofdev,
 /*
  * Try to register a serial port
  */
+static struct of_device_id of_platform_serial_table[];
 static int __devinit of_platform_serial_probe(struct platform_device *ofdev)
 {
+       const struct of_device_id *match;
        struct of_serial_info *info;
        struct uart_port port;
        int port_type;
        int ret;
 
-       if (!ofdev->dev.of_match)
+       match = of_match_device(of_platform_serial_table, &ofdev->dev);
+       if (!match)
                return -EINVAL;
 
        if (of_find_property(ofdev->dev.of_node, "used-by-rtas", NULL))
@@ -97,7 +100,7 @@ static int __devinit of_platform_serial_probe(struct platform_device *ofdev)
        if (info == NULL)
                return -ENOMEM;
 
-       port_type = (unsigned long)ofdev->dev.of_match->data;
+       port_type = (unsigned long)match->data;
        ret = of_platform_serial_setup(ofdev, port_type, &port);
        if (ret)
                goto out;
index 36613b37c50442c96a3bad47449365aaf31b1e89..3a68e09309f750107528451f7d2b4c40d645f1b1 100644 (file)
@@ -2539,15 +2539,18 @@ static void qe_udc_release(struct device *dev)
 }
 
 /* Driver probe functions */
+static const struct of_device_id qe_udc_match[];
 static int __devinit qe_udc_probe(struct platform_device *ofdev)
 {
+       const struct of_device_id *match;
        struct device_node *np = ofdev->dev.of_node;
        struct qe_ep *ep;
        unsigned int ret = 0;
        unsigned int i;
        const void *prop;
 
-       if (!ofdev->dev.of_match)
+       match = of_match_device(qe_udc_match, &ofdev->dev);
+       if (!match)
                return -EINVAL;
 
        prop = of_get_property(np, "mode", NULL);
@@ -2561,7 +2564,7 @@ static int __devinit qe_udc_probe(struct platform_device *ofdev)
                return -ENOMEM;
        }
 
-       udc_controller->soc_type = (unsigned long)ofdev->dev.of_match->data;
+       udc_controller->soc_type = (unsigned long)match->data;
        udc_controller->usb_regs = of_iomap(np, 0);
        if (!udc_controller->usb_regs) {
                ret = -ENOMEM;
index 528bceb220fd5783383d8575e99ff2099fdbdd95..eed5436ffb512dd75b54f3c10a97040f71a0c71d 100644 (file)
@@ -185,17 +185,20 @@ static struct miscdevice mpc8xxx_wdt_miscdev = {
        .fops   = &mpc8xxx_wdt_fops,
 };
 
+static const struct of_device_id mpc8xxx_wdt_match[];
 static int __devinit mpc8xxx_wdt_probe(struct platform_device *ofdev)
 {
        int ret;
+       const struct of_device_id *match;
        struct device_node *np = ofdev->dev.of_node;
        struct mpc8xxx_wdt_type *wdt_type;
        u32 freq = fsl_get_sys_freq();
        bool enabled;
 
-       if (!ofdev->dev.of_match)
+       match = of_match_device(mpc8xxx_wdt_match, &ofdev->dev);
+       if (!match)
                return -EINVAL;
-       wdt_type = ofdev->dev.of_match->data;
+       wdt_type = match->data;
 
        if (!freq || freq == -1)
                return -EINVAL;
index 257b00e9842808721780d1af1c09aa2f35558d35..d7c2e0fddc6f231cb4734af8355a54f846d78660 100644 (file)
@@ -1237,6 +1237,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
        res = __blkdev_get(bdev, mode, 0);
 
        if (whole) {
+               struct gendisk *disk = whole->bd_disk;
+
                /* finish claiming */
                mutex_lock(&bdev->bd_mutex);
                spin_lock(&bdev_lock);
@@ -1263,15 +1265,16 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
                spin_unlock(&bdev_lock);
 
                /*
-                * Block event polling for write claims.  Any write
-                * holder makes the write_holder state stick until all
-                * are released.  This is good enough and tracking
-                * individual writeable reference is too fragile given
-                * the way @mode is used in blkdev_get/put().
+                * Block event polling for write claims if requested.  Any
+                * write holder makes the write_holder state stick until
+                * all are released.  This is good enough and tracking
+                * individual writeable reference is too fragile given the
+                * way @mode is used in blkdev_get/put().
                 */
-               if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+               if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) &&
+                   !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
                        bdev->bd_write_holder = true;
-                       disk_block_events(bdev->bd_disk);
+                       disk_block_events(disk);
                }
 
                mutex_unlock(&bdev->bd_mutex);
index 3313dd19f543841d7e4689cfd18da028e4522892..9a37a9b6de3a23f026f6dc0d9c2906e07b1f8bf0 100644 (file)
@@ -53,11 +53,14 @@ DEFINE_SPINLOCK(configfs_dirent_lock);
 static void configfs_d_iput(struct dentry * dentry,
                            struct inode * inode)
 {
-       struct configfs_dirent * sd = dentry->d_fsdata;
+       struct configfs_dirent *sd = dentry->d_fsdata;
 
        if (sd) {
                BUG_ON(sd->s_dentry != dentry);
+               /* Coordinate with configfs_readdir */
+               spin_lock(&configfs_dirent_lock);
                sd->s_dentry = NULL;
+               spin_unlock(&configfs_dirent_lock);
                configfs_put(sd);
        }
        iput(inode);
@@ -689,7 +692,8 @@ static int create_default_group(struct config_group *parent_group,
                        sd = child->d_fsdata;
                        sd->s_type |= CONFIGFS_USET_DEFAULT;
                } else {
-                       d_delete(child);
+                       BUG_ON(child->d_inode);
+                       d_drop(child);
                        dput(child);
                }
        }
@@ -1545,7 +1549,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
        struct configfs_dirent * parent_sd = dentry->d_fsdata;
        struct configfs_dirent *cursor = filp->private_data;
        struct list_head *p, *q = &cursor->s_sibling;
-       ino_t ino;
+       ino_t ino = 0;
        int i = filp->f_pos;
 
        switch (i) {
@@ -1573,6 +1577,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
                                struct configfs_dirent *next;
                                const char * name;
                                int len;
+                               struct inode *inode = NULL;
 
                                next = list_entry(p, struct configfs_dirent,
                                                   s_sibling);
@@ -1581,9 +1586,28 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
 
                                name = configfs_get_name(next);
                                len = strlen(name);
-                               if (next->s_dentry)
-                                       ino = next->s_dentry->d_inode->i_ino;
-                               else
+
+                               /*
+                                * We'll have a dentry and an inode for
+                                * PINNED items and for open attribute
+                                * files.  We lock here to prevent a race
+                                * with configfs_d_iput() clearing
+                                * s_dentry before calling iput().
+                                *
+                                * Why do we go to the trouble?  If
+                                * someone has an attribute file open,
+                                * the inode number should match until
+                                * they close it.  Beyond that, we don't
+                                * care.
+                                */
+                               spin_lock(&configfs_dirent_lock);
+                               dentry = next->s_dentry;
+                               if (dentry)
+                                       inode = dentry->d_inode;
+                               if (inode)
+                                       ino = inode->i_ino;
+                               spin_unlock(&configfs_dirent_lock);
+                               if (!inode)
                                        ino = iunique(configfs_sb, 2);
 
                                if (filldir(dirent, name, len, filp->f_pos, ino,
@@ -1683,7 +1707,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
                err = configfs_attach_group(sd->s_element, &group->cg_item,
                                            dentry);
                if (err) {
-                       d_delete(dentry);
+                       BUG_ON(dentry->d_inode);
+                       d_drop(dentry);
                        dput(dentry);
                } else {
                        spin_lock(&configfs_dirent_lock);
index 643720209a9899506b9a1c477b458b91b5070d4c..9a3e6bbff27bd4839b487c2c14282fd9ef1a4675 100644 (file)
@@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg,
 
 /* We want to make sure that nobody is heartbeating on top of us --
  * this will help detect an invalid configuration. */
-static int o2hb_check_last_timestamp(struct o2hb_region *reg)
+static void o2hb_check_last_timestamp(struct o2hb_region *reg)
 {
-       int node_num, ret;
        struct o2hb_disk_slot *slot;
        struct o2hb_disk_heartbeat_block *hb_block;
+       char *errstr;
 
-       node_num = o2nm_this_node();
-
-       ret = 1;
-       slot = &reg->hr_slots[node_num];
+       slot = &reg->hr_slots[o2nm_this_node()];
        /* Don't check on our 1st timestamp */
-       if (slot->ds_last_time) {
-               hb_block = slot->ds_raw_block;
+       if (!slot->ds_last_time)
+               return;
 
-               if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time)
-                       ret = 0;
-       }
+       hb_block = slot->ds_raw_block;
+       if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time &&
+           le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation &&
+           hb_block->hb_node == slot->ds_node_num)
+               return;
 
-       return ret;
+#define ERRSTR1                "Another node is heartbeating on device"
+#define ERRSTR2                "Heartbeat generation mismatch on device"
+#define ERRSTR3                "Heartbeat sequence mismatch on device"
+
+       if (hb_block->hb_node != slot->ds_node_num)
+               errstr = ERRSTR1;
+       else if (le64_to_cpu(hb_block->hb_generation) !=
+                slot->ds_last_generation)
+               errstr = ERRSTR2;
+       else
+               errstr = ERRSTR3;
+
+       mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), "
+            "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name,
+            slot->ds_node_num, (unsigned long long)slot->ds_last_generation,
+            (unsigned long long)slot->ds_last_time, hb_block->hb_node,
+            (unsigned long long)le64_to_cpu(hb_block->hb_generation),
+            (unsigned long long)le64_to_cpu(hb_block->hb_seq));
 }
 
 static inline void o2hb_prepare_block(struct o2hb_region *reg,
@@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
        /* With an up to date view of the slots, we can check that no
         * other node has been improperly configured to heartbeat in
         * our slot. */
-       if (!o2hb_check_last_timestamp(reg))
-               mlog(ML_ERROR, "Device \"%s\": another node is heartbeating "
-                    "in our slot!\n", reg->hr_dev_name);
+       o2hb_check_last_timestamp(reg);
 
        /* fill in the proper info for our next heartbeat */
        o2hb_prepare_block(reg, reg->hr_generation);
@@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
        }
 
        i = -1;
-       while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
-
+       while((i = find_next_bit(configured_nodes,
+                                O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
                change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
        }
 
@@ -1690,6 +1704,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
        struct file *filp = NULL;
        struct inode *inode = NULL;
        ssize_t ret = -EINVAL;
+       int live_threshold;
 
        if (reg->hr_bdev)
                goto out;
@@ -1766,8 +1781,18 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
         * A node is considered live after it has beat LIVE_THRESHOLD
         * times.  We're not steady until we've given them a chance
         * _after_ our first read.
+        * The default threshold is bare minimum so as to limit the delay
+        * during mounts. For global heartbeat, the threshold doubled for the
+        * first region.
         */
-       atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1);
+       live_threshold = O2HB_LIVE_THRESHOLD;
+       if (o2hb_global_heartbeat_active()) {
+               spin_lock(&o2hb_live_lock);
+               if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1)
+                       live_threshold <<= 1;
+               spin_unlock(&o2hb_live_lock);
+       }
+       atomic_set(&reg->hr_steady_iterations, live_threshold + 1);
 
        hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
                              reg->hr_item.ci_name);
index 9fe5b8fd658f0a3db09f1919512a0479a241ecca..8582e3f4f120647d05df81556639e627882b5811 100644 (file)
@@ -2868,7 +2868,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
                bytes = blocks_wanted << sb->s_blocksize_bits;
        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
        struct ocfs2_inode_info *oi = OCFS2_I(dir);
-       struct ocfs2_alloc_context *data_ac;
+       struct ocfs2_alloc_context *data_ac = NULL;
        struct ocfs2_alloc_context *meta_ac = NULL;
        struct buffer_head *dirdata_bh = NULL;
        struct buffer_head *dx_root_bh = NULL;
index 7540a492eaba0188d624a08f73688eee04838288..3b179d6cbde09be9017eb21fbefcc5df8b217a53 100644 (file)
@@ -1614,7 +1614,8 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
        spin_unlock(&dlm->spinlock);
 
        /* Support for global heartbeat and node info was added in 1.1 */
-       if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) {
+       if (dlm->dlm_locking_proto.pv_major > 1 ||
+           dlm->dlm_locking_proto.pv_minor > 0) {
                status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map);
                if (status) {
                        mlog_errno(status);
index fede57ed005ff417b80b5c3bc9c1d25fe3aaa526..84d166328cf7448f36cfcb0a54f08eefda50e18d 100644 (file)
@@ -2574,6 +2574,9 @@ fail:
                res->state &= ~DLM_LOCK_RES_MIGRATING;
                wake = 1;
                spin_unlock(&res->spinlock);
+               if (dlm_is_host_down(ret))
+                       dlm_wait_for_node_death(dlm, target,
+                                               DLM_NODE_DEATH_WAIT_MAX);
                goto leave;
        }
 
index 41565ae528566b3a5bfca14092aad21bba721812..89659d6dc2067276f2cebcea63a9bcc34a0a2486 100644 (file)
@@ -1607,6 +1607,9 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,
        range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
 
        if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
+               /*
+                * remove an entire extent record.
+                */
                *trunc_cpos = le32_to_cpu(rec->e_cpos);
                /*
                 * Skip holes if any.
@@ -1617,7 +1620,16 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,
                *blkno = le64_to_cpu(rec->e_blkno);
                *trunc_end = le32_to_cpu(rec->e_cpos);
        } else if (range > trunc_start) {
+               /*
+                * remove a partial extent record, which means we're
+                * removing the last extent record.
+                */
                *trunc_cpos = trunc_start;
+               /*
+                * skip hole if any.
+                */
+               if (range < *trunc_end)
+                       *trunc_end = range;
                *trunc_len = *trunc_end - trunc_start;
                coff = trunc_start - le32_to_cpu(rec->e_cpos);
                *blkno = le64_to_cpu(rec->e_blkno) +
index b141a44605ca076132cd704e22226ac8ce263fa7..295d56454e8b23b6e9d97a6bd258e6170311c8f5 100644 (file)
@@ -1260,6 +1260,9 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
 {
        struct ocfs2_journal *journal = osb->journal;
 
+       if (ocfs2_is_hard_readonly(osb))
+               return;
+
        /* No need to queue up our truncate_log as regular cleanup will catch
         * that */
        ocfs2_queue_recovery_completion(journal, osb->slot_num,
index d545e97d99c3390706894ff1b030c2491a2c0a38..8ed4d3433199fb233913c3f2e6e88afc149c062c 100644 (file)
@@ -255,7 +255,11 @@ ssize_t part_discard_alignment_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
 {
        struct hd_struct *p = dev_to_part(dev);
-       return sprintf(buf, "%u\n", p->discard_alignment);
+       struct gendisk *disk = dev_to_disk(dev);
+
+       return sprintf(buf, "%u\n",
+                       queue_limit_discard_alignment(&disk->queue->limits,
+                                                       p->start_sect));
 }
 
 ssize_t part_stat_show(struct device *dev,
@@ -449,8 +453,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
        p->start_sect = start;
        p->alignment_offset =
                queue_limit_alignment_offset(&disk->queue->limits, start);
-       p->discard_alignment =
-               queue_limit_discard_alignment(&disk->queue->limits, start);
        p->nr_sects = len;
        p->partno = partno;
        p->policy = get_disk_ro(disk);
index 2ad95fa1d130f8c8f5cfd68e5fe7a0108aec31d9..ae9091a684807ffcc95fa6c0fb6de3233383d4cf 100644 (file)
@@ -257,7 +257,7 @@ struct queue_limits {
        unsigned char           misaligned;
        unsigned char           discard_misaligned;
        unsigned char           cluster;
-       signed char             discard_zeroes_data;
+       unsigned char           discard_zeroes_data;
 };
 
 struct request_queue
@@ -364,6 +364,8 @@ struct request_queue
         * for flush operations
         */
        unsigned int            flush_flags;
+       unsigned int            flush_not_queueable:1;
+       unsigned int            flush_queue_delayed:1;
        unsigned int            flush_pending_idx:1;
        unsigned int            flush_running_idx:1;
        unsigned long           flush_pending_since;
@@ -843,6 +845,7 @@ extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
+extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
@@ -1066,13 +1069,16 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
 {
        unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
 
+       if (!lim->max_discard_sectors)
+               return 0;
+
        return (lim->discard_granularity + lim->discard_alignment - alignment)
                & (lim->discard_granularity - 1);
 }
 
 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
 {
-       if (q->limits.discard_zeroes_data == 1)
+       if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1)
                return 1;
 
        return 0;
@@ -1111,6 +1117,11 @@ static inline unsigned int block_size(struct block_device *bdev)
        return bdev->bd_block_size;
 }
 
+static inline bool queue_flush_queueable(struct request_queue *q)
+{
+       return !q->flush_not_queueable;
+}
+
 typedef struct {struct page *v;} Sector;
 
 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
index ab8dfc095709674c9afca67152af5c651f1e2d03..d08399db6e2c6076f8e496a155ca667bbfa70af9 100644 (file)
@@ -442,7 +442,6 @@ struct device {
        struct dev_archdata     archdata;
 
        struct device_node      *of_node; /* associated device tree node */
-       const struct of_device_id *of_match; /* matching of_device_id from driver */
 
        dev_t                   devt;   /* dev_t, creates the sysfs "dev" */
 
index d764a426e9fdbf5b5086542e3eeb6c6a2e6d3c8a..b78956b3c2e749076078cf74e439144c447c9058 100644 (file)
@@ -100,7 +100,6 @@ struct hd_struct {
        sector_t start_sect;
        sector_t nr_sects;
        sector_t alignment_offset;
-       unsigned int discard_alignment;
        struct device __dev;
        struct kobject *holder_dir;
        int policy, partno;
@@ -127,6 +126,7 @@ struct hd_struct {
 #define GENHD_FL_SUPPRESS_PARTITION_INFO       32
 #define GENHD_FL_EXT_DEVT                      64 /* allow extended devt */
 #define GENHD_FL_NATIVE_CAPACITY               128
+#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE    256
 
 enum {
        DISK_EVENT_MEDIA_CHANGE                 = 1 << 0, /* media changed */
index 8bfe6c1d43654069b8fa2e1bbc61ab4d34dd4d22..ae5638480ef22141f0699854d4b59fbba57aeffe 100644 (file)
@@ -21,8 +21,7 @@ extern void of_device_make_bus_id(struct device *dev);
 static inline int of_driver_match_device(struct device *dev,
                                         const struct device_driver *drv)
 {
-       dev->of_match = of_match_device(drv->of_match_table, dev);
-       return dev->of_match != NULL;
+       return of_match_device(drv->of_match_table, dev) != NULL;
 }
 
 extern struct platform_device *of_dev_get(struct platform_device *dev);
@@ -58,6 +57,11 @@ static inline int of_device_uevent(struct device *dev,
 
 static inline void of_device_node_put(struct device *dev) { }
 
+static inline const struct of_device_id *of_match_device(
+               const struct of_device_id *matches, const struct device *dev)
+{
+       return NULL;
+}
 #endif /* CONFIG_OF_DEVICE */
 
 #endif /* _LINUX_OF_DEVICE_H */