Merge branch 'for-linus' of git://git.kernel.dk/linux-block
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Jul 2015 19:12:16 +0000 (12:12 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Jul 2015 19:12:16 +0000 (12:12 -0700)
Pull block fixes from Jens Axboe:
 "Mainly sending this off now for the writeback fixes, since they fix a
  real regression introduced with the cgroup writeback changes.  The
  NVMe fix could wait for next pull for this series, but it's simple
  enough that we might as well include it.

  This contains:

   - two cgroup writeback fixes from Tejun, fixing a user reported issue
     with luks crypt devices hanging when being closed.

   - NVMe error cleanup fix from Jon Derrick, fixing a case where we'd
     attempt to free an unregistered IRQ"

* 'for-linus' of git://git.kernel.dk/linux-block:
  NVMe: Fix irq freeing when queue_request_irq fails
  writeback: don't drain bdi_writeback_congested on bdi destruction
  writeback: don't embed root bdi_writeback_congested in bdi_writeback

310 files changed:
Documentation/devicetree/bindings/usb/atmel-usb.txt
Documentation/ioctl/ioctl-number.txt
Documentation/kernel-parameters.txt
MAINTAINERS
Makefile
arch/arm/boot/dts/armada-xp.dtsi
arch/arm/boot/dts/at91sam9g45.dtsi
arch/arm/boot/dts/at91sam9x5.dtsi
arch/arm/boot/dts/sama5d3.dtsi
arch/arm/boot/dts/sama5d4.dtsi
arch/arm/configs/multi_v7_defconfig
arch/arm/kernel/entry-armv.S
arch/arm/mach-bcm/Kconfig
arch/arm/mach-dove/include/mach/irqs.h
arch/arm/mach-dove/irq.c
arch/arm/mach-mvebu/headsmp-a9.S
arch/arm/mach-mvebu/platsmp-a9.c
arch/arm/mach-mvebu/pm-board.c
arch/arm/mach-rockchip/platsmp.c
arch/arm/mach-vexpress/spc.c
arch/arm64/boot/dts/apm/apm-storm.dtsi
arch/cris/arch-v10/drivers/eeprom.c
arch/cris/arch-v32/mm/intmem.c
arch/frv/mb93090-mb00/flash.c
arch/ia64/hp/sim/simscsi.c
arch/ia64/mm/init.c
arch/ia64/sn/kernel/mca.c
arch/mn10300/unit-asb2303/flash.c
arch/parisc/kernel/pdc_cons.c
arch/parisc/kernel/perf.c
arch/powerpc/kernel/time.c
arch/powerpc/mm/hugetlbpage.c
arch/powerpc/platforms/83xx/suspend.c
arch/powerpc/platforms/ps3/time.c
arch/powerpc/sysdev/fsl_lbc.c
arch/sh/boards/mach-highlander/psw.c
arch/sh/boards/mach-landisk/psw.c
arch/tile/kernel/usb.c
arch/x86/kernel/bootflag.c
arch/x86/kernel/cpu/perf_event_intel_bts.c
arch/x86/kernel/cpu/perf_event_intel_pt.c
arch/x86/kernel/devicetree.c
arch/x86/kernel/vsmp_64.c
arch/x86/platform/intel-mid/intel_mid_vrtc.c
arch/xtensa/platforms/iss/network.c
crypto/asymmetric_keys/pkcs7_key_type.c
drivers/acpi/Kconfig
drivers/acpi/acpica/accommon.h
drivers/acpi/acpica/acglobal.h
drivers/acpi/acpica/acinterp.h
drivers/acpi/acpica/aclocal.h
drivers/acpi/acpica/acnamesp.h
drivers/acpi/acpica/acobject.h
drivers/acpi/acpica/acstruct.h
drivers/acpi/acpica/acutils.h
drivers/acpi/acpica/dsfield.c
drivers/acpi/acpica/dsinit.c
drivers/acpi/acpica/dsobject.c
drivers/acpi/acpica/dsutils.c
drivers/acpi/acpica/dswload.c
drivers/acpi/acpica/evgpeinit.c
drivers/acpi/acpica/exconfig.c
drivers/acpi/acpica/exconvrt.c
drivers/acpi/acpica/exdebug.c
drivers/acpi/acpica/exdump.c
drivers/acpi/acpica/exfield.c
drivers/acpi/acpica/exfldio.c
drivers/acpi/acpica/exmisc.c
drivers/acpi/acpica/exnames.c
drivers/acpi/acpica/exoparg2.c
drivers/acpi/acpica/exoparg3.c
drivers/acpi/acpica/exregion.c
drivers/acpi/acpica/exstorob.c
drivers/acpi/acpica/exutils.c
drivers/acpi/acpica/hwxfsleep.c
drivers/acpi/acpica/nsaccess.c
drivers/acpi/acpica/nsconvert.c
drivers/acpi/acpica/nsdump.c
drivers/acpi/acpica/nseval.c
drivers/acpi/acpica/nsinit.c
drivers/acpi/acpica/nsparse.c
drivers/acpi/acpica/nsrepair2.c
drivers/acpi/acpica/nssearch.c
drivers/acpi/acpica/nsutils.c
drivers/acpi/acpica/nsxfeval.c
drivers/acpi/acpica/nsxfname.c
drivers/acpi/acpica/psutils.c
drivers/acpi/acpica/rscreate.c
drivers/acpi/acpica/rsmisc.c
drivers/acpi/acpica/rsutils.c
drivers/acpi/acpica/rsxface.c
drivers/acpi/acpica/tbdata.c
drivers/acpi/acpica/tbfadt.c
drivers/acpi/acpica/tbfind.c
drivers/acpi/acpica/tbinstal.c
drivers/acpi/acpica/tbprint.c
drivers/acpi/acpica/tbutils.c
drivers/acpi/acpica/tbxface.c
drivers/acpi/acpica/tbxfload.c
drivers/acpi/acpica/utalloc.c
drivers/acpi/acpica/utbuffer.c
drivers/acpi/acpica/utcache.c
drivers/acpi/acpica/utcopy.c
drivers/acpi/acpica/utdebug.c
drivers/acpi/acpica/utglobal.c
drivers/acpi/acpica/utids.c
drivers/acpi/acpica/utmisc.c
drivers/acpi/acpica/utosi.c
drivers/acpi/acpica/utpredef.c
drivers/acpi/acpica/utprint.c
drivers/acpi/acpica/utstring.c
drivers/acpi/acpica/uttrack.c
drivers/acpi/acpica/utxface.c
drivers/acpi/acpica/utxfinit.c
drivers/acpi/blacklist.c
drivers/acpi/internal.h
drivers/acpi/osl.c
drivers/block/rbd.c
drivers/char/agp/intel-gtt.c
drivers/clk/clk-max77686.c
drivers/clk/clk-max77802.c
drivers/clk/clk-nomadik.c
drivers/clk/sunxi/clk-mod0.c
drivers/cpufreq/exynos-cpufreq.c
drivers/cpufreq/s5pv210-cpufreq.c
drivers/cpuidle/cpuidle-at91.c
drivers/cpuidle/cpuidle-calxeda.c
drivers/cpuidle/cpuidle-zynq.c
drivers/crypto/mv_cesa.c
drivers/edac/octeon_edac-l2c.c
drivers/edac/octeon_edac-lmc.c
drivers/edac/octeon_edac-pc.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/cik.c
drivers/gpu/drm/amd/amdgpu/cikd.h
drivers/gpu/drm/amd/amdgpu/cz_dpm.c
drivers/gpu/drm/amd/amdgpu/cz_dpm.h
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/i915/i915_gem_gtt.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_panel.c
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/cik_sdma.c
drivers/gpu/drm/radeon/radeon_audio.c
drivers/gpu/drm/radeon/radeon_fb.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/radeon/radeon_vm.c
drivers/gpu/drm/rockchip/rockchip_drm_drv.c
drivers/gpu/drm/rockchip/rockchip_drm_vop.c
drivers/hsi/controllers/omap_ssi.h
drivers/hwmon/dell-smm-hwmon.c
drivers/hwmon/mcp3021.c
drivers/hwmon/nct7802.c
drivers/mailbox/pl320-ipc.c
drivers/mmc/host/omap_hsmmc.c
drivers/pcmcia/xxs1500_ss.c
drivers/platform/goldfish/pdev_bus.c
drivers/power/reset/syscon-reboot.c
drivers/regulator/max77802.c
drivers/soc/qcom/spm.c
drivers/soc/tegra/pmc.c
drivers/soc/versatile/soc-realview.c
drivers/tty/metag_da.c
drivers/tty/serial/8250/8250_omap.c
drivers/tty/serial/omap-serial.c
drivers/video/fbdev/omap2/dss/dss.c
fs/ceph/acl.c
fs/ceph/addr.c
fs/ceph/caps.c
fs/ceph/dir.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/snap.c
fs/ceph/super.c
fs/ceph/super.h
fs/ceph/xattr.c
fs/fuse/cuse.c
fs/fuse/dev.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/nfs/callback.c
fs/nfs/callback_proc.c
fs/nfs/callback_xdr.c
fs/nfs/client.c
fs/nfs/dir.c
fs/nfs/file.c
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/flexfilelayout/flexfilelayout.h
fs/nfs/flexfilelayout/flexfilelayoutdev.c
fs/nfs/inode.c
fs/nfs/nfs3xdr.c
fs/nfs/nfs42.h
fs/nfs/nfs42proc.c
fs/nfs/nfs42xdr.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4client.c
fs/nfs/nfs4file.c
fs/nfs/nfs4getroot.c
fs/nfs/nfs4idmap.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/nfs4xdr.c
fs/nfs/pagelist.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/write.c
fs/notify/inotify/inotify_user.c
fs/overlayfs/readdir.c
fs/overlayfs/super.c
include/acpi/acnames.h
include/acpi/acoutput.h
include/acpi/acpixf.h
include/acpi/actbl.h
include/acpi/actbl1.h
include/acpi/actbl2.h
include/acpi/actbl3.h
include/acpi/actypes.h
include/acpi/platform/acenv.h
include/acpi/platform/acenvex.h
include/acpi/platform/acgcc.h
include/linux/ceph/libceph.h
include/linux/ceph/osd_client.h
include/linux/crush/crush.h
include/linux/crush/hash.h
include/linux/crush/mapper.h
include/linux/device.h
include/linux/init.h
include/linux/nfs4.h
include/linux/nfs_fs.h
include/linux/nfs_fs_sb.h
include/linux/nfs_page.h
include/linux/nfs_xdr.h
include/linux/platform_device.h
include/linux/sunrpc/bc_xprt.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/sched.h
include/linux/sunrpc/xprt.h
include/linux/sunrpc/xprtrdma.h
include/uapi/drm/amdgpu_drm.h
include/uapi/linux/fuse.h
kernel/Makefile
lib/Makefile
lib/debug_info.c [new file with mode: 0644]
lib/list_sort.c
mm/nommu.c
mm/page_owner.c
net/ceph/ceph_common.c
net/ceph/crush/crush.c
net/ceph/crush/crush_ln_table.h
net/ceph/crush/hash.c
net/ceph/crush/mapper.c
net/ceph/messenger.c
net/ceph/mon_client.c
net/ceph/osd_client.c
net/ceph/osdmap.c
net/ceph/pagevec.c
net/ipv4/netfilter.c
net/sunrpc/Makefile
net/sunrpc/backchannel_rqst.c
net/sunrpc/bc_svc.c [deleted file]
net/sunrpc/clnt.c
net/sunrpc/debugfs.c
net/sunrpc/svc.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/fmr_ops.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/physical_ops.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/sunrpc/xprtsock.c
scripts/kconfig/Makefile
scripts/kconfig/expr.c
scripts/kconfig/expr.h
scripts/kconfig/symbol.c
scripts/kconfig/zconf.l
scripts/kconfig/zconf.lex.c_shipped
scripts/kconfig/zconf.tab.c_shipped
scripts/kconfig/zconf.y
scripts/link-vmlinux.sh
scripts/tags.sh
tools/power/acpi/common/getopt.c
tools/power/acpi/man/acpidump.8
tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
tools/power/acpi/os_specific/service_layers/osunixmap.c
tools/power/acpi/tools/acpidump/acpidump.h
tools/power/acpi/tools/acpidump/apdump.c
tools/power/acpi/tools/acpidump/apfiles.c
tools/power/acpi/tools/acpidump/apmain.c

index 1be8d7a26c15fff480c9d9644914eeee0bec03e9..5883b73ea1b56053fbafb98f473b1f2c8a349c62 100644 (file)
@@ -79,9 +79,9 @@ Atmel High-Speed USB device controller
 
 Required properties:
  - compatible: Should be one of the following
-              "at91sam9rl-udc"
-              "at91sam9g45-udc"
-              "sama5d3-udc"
+              "atmel,at91sam9rl-udc"
+              "atmel,at91sam9g45-udc"
+              "atmel,sama5d3-udc"
  - reg: Address and length of the register set for the device
  - interrupts: Should contain usba interrupt
  - clocks: Should reference the peripheral and host clocks
index 51f4221657bff5b03c9a8ef44116d6e27ef27423..611c52267d24812423821a9f062a407414f86e36 100644 (file)
@@ -321,6 +321,7 @@ Code  Seq#(hex)     Include File            Comments
 0xDB   00-0F   drivers/char/mwave/mwavepub.h
 0xDD   00-3F   ZFCP device driver      see drivers/s390/scsi/
                                        <mailto:aherrman@de.ibm.com>
+0xE5   00-3F   linux/fuse.h
 0xEC   00-01   drivers/platform/chrome/cros_ec_dev.h   ChromeOS EC driver
 0xF3   00-3F   drivers/usb/misc/sisusbvga/sisusb.h     sisfb (in development)
                                        <mailto:thomas@winischhofer.net>
index afe7e2bbbc23cbc01eef6224824f1c4d199833c7..1d6f0459cd7bbe531b7acc2d85722ff62185729e 100644 (file)
@@ -293,6 +293,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
        acpi_os_name=   [HW,ACPI] Tell ACPI BIOS the name of the OS
                        Format: To spoof as Windows 98: ="Microsoft Windows"
 
+       acpi_rev_override [ACPI] Override the _REV object to return 5 (instead
+                       of 2 which is mandated by ACPI 6) as the supported ACPI
+                       specification revision (when using this switch, it may
+                       be necessary to carry out a cold reboot _twice_ in a
+                       row to make it take effect on the platform firmware).
+
        acpi_osi=       [HW,ACPI] Modify list of supported OS interface strings
                        acpi_osi="string1"      # add string1
                        acpi_osi="!string2"     # remove string2
index 993d4cfd5aa01e02e7aa3016a92dc74ec477eff7..86ea2084bc581d3ad7992da40c01c88157e9ce84 100644 (file)
@@ -4430,9 +4430,11 @@ FUSE: FILESYSTEM IN USERSPACE
 M:     Miklos Szeredi <miklos@szeredi.hu>
 L:     fuse-devel@lists.sourceforge.net
 W:     http://fuse.sourceforge.net/
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git
 S:     Maintained
 F:     fs/fuse/
 F:     include/uapi/linux/fuse.h
+F:     Documentation/filesystems/fuse.txt
 
 FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit)
 M:     Rik Faith <faith@cs.unc.edu>
index 6c6f14628f329d0ba10f5632fb362c818c437ff5..26ac0281bc74e9bd8a4a4aab1c7c7a0c19d4436c 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -335,15 +335,6 @@ endif
 export KBUILD_MODULES KBUILD_BUILTIN
 export KBUILD_CHECKSRC KBUILD_SRC KBUILD_EXTMOD
 
-ifneq ($(CC),)
-ifeq ($(shell $(CC) -v 2>&1 | grep -c "clang version"), 1)
-COMPILER := clang
-else
-COMPILER := gcc
-endif
-export COMPILER
-endif
-
 # We need some generic definitions (do not try to remake the file).
 scripts/Kbuild.include: ;
 include scripts/Kbuild.include
@@ -670,6 +661,13 @@ endif
 endif
 KBUILD_CFLAGS += $(stackp-flag)
 
+ifeq ($(shell $(CC) -v 2>&1 | grep -c "clang version"), 1)
+COMPILER := clang
+else
+COMPILER := gcc
+endif
+export COMPILER
+
 ifeq ($(COMPILER),clang)
 KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
 KBUILD_CPPFLAGS += $(call cc-option,-Wno-unknown-warning-option,)
index 0854d4493da7a8518c241dabe06e91ce22eb5fe9..3de9b761cc1ab0fe7a8d3f0ea9caa7ca72e2a989 100644 (file)
        spi0_pins: spi0-pins {
                marvell,pins = "mpp36", "mpp37",
                               "mpp38", "mpp39";
-               marvell,function = "spi";
+               marvell,function = "spi0";
        };
 
        uart2_pins: uart2-pins {
index d260ba779ae53ce671db09142e99c573a23c22dd..18177f5a7464200453c9a82415b08bc73e6a6703 100644 (file)
                        usb2: gadget@fff78000 {
                                #address-cells = <1>;
                                #size-cells = <0>;
-                               compatible = "atmel,at91sam9rl-udc";
+                               compatible = "atmel,at91sam9g45-udc";
                                reg = <0x00600000 0x80000
                                       0xfff78000 0x400>;
                                interrupts = <27 IRQ_TYPE_LEVEL_HIGH 0>;
index 7521bdf17ef25ab133e61f7c7a71fd0a189004d2..b6c8df8d380ea41c9ed9807fb15dd4708d23a913 100644 (file)
                        usb2: gadget@f803c000 {
                                #address-cells = <1>;
                                #size-cells = <0>;
-                               compatible = "atmel,at91sam9rl-udc";
+                               compatible = "atmel,at91sam9g45-udc";
                                reg = <0x00500000 0x80000
                                       0xf803c000 0x400>;
                                interrupts = <23 IRQ_TYPE_LEVEL_HIGH 0>;
index 5ab7548e04e1f459eb7105e5808bcc3e54f938b1..9e2444b07bceee0153c69a83e22c8fd7a54c06fe 100644 (file)
                usb0: gadget@00500000 {
                        #address-cells = <1>;
                        #size-cells = <0>;
-                       compatible = "atmel,at91sam9rl-udc";
+                       compatible = "atmel,sama5d3-udc";
                        reg = <0x00500000 0x100000
                               0xf8030000 0x4000>;
                        interrupts = <33 IRQ_TYPE_LEVEL_HIGH 2>;
index 653a1f851f2b8f5bf641cb586ede4f0841105c6b..3ee22ee13c5a899fba199321cb55b6a3a15221b1 100644 (file)
                usb0: gadget@00400000 {
                        #address-cells = <1>;
                        #size-cells = <0>;
-                       compatible = "atmel,at91sam9rl-udc";
+                       compatible = "atmel,sama5d3-udc";
                        reg = <0x00400000 0x100000
                               0xfc02c000 0x4000>;
                        interrupts = <47 IRQ_TYPE_LEVEL_HIGH 2>;
index fd6a6d23bc20b0f470c757a1592d88cf84ac5a9c..6d83a1bf0c7494593eb608ddba29f2bfea8d8455 100644 (file)
@@ -169,6 +169,7 @@ CONFIG_MTD_BLOCK=y
 CONFIG_MTD_M25P80=y
 CONFIG_MTD_NAND=y
 CONFIG_MTD_NAND_ATMEL=y
+CONFIG_MTD_NAND_BRCMNAND=y
 CONFIG_MTD_NAND_DAVINCI=y
 CONFIG_MTD_SPI_NOR=y
 CONFIG_MTD_UBI=y
index f8f7398c74c2d355d63a2bf3ef3faec225d2011c..7dac3086e361c8e3680d5209e5b72184c9c48cdb 100644 (file)
@@ -15,6 +15,8 @@
  *  that causes it to save wrong values...  Be aware!
  */
 
+#include <linux/init.h>
+
 #include <asm/assembler.h>
 #include <asm/memory.h>
 #include <asm/glue-df.h>
index e9184feffc4e5b55d46008be3f3587d2756ea6e3..0ac9e4b3b26525b1ff466ad60a92f6a79a1204c9 100644 (file)
@@ -19,7 +19,6 @@ config ARCH_BCM_IPROC
        select ARCH_REQUIRE_GPIOLIB
        select ARM_AMBA
        select PINCTRL
-       select MTD_NAND_BRCMNAND
        help
          This enables support for systems based on Broadcom IPROC architected SoCs.
          The IPROC complex contains one or more ARM CPUs along with common
index 03d401d20453eb447f15ab5dca15cd80967d2baf..3f29e6bca058623504e9c87c1b46fc4afefe7aaf 100644 (file)
 /*
  * Dove Low Interrupt Controller
  */
-#define IRQ_DOVE_BRIDGE                0
-#define IRQ_DOVE_H2C           1
-#define IRQ_DOVE_C2H           2
-#define IRQ_DOVE_NAND          3
-#define IRQ_DOVE_PDMA          4
-#define IRQ_DOVE_SPI1          5
-#define IRQ_DOVE_SPI0          6
-#define IRQ_DOVE_UART_0                7
-#define IRQ_DOVE_UART_1                8
-#define IRQ_DOVE_UART_2                9
-#define IRQ_DOVE_UART_3                10
-#define IRQ_DOVE_I2C           11
-#define IRQ_DOVE_GPIO_0_7      12
-#define IRQ_DOVE_GPIO_8_15     13
-#define IRQ_DOVE_GPIO_16_23    14
-#define IRQ_DOVE_PCIE0_ERR     15
-#define IRQ_DOVE_PCIE0         16
-#define IRQ_DOVE_PCIE1_ERR     17
-#define IRQ_DOVE_PCIE1         18
-#define IRQ_DOVE_I2S0          19
-#define IRQ_DOVE_I2S0_ERR      20
-#define IRQ_DOVE_I2S1          21
-#define IRQ_DOVE_I2S1_ERR      22
-#define IRQ_DOVE_USB_ERR       23
-#define IRQ_DOVE_USB0          24
-#define IRQ_DOVE_USB1          25
-#define IRQ_DOVE_GE00_RX       26
-#define IRQ_DOVE_GE00_TX       27
-#define IRQ_DOVE_GE00_MISC     28
-#define IRQ_DOVE_GE00_SUM      29
-#define IRQ_DOVE_GE00_ERR      30
-#define IRQ_DOVE_CRYPTO                31
+#define IRQ_DOVE_BRIDGE                (1 + 0)
+#define IRQ_DOVE_H2C           (1 + 1)
+#define IRQ_DOVE_C2H           (1 + 2)
+#define IRQ_DOVE_NAND          (1 + 3)
+#define IRQ_DOVE_PDMA          (1 + 4)
+#define IRQ_DOVE_SPI1          (1 + 5)
+#define IRQ_DOVE_SPI0          (1 + 6)
+#define IRQ_DOVE_UART_0                (1 + 7)
+#define IRQ_DOVE_UART_1                (1 + 8)
+#define IRQ_DOVE_UART_2                (1 + 9)
+#define IRQ_DOVE_UART_3                (1 + 10)
+#define IRQ_DOVE_I2C           (1 + 11)
+#define IRQ_DOVE_GPIO_0_7      (1 + 12)
+#define IRQ_DOVE_GPIO_8_15     (1 + 13)
+#define IRQ_DOVE_GPIO_16_23    (1 + 14)
+#define IRQ_DOVE_PCIE0_ERR     (1 + 15)
+#define IRQ_DOVE_PCIE0         (1 + 16)
+#define IRQ_DOVE_PCIE1_ERR     (1 + 17)
+#define IRQ_DOVE_PCIE1         (1 + 18)
+#define IRQ_DOVE_I2S0          (1 + 19)
+#define IRQ_DOVE_I2S0_ERR      (1 + 20)
+#define IRQ_DOVE_I2S1          (1 + 21)
+#define IRQ_DOVE_I2S1_ERR      (1 + 22)
+#define IRQ_DOVE_USB_ERR       (1 + 23)
+#define IRQ_DOVE_USB0          (1 + 24)
+#define IRQ_DOVE_USB1          (1 + 25)
+#define IRQ_DOVE_GE00_RX       (1 + 26)
+#define IRQ_DOVE_GE00_TX       (1 + 27)
+#define IRQ_DOVE_GE00_MISC     (1 + 28)
+#define IRQ_DOVE_GE00_SUM      (1 + 29)
+#define IRQ_DOVE_GE00_ERR      (1 + 30)
+#define IRQ_DOVE_CRYPTO                (1 + 31)
 
 /*
  * Dove High Interrupt Controller
  */
-#define IRQ_DOVE_AC97          32
-#define IRQ_DOVE_PMU           33
-#define IRQ_DOVE_CAM           34
-#define IRQ_DOVE_SDIO0         35
-#define IRQ_DOVE_SDIO1         36
-#define IRQ_DOVE_SDIO0_WAKEUP  37
-#define IRQ_DOVE_SDIO1_WAKEUP  38
-#define IRQ_DOVE_XOR_00                39
-#define IRQ_DOVE_XOR_01                40
-#define IRQ_DOVE_XOR0_ERR      41
-#define IRQ_DOVE_XOR_10                42
-#define IRQ_DOVE_XOR_11                43
-#define IRQ_DOVE_XOR1_ERR      44
-#define IRQ_DOVE_LCD_DCON      45
-#define IRQ_DOVE_LCD1          46
-#define IRQ_DOVE_LCD0          47
-#define IRQ_DOVE_GPU           48
-#define IRQ_DOVE_PERFORM_MNTR  49
-#define IRQ_DOVE_VPRO_DMA1     51
-#define IRQ_DOVE_SSP_TIMER     54
-#define IRQ_DOVE_SSP           55
-#define IRQ_DOVE_MC_L2_ERR     56
-#define IRQ_DOVE_CRYPTO_ERR    59
-#define IRQ_DOVE_GPIO_24_31    60
-#define IRQ_DOVE_HIGH_GPIO     61
-#define IRQ_DOVE_SATA          62
+#define IRQ_DOVE_AC97          (1 + 32)
+#define IRQ_DOVE_PMU           (1 + 33)
+#define IRQ_DOVE_CAM           (1 + 34)
+#define IRQ_DOVE_SDIO0         (1 + 35)
+#define IRQ_DOVE_SDIO1         (1 + 36)
+#define IRQ_DOVE_SDIO0_WAKEUP  (1 + 37)
+#define IRQ_DOVE_SDIO1_WAKEUP  (1 + 38)
+#define IRQ_DOVE_XOR_00                (1 + 39)
+#define IRQ_DOVE_XOR_01                (1 + 40)
+#define IRQ_DOVE_XOR0_ERR      (1 + 41)
+#define IRQ_DOVE_XOR_10                (1 + 42)
+#define IRQ_DOVE_XOR_11                (1 + 43)
+#define IRQ_DOVE_XOR1_ERR      (1 + 44)
+#define IRQ_DOVE_LCD_DCON      (1 + 45)
+#define IRQ_DOVE_LCD1          (1 + 46)
+#define IRQ_DOVE_LCD0          (1 + 47)
+#define IRQ_DOVE_GPU           (1 + 48)
+#define IRQ_DOVE_PERFORM_MNTR  (1 + 49)
+#define IRQ_DOVE_VPRO_DMA1     (1 + 51)
+#define IRQ_DOVE_SSP_TIMER     (1 + 54)
+#define IRQ_DOVE_SSP           (1 + 55)
+#define IRQ_DOVE_MC_L2_ERR     (1 + 56)
+#define IRQ_DOVE_CRYPTO_ERR    (1 + 59)
+#define IRQ_DOVE_GPIO_24_31    (1 + 60)
+#define IRQ_DOVE_HIGH_GPIO     (1 + 61)
+#define IRQ_DOVE_SATA          (1 + 62)
 
 /*
  * DOVE General Purpose Pins
  */
-#define IRQ_DOVE_GPIO_START    64
+#define IRQ_DOVE_GPIO_START    65
 #define NR_GPIO_IRQS           64
 
 /*
index 4a5a7aedcb763e9673dd8d51b416c6f1024e3310..df0223f76fa92d8752f31d2ccb1dd40e18d11a4f 100644 (file)
@@ -126,14 +126,14 @@ __exception_irq_entry dove_legacy_handle_irq(struct pt_regs *regs)
        stat = readl_relaxed(dove_irq_base + IRQ_CAUSE_LOW_OFF);
        stat &= readl_relaxed(dove_irq_base + IRQ_MASK_LOW_OFF);
        if (stat) {
-               unsigned int hwirq = __fls(stat);
+               unsigned int hwirq = 1 + __fls(stat);
                handle_IRQ(hwirq, regs);
                return;
        }
        stat = readl_relaxed(dove_irq_base + IRQ_CAUSE_HIGH_OFF);
        stat &= readl_relaxed(dove_irq_base + IRQ_MASK_HIGH_OFF);
        if (stat) {
-               unsigned int hwirq = 32 + __fls(stat);
+               unsigned int hwirq = 33 + __fls(stat);
                handle_IRQ(hwirq, regs);
                return;
        }
@@ -144,8 +144,8 @@ void __init dove_init_irq(void)
 {
        int i;
 
-       orion_irq_init(0, IRQ_VIRT_BASE + IRQ_MASK_LOW_OFF);
-       orion_irq_init(32, IRQ_VIRT_BASE + IRQ_MASK_HIGH_OFF);
+       orion_irq_init(1, IRQ_VIRT_BASE + IRQ_MASK_LOW_OFF);
+       orion_irq_init(33, IRQ_VIRT_BASE + IRQ_MASK_HIGH_OFF);
 
 #ifdef CONFIG_MULTI_IRQ_HANDLER
        set_handle_irq(dove_legacy_handle_irq);
index 48e4c4b3cd1c9a52f6e5580c531088e10aac8662..b093a196e80176d44cc083ee89d51b6fee1318c4 100644 (file)
  */
 
 #include <linux/linkage.h>
-#include <linux/init.h>
 
 #include <asm/assembler.h>
 
-       __CPUINIT
-
 ENTRY(mvebu_cortex_a9_secondary_startup)
 ARM_BE8(setend be)
        bl      armada_38x_scu_power_up
index df0a9cc5da59ad2ce7a95e7d13dc8e2a82937069..3d5000481c112dda6c0ee32a0ce036f3b08c9b53 100644 (file)
@@ -24,7 +24,7 @@
 
 extern void mvebu_cortex_a9_secondary_startup(void);
 
-static int __cpuinit mvebu_cortex_a9_boot_secondary(unsigned int cpu,
+static int mvebu_cortex_a9_boot_secondary(unsigned int cpu,
                                                    struct task_struct *idle)
 {
        int ret, hw_cpu;
index 6dfd4ab97b2aaf2e6982de227dd17e403dce7d24..301ab38d38ba884e194657d3e1173576b7f43ad0 100644 (file)
@@ -43,6 +43,9 @@ static void mvebu_armada_xp_gp_pm_enter(void __iomem *sdram_reg, u32 srcmd)
        for (i = 0; i < ARMADA_XP_GP_PIC_NR_GPIOS; i++)
                ackcmd |= BIT(pic_raw_gpios[i]);
 
+       srcmd = cpu_to_le32(srcmd);
+       ackcmd = cpu_to_le32(ackcmd);
+
        /*
         * Wait a while, the PIC needs quite a bit of time between the
         * two GPIO commands.
index 2e6ab67e2284497f9fc1d8fe323c2daaa4f34809..8fcec1cc101e09be617340f8c7e91b6e004b636f 100644 (file)
@@ -119,8 +119,7 @@ static int pmu_set_power_domain(int pd, bool on)
  * Handling of CPU cores
  */
 
-static int __cpuinit rockchip_boot_secondary(unsigned int cpu,
-                                            struct task_struct *idle)
+static int rockchip_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
        int ret;
 
index f61158c6ce7185a3b30ef396116a7fc2d74b3c72..5766ce2be32bbd28452c95ea9e43e92de0f4dcb0 100644 (file)
@@ -589,4 +589,4 @@ static int __init ve_spc_clk_init(void)
        platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0);
        return 0;
 }
-module_init(ve_spc_clk_init);
+device_initcall(ve_spc_clk_init);
index 0bb287ca0a98e8dcaac6ebd6497d762005b27013..0689c3fb56e3d84fe3ed7790f4a6b25835c86555 100644 (file)
                        phy-names = "sata-phy";
                };
 
+               sbgpio: sbgpio@17001000{
+                       compatible = "apm,xgene-gpio-sb";
+                       reg = <0x0 0x17001000 0x0 0x400>;
+                       #gpio-cells = <2>;
+                       gpio-controller;
+                       interrupts =    <0x0 0x28 0x1>,
+                                       <0x0 0x29 0x1>,
+                                       <0x0 0x2a 0x1>,
+                                       <0x0 0x2b 0x1>,
+                                       <0x0 0x2c 0x1>,
+                                       <0x0 0x2d 0x1>;
+               };
+
                rtc: rtc@10510000 {
                        compatible = "apm,xgene-rtc";
                        reg = <0x0 0x10510000 0x0 0x400>;
index 5047a33043bdf4bfa4d6ae3226f7385a6c703d37..f679a19dfeb8bbd857d8c577b14baeccf05d3865 100644 (file)
@@ -848,5 +848,4 @@ static void eeprom_disable_write_protect(void)
     /* Write protect disabled */
   }
 }
-
-module_init(eeprom_init);
+device_initcall(eeprom_init);
index 1b17d92cef8ebb98e57b34a63be0f6f8fb6d9508..9ef56092a4c54f9cdd49255ae6683bf33adeb72a 100644 (file)
@@ -145,6 +145,5 @@ unsigned long crisv32_intmem_virt_to_phys(void* addr)
                (unsigned long)intmem_virtual + MEM_INTMEM_START +
                RESERVED_SIZE);
 }
-
-module_init(crisv32_intmem_init);
+device_initcall(crisv32_intmem_init);
 
index c0e3707c2299bae3f4cb5cf1a1c7ded959f361e9..e1cf802d1639bb431cef9f27e8bfa29c3c2505f6 100644 (file)
@@ -9,7 +9,7 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
-#include <linux/init.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
index 3a428f19a00116ad963f7cdae2ddf469ec21a054..085047f3a545b283c41ec546cb2b71e623a4c7fa 100644 (file)
@@ -368,13 +368,4 @@ simscsi_init(void)
        scsi_host_put(host);
        return error;
 }
-
-static void __exit
-simscsi_exit(void)
-{
-       scsi_remove_host(host);
-       scsi_host_put(host);
-}
-
-module_init(simscsi_init);
-module_exit(simscsi_exit);
+device_initcall(simscsi_init);
index 7f3028965064b1af467bfe93028e1277006fabc1..97e48b0eefc7c18f54f0d1ee76860eed43a53c4e 100644 (file)
@@ -215,10 +215,6 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
        pmd_t *pmd;
        pte_t *pte;
 
-       if (!PageReserved(page))
-               printk(KERN_ERR "put_kernel_page: page at 0x%p not in reserved memory\n",
-                      page_address(page));
-
        pgd = pgd_offset_k(address);            /* note: this is NOT pgd_offset()! */
 
        {
index 27793f7aa99c3c406b5547511a59347b05dd0210..5b799d4deb747c481f3ad7a47d1c75374fa92715 100644 (file)
@@ -142,5 +142,4 @@ static int __init sn_salinfo_init(void)
                salinfo_platform_oemdata = &sn_salinfo_platform_oemdata;
        return 0;
 }
-
-module_init(sn_salinfo_init)
+device_initcall(sn_salinfo_init);
index 17fe083fcb6fa985cec4f108036ad490e11b785d..b03d8738d67cd682ef0a96d8770f8bb9ebc4fbc6 100644 (file)
@@ -96,5 +96,4 @@ static int __init asb2303_mtd_init(void)
        platform_device_register(&asb2303_sysflash);
        return 0;
 }
-
-module_init(asb2303_mtd_init);
+device_initcall(asb2303_mtd_init);
index d5cae55195ecfd4f108b1431ecf52193b5a8d282..10a5ae9553fd657211524e4af5257958815e134a 100644 (file)
@@ -207,8 +207,7 @@ static int __init pdc_console_tty_driver_init(void)
 
        return 0;
 }
-
-module_init(pdc_console_tty_driver_init);
+device_initcall(pdc_console_tty_driver_init);
 
 static struct tty_driver * pdc_console_device (struct console *c, int *index)
 {
index ba0c053e25ae9d66cf536ab87ce1cca9eef53dbf..518f4f5f1f43ec6b2dcaceb9b2f5c9536097d59f 100644 (file)
@@ -543,6 +543,7 @@ static int __init perf_init(void)
 
        return 0;
 }
+device_initcall(perf_init);
 
 /*
  * perf_start_counters(void)
@@ -847,5 +848,3 @@ printk("perf_rdr_write\n");
        }
 printk("perf_rdr_write done\n");
 }
-
-module_init(perf_init);
index 56f44848b044b67c7d3b84caf82a9a9ed6fb493a..43922509a4833e8da175b668e9be588daa123f4f 100644 (file)
@@ -1124,4 +1124,4 @@ static int __init rtc_init(void)
        return PTR_ERR_OR_ZERO(pdev);
 }
 
-module_init(rtc_init);
+device_initcall(rtc_init);
index 1f614d778a8b5ffc2b1cf32465fce3a579839f3a..bb0bd7025cb88f893af04d3f98141860c038ee54 100644 (file)
@@ -928,7 +928,7 @@ static int __init hugetlbpage_init(void)
        return 0;
 }
 #endif
-module_init(hugetlbpage_init);
+arch_initcall(hugetlbpage_init);
 
 void flush_dcache_icache_hugepage(struct page *page)
 {
index c9adbfb65006e153aa8f6caf0d0fd5b396cf2997..fcbea4b51a7821ac2865622de1440a9aaa766449 100644 (file)
@@ -445,5 +445,4 @@ static int pmc_init(void)
 {
        return platform_driver_register(&pmc_driver);
 }
-
-module_init(pmc_init);
+device_initcall(pmc_init);
index ce73ce865613b0521330f8e896f471daf6da9a15..791c6142c4a7bd0185437890350381c529779a54 100644 (file)
@@ -92,5 +92,4 @@ static int __init ps3_rtc_init(void)
 
        return PTR_ERR_OR_ZERO(pdev);
 }
-
-module_init(ps3_rtc_init);
+device_initcall(ps3_rtc_init);
index d631022ffb4b3c77a4fe057c3c6da91457a20c64..38138cf8d33e2213804dad3e903c4037a59db605 100644 (file)
@@ -407,4 +407,4 @@ static int __init fsl_lbc_init(void)
 {
        return platform_driver_register(&fsl_lbc_ctrl_driver);
 }
-module_init(fsl_lbc_init);
+subsys_initcall(fsl_lbc_init);
index 522786318d36c4f5dbee20c8cbdee94009bbf201..40e2b585d4887b7bc48361395c0b9881ecf64970 100644 (file)
@@ -10,7 +10,7 @@
  * for more details.
  */
 #include <linux/io.h>
-#include <linux/init.h>
+#include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <mach/highlander.h>
index bef83522f958c25ed186dc21187963607757ed89..5192b1f43ada5ce884bc4a8abc3f1e0f53e7cc1d 100644 (file)
@@ -140,4 +140,4 @@ static int __init psw_init(void)
 {
        return platform_add_devices(psw_devices, ARRAY_SIZE(psw_devices));
 }
-module_init(psw_init);
+device_initcall(psw_init);
index 5af8debc6a71aab44a6cf48e63d44f7343f2fca4..f0da5a237e94077ced050b6f3d746c89d44bd341 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 #include <linux/usb/tilegx.h>
+#include <linux/init.h>
 #include <linux/types.h>
 
 static u64 ehci_dmamask = DMA_BIT_MASK(32);
index 5de7f4c5697136e180e4a24e153bde77c7112b8c..52c8e3c7789dc81f6f6bd5ae60a6672f500dc8ca 100644 (file)
@@ -98,4 +98,4 @@ static int __init sbf_init(void)
 
        return 0;
 }
-module_init(sbf_init);
+arch_initcall(sbf_init);
index 7795f3f8b1d57198469ded20ac9a1244035428e8..43dd672d788bcc8a07106f8c84c0aeae786cb49e 100644 (file)
@@ -530,5 +530,4 @@ static __init int bts_init(void)
 
        return perf_pmu_register(&bts_pmu, "intel_bts", -1);
 }
-
-module_init(bts_init);
+arch_initcall(bts_init);
index 159887c3a89d66a4aaad415ddc069b25904b5676..183de719628d2a4740d20a173a6c950f35c322fa 100644 (file)
@@ -1106,5 +1106,4 @@ static __init int pt_init(void)
 
        return ret;
 }
-
-module_init(pt_init);
+arch_initcall(pt_init);
index 5ee771859b6f6e144090efe8f315e0c7707978b3..1f4acd68b98bccb7bf4032bc6ff2bde3f4efecdb 100644 (file)
@@ -65,7 +65,7 @@ static int __init add_bus_probe(void)
 
        return of_platform_bus_probe(NULL, ce4100_ids, NULL);
 }
-module_init(add_bus_probe);
+device_initcall(add_bus_probe);
 
 #ifdef CONFIG_PCI
 struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
index ee22c1d93ae5c4c5ffe065981d591507a0511a07..b034b1b14b9c66ab77b24d9757ce10de7f0f318e 100644 (file)
@@ -72,7 +72,7 @@ asmlinkage __visible void vsmp_irq_enable(void)
 }
 PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable);
 
-static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
+static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf,
                                  unsigned long addr, unsigned len)
 {
        switch (type) {
index 32947ba0f62dad08eaaf712848bff024ea396a46..ee40fcb6e54dd5f816819bf2f048dcc9177ce95c 100644 (file)
@@ -173,5 +173,4 @@ static int __init intel_mid_device_create(void)
 
        return platform_device_register(&vrtc_device);
 }
-
-module_init(intel_mid_device_create);
+device_initcall(intel_mid_device_create);
index 17b1ef3232e4833349c2f64ed19e08dede454b52..8ab021b1f14128d49948de51f685a78fd7180fa2 100644 (file)
@@ -681,6 +681,4 @@ static int iss_net_init(void)
 
        return 1;
 }
-
-module_init(iss_net_init);
-
+device_initcall(iss_net_init);
index 751f8fd7335db2203f7257edc8ad680dc7ea2a14..3d13b042da735823b7c6425f93cce9cff82e0abe 100644 (file)
@@ -12,6 +12,7 @@
 #define pr_fmt(fmt) "PKCS7key: "fmt
 #include <linux/key.h>
 #include <linux/err.h>
+#include <linux/module.h>
 #include <linux/key-type.h>
 #include <crypto/pkcs7.h>
 #include <keys/user-type.h>
index f15db002be8ec238dabac8114e862b8ec140255a..114cf48085abd34eacb341fae1f246e2fdfb4b41 100644 (file)
@@ -80,6 +80,26 @@ config ACPI_PROCFS_POWER
 
          Say N to delete power /proc/acpi/ directories that have moved to /sys/
 
+config ACPI_REV_OVERRIDE_POSSIBLE
+       bool "Allow supported ACPI revision to be overridden"
+       depends on X86
+       default y
+       help
+         The platform firmware on some systems expects Linux to return "5" as
+         the supported ACPI revision which makes it expose system configuration
+         information in a special way.
+
+         For example, based on what ACPI exports as the supported revision,
+         Dell XPS 13 (2015) configures its audio device to either work in HDA
+         mode or in I2S mode, where the former is supposed to be used on Linux
+         until the latter is fully supported (in the kernel as well as in user
+         space).
+
+         This option enables a DMI-based quirk for the above Dell machine (so
+         that HDA audio is exposed by the platform firmware to the kernel) and
+         makes it possible to force the kernel to return "5" as the supported
+         ACPI revision via the "acpi_rev_override" command line switch.
+
 config ACPI_EC_DEBUGFS
        tristate "EC read/write access through /sys/kernel/debug/ec"
        default n
index 853aa2dbdb61d203d7d01090895620c1929f1421..a8d8092ee39152920caf04f8e862b414ba43d417 100644 (file)
@@ -59,5 +59,8 @@
 #include "acglobal.h"          /* All global variables */
 #include "achware.h"           /* Hardware defines and interfaces */
 #include "acutils.h"           /* Utility interfaces */
+#ifndef ACPI_USE_SYSTEM_CLIBRARY
+#include "acclib.h"            /* C library interfaces */
+#endif                         /* !ACPI_USE_SYSTEM_CLIBRARY */
 
 #endif                         /* __ACCOMMON_H__ */
index a0c47878431422be01acae9e41c7a5d3a7c8c33e..53f96a3707624416d8ae6773027a1e693a51d623 100644 (file)
@@ -61,6 +61,8 @@ ACPI_GLOBAL(struct acpi_table_header, acpi_gbl_original_dsdt_header);
 
 #if (!ACPI_REDUCED_HARDWARE)
 ACPI_GLOBAL(struct acpi_table_facs *, acpi_gbl_FACS);
+ACPI_GLOBAL(struct acpi_table_facs *, acpi_gbl_facs32);
+ACPI_GLOBAL(struct acpi_table_facs *, acpi_gbl_facs64);
 
 #endif                         /* !ACPI_REDUCED_HARDWARE */
 
index 1886bde54b5d323e0d9543dbac41245d7b8e1075..7ac98000b46b61dd417fe260315000092b2be861 100644 (file)
@@ -468,6 +468,8 @@ void acpi_ex_eisa_id_to_string(char *dest, u64 compressed_id);
 
 void acpi_ex_integer_to_string(char *dest, u64 value);
 
+void acpi_ex_pci_cls_to_string(char *dest, u8 class_code[3]);
+
 u8 acpi_is_valid_space_id(u8 space_id);
 
 /*
index ffdb956391f614786ba2580fede7a7ce529b0897..bc600969c6a1551f81bf4a1a40c522124dd5a39a 100644 (file)
@@ -213,6 +213,7 @@ struct acpi_table_list {
 
 #define ACPI_TABLE_INDEX_DSDT           (0)
 #define ACPI_TABLE_INDEX_FACS           (1)
+#define ACPI_TABLE_INDEX_X_FACS         (2)
 
 struct acpi_find_context {
        char *search_for;
index 952fbe0b7231a79c62f463bd36e8bce596c8c478..0dd088290d80588fa1e4f7545c70d712e8b7c86c 100644 (file)
@@ -66,6 +66,7 @@
 #define ACPI_NS_PREFIX_IS_SCOPE     0x10
 #define ACPI_NS_EXTERNAL            0x20
 #define ACPI_NS_TEMPORARY           0x40
+#define ACPI_NS_OVERRIDE_IF_FOUND   0x80
 
 /* Flags for acpi_ns_walk_namespace */
 
index 3e9720e1f34f79464abbeb0fe7cb6ca7977a7fe0..c81d98d09cace4e747531511f3d95971372713a9 100644 (file)
@@ -335,6 +335,7 @@ struct acpi_object_reference {
        void *object;           /* name_op=>HANDLE to obj, index_op=>union acpi_operand_object */
        struct acpi_namespace_node *node;       /* ref_of or Namepath */
        union acpi_operand_object **where;      /* Target of Index */
+       u8 *index_pointer;      /* Used for Buffers and Strings */
        u32 value;              /* Used for Local/Arg/Index/ddb_handle */
 };
 
index 87c7860b3394b483dd3c0fe6c039e1d589682d65..44997ca02ae26a59f4e2efe0992d6c80dbcc32b7 100644 (file)
@@ -82,6 +82,7 @@ struct acpi_walk_state {
        u8 return_used;
        u8 scope_depth;
        u8 pass_number;         /* Parse pass during table load */
+       u8 namespace_override;  /* Override existing objects */
        u8 result_size;         /* Total elements for the result stack */
        u8 result_count;        /* Current number of occupied elements of result stack */
        u32 aml_offset;
index d49f5c7a20d90197ece8b625dd2fcd22fca4c4b1..6de0d3573037a99651037628d69e02d8bceb7498 100644 (file)
@@ -205,66 +205,6 @@ acpi_status acpi_ut_hardware_initialize(void);
 
 void acpi_ut_subsystem_shutdown(void);
 
-/*
- * utclib - Local implementations of C library functions
- */
-#ifndef ACPI_USE_SYSTEM_CLIBRARY
-
-acpi_size acpi_ut_strlen(const char *string);
-
-char *acpi_ut_strchr(const char *string, int ch);
-
-char *acpi_ut_strcpy(char *dst_string, const char *src_string);
-
-char *acpi_ut_strncpy(char *dst_string,
-                     const char *src_string, acpi_size count);
-
-int acpi_ut_memcmp(const char *buffer1, const char *buffer2, acpi_size count);
-
-int acpi_ut_strncmp(const char *string1, const char *string2, acpi_size count);
-
-int acpi_ut_strcmp(const char *string1, const char *string2);
-
-char *acpi_ut_strcat(char *dst_string, const char *src_string);
-
-char *acpi_ut_strncat(char *dst_string,
-                     const char *src_string, acpi_size count);
-
-u32 acpi_ut_strtoul(const char *string, char **terminator, u32 base);
-
-char *acpi_ut_strstr(char *string1, char *string2);
-
-void *acpi_ut_memcpy(void *dest, const void *src, acpi_size count);
-
-void *acpi_ut_memset(void *dest, u8 value, acpi_size count);
-
-int acpi_ut_to_upper(int c);
-
-int acpi_ut_to_lower(int c);
-
-extern const u8 _acpi_ctype[];
-
-#define _ACPI_XA     0x00      /* extra alphabetic - not supported */
-#define _ACPI_XS     0x40      /* extra space */
-#define _ACPI_BB     0x00      /* BEL, BS, etc. - not supported */
-#define _ACPI_CN     0x20      /* CR, FF, HT, NL, VT */
-#define _ACPI_DI     0x04      /* '0'-'9' */
-#define _ACPI_LO     0x02      /* 'a'-'z' */
-#define _ACPI_PU     0x10      /* punctuation */
-#define _ACPI_SP     0x08      /* space, tab, CR, LF, VT, FF */
-#define _ACPI_UP     0x01      /* 'A'-'Z' */
-#define _ACPI_XD     0x80      /* '0'-'9', 'A'-'F', 'a'-'f' */
-
-#define ACPI_IS_DIGIT(c)  (_acpi_ctype[(unsigned char)(c)] & (_ACPI_DI))
-#define ACPI_IS_SPACE(c)  (_acpi_ctype[(unsigned char)(c)] & (_ACPI_SP))
-#define ACPI_IS_XDIGIT(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_XD))
-#define ACPI_IS_UPPER(c)  (_acpi_ctype[(unsigned char)(c)] & (_ACPI_UP))
-#define ACPI_IS_LOWER(c)  (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO))
-#define ACPI_IS_PRINT(c)  (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO | _ACPI_UP | _ACPI_DI | _ACPI_XS | _ACPI_PU))
-#define ACPI_IS_ALPHA(c)  (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO | _ACPI_UP))
-
-#endif                         /* !ACPI_USE_SYSTEM_CLIBRARY */
-
 #define ACPI_IS_ASCII(c)  ((c) < 0x80)
 
 /*
@@ -430,6 +370,10 @@ acpi_status
 acpi_ut_execute_CID(struct acpi_namespace_node *device_node,
                    struct acpi_pnp_device_id_list ** return_cid_list);
 
+acpi_status
+acpi_ut_execute_CLS(struct acpi_namespace_node *device_node,
+                   struct acpi_pnp_device_id **return_id);
+
 /*
  * utlock - reader/writer locks
  */
index 43b40de90484cbb9f44338bb2d74dbb4bf4af291..20de148594fdc0459e1ddd589abe999f71c0003f 100644 (file)
@@ -502,7 +502,7 @@ acpi_ds_create_field(union acpi_parse_object *op,
                }
        }
 
-       ACPI_MEMSET(&info, 0, sizeof(struct acpi_create_field_info));
+       memset(&info, 0, sizeof(struct acpi_create_field_info));
 
        /* Second arg is the field flags */
 
index bbe74bcebbae882f2bb1da3211b4f4aad347534c..95779e8ec3bb2ab16eb69b8d14a729812aef8df4 100644 (file)
@@ -207,7 +207,7 @@ acpi_ds_initialize_objects(u32 table_index,
 
        /* Set all init info to zero */
 
-       ACPI_MEMSET(&info, 0, sizeof(struct acpi_init_walk_info));
+       memset(&info, 0, sizeof(struct acpi_init_walk_info));
 
        info.owner_id = owner_id;
        info.table_index = table_index;
index 8a7b07b6adc81a761cefcb2689c82de187eb82b2..2beb7fd674ae14a20b43031254d874afbd859886 100644 (file)
@@ -339,8 +339,8 @@ acpi_ds_build_internal_buffer_obj(struct acpi_walk_state *walk_state,
                /* Initialize buffer from the byte_list (if present) */
 
                if (byte_list) {
-                       ACPI_MEMCPY(obj_desc->buffer.pointer,
-                                   byte_list->named.data, byte_list_length);
+                       memcpy(obj_desc->buffer.pointer, byte_list->named.data,
+                              byte_list_length);
                }
        }
 
@@ -750,8 +750,7 @@ acpi_ds_init_object_from_op(struct acpi_walk_state *walk_state,
        case ACPI_TYPE_STRING:
 
                obj_desc->string.pointer = op->common.value.string;
-               obj_desc->string.length =
-                   (u32) ACPI_STRLEN(op->common.value.string);
+               obj_desc->string.length = (u32)strlen(op->common.value.string);
 
                /*
                 * The string is contained in the ACPI table, don't ever try
index deeddd6d2f0523fbc5aff635acf63c3794a11b23..ebc577baeaf9fb14e29796a9286d22ca107fa6b5 100644 (file)
@@ -572,8 +572,8 @@ acpi_ds_create_operand(struct acpi_walk_state *walk_state,
                                        obj_desc =
                                            acpi_ut_create_string_object((acpi_size) name_length);
 
-                                       ACPI_STRNCPY(obj_desc->string.pointer,
-                                                    name_string, name_length);
+                                       strncpy(obj_desc->string.pointer,
+                                               name_string, name_length);
                                        status = AE_OK;
                                } else {
                                        /*
index 843942fb4be501c8dcc3d15ced1729a152489011..845ff44919c3713f8725f08daff16d9b9679e338 100644 (file)
@@ -315,10 +315,19 @@ acpi_ds_load1_begin_op(struct acpi_walk_state * walk_state,
                flags = ACPI_NS_NO_UPSEARCH;
                if ((walk_state->opcode != AML_SCOPE_OP) &&
                    (!(walk_state->parse_flags & ACPI_PARSE_DEFERRED_OP))) {
-                       flags |= ACPI_NS_ERROR_IF_FOUND;
-                       ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH,
-                                         "[%s] Cannot already exist\n",
-                                         acpi_ut_get_type_name(object_type)));
+                       if (walk_state->namespace_override) {
+                               flags |= ACPI_NS_OVERRIDE_IF_FOUND;
+                               ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH,
+                                                 "[%s] Override allowed\n",
+                                                 acpi_ut_get_type_name
+                                                 (object_type)));
+                       } else {
+                               flags |= ACPI_NS_ERROR_IF_FOUND;
+                               ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH,
+                                                 "[%s] Cannot already exist\n",
+                                                 acpi_ut_get_type_name
+                                                 (object_type)));
+                       }
                } else {
                        ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH,
                                          "[%s] Both Find or Create allowed\n",
index 8840296d5b205080045de3fd93a97f046bcb91f5..ea4c0d3fca2d820edfbdb219f33d9718f32cb734 100644 (file)
@@ -377,7 +377,7 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle,
 
        /* 4) The last two characters of the name are the hex GPE Number */
 
-       gpe_number = ACPI_STRTOUL(&name[2], NULL, 16);
+       gpe_number = strtoul(&name[2], NULL, 16);
        if (gpe_number == ACPI_UINT32_MAX) {
 
                /* Conversion failed; invalid method, just ignore it */
index 6e0df2b9d5a475f329f2d40bfda2b592a26b764c..24a4c5c2b124825b5616371882ffa0f9666cdc85 100644 (file)
@@ -470,7 +470,7 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc,
                        return_ACPI_STATUS(AE_NO_MEMORY);
                }
 
-               ACPI_MEMCPY(table, table_header, length);
+               memcpy(table, table_header, length);
                break;
 
        default:
index 89a976b4ccf2ad9f8d01ed702b2288d5080c64a2..075d654c837f27e767ebed874e68140b97fc12ff 100644 (file)
@@ -227,9 +227,8 @@ acpi_ex_convert_to_buffer(union acpi_operand_object *obj_desc,
                /* Copy the integer to the buffer, LSB first */
 
                new_buf = return_desc->buffer.pointer;
-               ACPI_MEMCPY(new_buf,
-                           &obj_desc->integer.value,
-                           acpi_gbl_integer_byte_width);
+               memcpy(new_buf,
+                      &obj_desc->integer.value, acpi_gbl_integer_byte_width);
                break;
 
        case ACPI_TYPE_STRING:
@@ -252,8 +251,8 @@ acpi_ex_convert_to_buffer(union acpi_operand_object *obj_desc,
                /* Copy the string to the buffer */
 
                new_buf = return_desc->buffer.pointer;
-               ACPI_STRNCPY((char *)new_buf, (char *)obj_desc->string.pointer,
-                            obj_desc->string.length);
+               strncpy((char *)new_buf, (char *)obj_desc->string.pointer,
+                       obj_desc->string.length);
                break;
 
        default:
index e67d0aca3fe68b23a9f84284c1a017736a708b8a..815442bbd0518e6d2ee154971966eff3a760c6d5 100644 (file)
@@ -76,6 +76,8 @@ acpi_ex_do_debug_object(union acpi_operand_object *source_desc,
 {
        u32 i;
        u32 timer;
+       union acpi_operand_object *object_desc;
+       u32 value;
 
        ACPI_FUNCTION_TRACE_PTR(ex_do_debug_object, source_desc);
 
@@ -254,8 +256,44 @@ acpi_ex_do_debug_object(union acpi_operand_object *source_desc,
                                                         object)->object,
                                                        level + 4, 0);
                        } else {
-                               acpi_ex_do_debug_object(source_desc->reference.
-                                                       object, level + 4, 0);
+                               object_desc = source_desc->reference.object;
+                               value = source_desc->reference.value;
+
+                               switch (object_desc->common.type) {
+                               case ACPI_TYPE_BUFFER:
+
+                                       acpi_os_printf("Buffer[%u] = 0x%2.2X\n",
+                                                      value,
+                                                      *source_desc->reference.
+                                                      index_pointer);
+                                       break;
+
+                               case ACPI_TYPE_STRING:
+
+                                       acpi_os_printf
+                                           ("String[%u] = \"%c\" (0x%2.2X)\n",
+                                            value,
+                                            *source_desc->reference.
+                                            index_pointer,
+                                            *source_desc->reference.
+                                            index_pointer);
+                                       break;
+
+                               case ACPI_TYPE_PACKAGE:
+
+                                       acpi_os_printf("Package[%u] = ", value);
+                                       acpi_ex_do_debug_object(*source_desc->
+                                                               reference.where,
+                                                               level + 4, 0);
+                                       break;
+
+                               default:
+
+                                       acpi_os_printf
+                                           ("Unknown Reference object type %X\n",
+                                            object_desc->common.type);
+                                       break;
+                               }
                        }
                }
                break;
index 1da52bef632e1a6b28f343f78ae6277c083f7792..401e7edcd419371a1d661a7f57cc21adce5eb134 100644 (file)
@@ -224,7 +224,7 @@ static struct acpi_exdump_info acpi_ex_dump_index_field[5] = {
        {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(index_field.data_obj), "Data Object"}
 };
 
-static struct acpi_exdump_info acpi_ex_dump_reference[8] = {
+static struct acpi_exdump_info acpi_ex_dump_reference[9] = {
        {ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE(acpi_ex_dump_reference), NULL},
        {ACPI_EXD_UINT8, ACPI_EXD_OFFSET(reference.class), "Class"},
        {ACPI_EXD_UINT8, ACPI_EXD_OFFSET(reference.target_type), "Target Type"},
@@ -232,6 +232,8 @@ static struct acpi_exdump_info acpi_ex_dump_reference[8] = {
        {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(reference.object), "Object Desc"},
        {ACPI_EXD_NODE, ACPI_EXD_OFFSET(reference.node), "Node"},
        {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(reference.where), "Where"},
+       {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(reference.index_pointer),
+        "Index Pointer"},
        {ACPI_EXD_REFERENCE, 0, NULL}
 };
 
@@ -1005,14 +1007,13 @@ static void acpi_ex_dump_reference_obj(union acpi_operand_object *obj_desc)
        } else if (obj_desc->reference.object) {
                if (ACPI_GET_DESCRIPTOR_TYPE(obj_desc) ==
                    ACPI_DESC_TYPE_OPERAND) {
-                       acpi_os_printf(" Target: %p",
+                       acpi_os_printf("%22s %p", "Target :",
                                       obj_desc->reference.object);
                        if (obj_desc->reference.class == ACPI_REFCLASS_TABLE) {
                                acpi_os_printf(" Table Index: %X\n",
                                               obj_desc->reference.value);
                        } else {
-                               acpi_os_printf(" Target: %p [%s]\n",
-                                              obj_desc->reference.object,
+                               acpi_os_printf(" [%s]\n",
                                               acpi_ut_get_type_name(((union
                                                                       acpi_operand_object
                                                                       *)
index c161dd974f741c1700c501fa6576585034c67e43..61fd9c7b88bc508360ab4d96a034b8ac41bcca65 100644 (file)
@@ -428,7 +428,7 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc,
                }
 
                buffer = buffer_desc->buffer.pointer;
-               ACPI_MEMCPY(buffer, source_desc->buffer.pointer, length);
+               memcpy(buffer, source_desc->buffer.pointer, length);
 
                /* Lock entire transaction if requested */
 
index 725a3746a2df095163e5c10ca35077cf8afbb72c..70b7bbbb860b216aaeeb5e520cb5e253fc9b8f6b 100644 (file)
@@ -416,22 +416,22 @@ acpi_ex_field_datum_io(union acpi_operand_object *obj_desc,
                         * Copy the data from the source buffer.
                         * Length is the field width in bytes.
                         */
-                       ACPI_MEMCPY(value,
-                                   (obj_desc->buffer_field.buffer_obj)->buffer.
-                                   pointer +
-                                   obj_desc->buffer_field.base_byte_offset +
-                                   field_datum_byte_offset,
-                                   obj_desc->common_field.access_byte_width);
+                       memcpy(value,
+                              (obj_desc->buffer_field.buffer_obj)->buffer.
+                              pointer +
+                              obj_desc->buffer_field.base_byte_offset +
+                              field_datum_byte_offset,
+                              obj_desc->common_field.access_byte_width);
                } else {
                        /*
                         * Copy the data to the target buffer.
                         * Length is the field width in bytes.
                         */
-                       ACPI_MEMCPY((obj_desc->buffer_field.buffer_obj)->buffer.
-                                   pointer +
-                                   obj_desc->buffer_field.base_byte_offset +
-                                   field_datum_byte_offset, value,
-                                   obj_desc->common_field.access_byte_width);
+                       memcpy((obj_desc->buffer_field.buffer_obj)->buffer.
+                              pointer +
+                              obj_desc->buffer_field.base_byte_offset +
+                              field_datum_byte_offset, value,
+                              obj_desc->common_field.access_byte_width);
                }
 
                status = AE_OK;
@@ -703,7 +703,7 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc,
                return_ACPI_STATUS(AE_BUFFER_OVERFLOW);
        }
 
-       ACPI_MEMSET(buffer, 0, buffer_length);
+       memset(buffer, 0, buffer_length);
        access_bit_width = ACPI_MUL_8(obj_desc->common_field.access_byte_width);
 
        /* Handle the simple case here */
@@ -720,7 +720,7 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc,
                        status =
                            acpi_ex_field_datum_io(obj_desc, 0, &raw_datum,
                                                   ACPI_READ);
-                       ACPI_MEMCPY(buffer, &raw_datum, buffer_length);
+                       memcpy(buffer, &raw_datum, buffer_length);
                }
 
                return_ACPI_STATUS(status);
@@ -793,9 +793,9 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc,
 
                /* Write merged datum to target buffer */
 
-               ACPI_MEMCPY(((char *)buffer) + buffer_offset, &merged_datum,
-                           ACPI_MIN(obj_desc->common_field.access_byte_width,
-                                    buffer_length - buffer_offset));
+               memcpy(((char *)buffer) + buffer_offset, &merged_datum,
+                      ACPI_MIN(obj_desc->common_field.access_byte_width,
+                               buffer_length - buffer_offset));
 
                buffer_offset += obj_desc->common_field.access_byte_width;
                merged_datum =
@@ -811,9 +811,9 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc,
 
        /* Write the last datum to the buffer */
 
-       ACPI_MEMCPY(((char *)buffer) + buffer_offset, &merged_datum,
-                   ACPI_MIN(obj_desc->common_field.access_byte_width,
-                            buffer_length - buffer_offset));
+       memcpy(((char *)buffer) + buffer_offset, &merged_datum,
+              ACPI_MIN(obj_desc->common_field.access_byte_width,
+                       buffer_length - buffer_offset));
 
        return_ACPI_STATUS(AE_OK);
 }
@@ -878,7 +878,7 @@ acpi_ex_insert_into_field(union acpi_operand_object *obj_desc,
                 * at Byte zero. All unused (upper) bytes of the
                 * buffer will be 0.
                 */
-               ACPI_MEMCPY((char *)new_buffer, (char *)buffer, buffer_length);
+               memcpy((char *)new_buffer, (char *)buffer, buffer_length);
                buffer = new_buffer;
                buffer_length = required_length;
        }
@@ -918,9 +918,9 @@ acpi_ex_insert_into_field(union acpi_operand_object *obj_desc,
 
        /* Get initial Datum from the input buffer */
 
-       ACPI_MEMCPY(&raw_datum, buffer,
-                   ACPI_MIN(obj_desc->common_field.access_byte_width,
-                            buffer_length - buffer_offset));
+       memcpy(&raw_datum, buffer,
+              ACPI_MIN(obj_desc->common_field.access_byte_width,
+                       buffer_length - buffer_offset));
 
        merged_datum =
            raw_datum << obj_desc->common_field.start_field_bit_offset;
@@ -970,9 +970,9 @@ acpi_ex_insert_into_field(union acpi_operand_object *obj_desc,
                /* Get the next input datum from the buffer */
 
                buffer_offset += obj_desc->common_field.access_byte_width;
-               ACPI_MEMCPY(&raw_datum, ((char *)buffer) + buffer_offset,
-                           ACPI_MIN(obj_desc->common_field.access_byte_width,
-                                    buffer_length - buffer_offset));
+               memcpy(&raw_datum, ((char *)buffer) + buffer_offset,
+                      ACPI_MIN(obj_desc->common_field.access_byte_width,
+                               buffer_length - buffer_offset));
 
                merged_datum |=
                    raw_datum << obj_desc->common_field.start_field_bit_offset;
index b56fc9d6f48e3a1180fd234d1e9eacf811202454..d02afece0f103ae9034d54657b9ef2a3ca686d9b 100644 (file)
@@ -209,8 +209,8 @@ acpi_ex_concat_template(union acpi_operand_object *operand0,
         * end_tag descriptor is copied from Operand1.
         */
        new_buf = return_desc->buffer.pointer;
-       ACPI_MEMCPY(new_buf, operand0->buffer.pointer, length0);
-       ACPI_MEMCPY(new_buf + length0, operand1->buffer.pointer, length1);
+       memcpy(new_buf, operand0->buffer.pointer, length0);
+       memcpy(new_buf + length0, operand1->buffer.pointer, length1);
 
        /* Insert end_tag and set the checksum to zero, means "ignore checksum" */
 
@@ -318,14 +318,14 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0,
 
                /* Copy the first integer, LSB first */
 
-               ACPI_MEMCPY(new_buf, &operand0->integer.value,
-                           acpi_gbl_integer_byte_width);
+               memcpy(new_buf, &operand0->integer.value,
+                      acpi_gbl_integer_byte_width);
 
                /* Copy the second integer (LSB first) after the first */
 
-               ACPI_MEMCPY(new_buf + acpi_gbl_integer_byte_width,
-                           &local_operand1->integer.value,
-                           acpi_gbl_integer_byte_width);
+               memcpy(new_buf + acpi_gbl_integer_byte_width,
+                      &local_operand1->integer.value,
+                      acpi_gbl_integer_byte_width);
                break;
 
        case ACPI_TYPE_STRING:
@@ -346,9 +346,9 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0,
 
                /* Concatenate the strings */
 
-               ACPI_STRCPY(new_buf, operand0->string.pointer);
-               ACPI_STRCPY(new_buf + operand0->string.length,
-                           local_operand1->string.pointer);
+               strcpy(new_buf, operand0->string.pointer);
+               strcpy(new_buf + operand0->string.length,
+                      local_operand1->string.pointer);
                break;
 
        case ACPI_TYPE_BUFFER:
@@ -369,11 +369,11 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0,
 
                /* Concatenate the buffers */
 
-               ACPI_MEMCPY(new_buf, operand0->buffer.pointer,
-                           operand0->buffer.length);
-               ACPI_MEMCPY(new_buf + operand0->buffer.length,
-                           local_operand1->buffer.pointer,
-                           local_operand1->buffer.length);
+               memcpy(new_buf, operand0->buffer.pointer,
+                      operand0->buffer.length);
+               memcpy(new_buf + operand0->buffer.length,
+                      local_operand1->buffer.pointer,
+                      local_operand1->buffer.length);
                break;
 
        default:
@@ -660,9 +660,9 @@ acpi_ex_do_logical_op(u16 opcode,
 
                /* Lexicographic compare: compare the data bytes */
 
-               compare = ACPI_MEMCMP(operand0->buffer.pointer,
-                                     local_operand1->buffer.pointer,
-                                     (length0 > length1) ? length1 : length0);
+               compare = memcmp(operand0->buffer.pointer,
+                                local_operand1->buffer.pointer,
+                                (length0 > length1) ? length1 : length0);
 
                switch (opcode) {
                case AML_LEQUAL_OP:     /* LEqual (Operand0, Operand1) */
index 453b00c301773fbb9977efc583f5a9163e54462a..20e87813c7d7c6b304c2bc2cbebb6b2ec370eb13 100644 (file)
@@ -192,7 +192,7 @@ static acpi_status acpi_ex_name_segment(u8 ** in_aml_address, char *name_string)
                char_buf[4] = '\0';
 
                if (name_string) {
-                       ACPI_STRCAT(name_string, char_buf);
+                       strcat(name_string, char_buf);
                        ACPI_DEBUG_PRINT((ACPI_DB_NAMES,
                                          "Appended to - %s\n", name_string));
                } else {
index fcc618aa2061496e089c73fb0c8e760e165bd64b..b8944ebb108145aeb817f1b6f62774da4fb1c8ac 100644 (file)
@@ -337,8 +337,8 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state)
                 * Copy the raw buffer data with no transform.
                 * (NULL terminated already)
                 */
-               ACPI_MEMCPY(return_desc->string.pointer,
-                           operand[0]->buffer.pointer, length);
+               memcpy(return_desc->string.pointer,
+                      operand[0]->buffer.pointer, length);
                break;
 
        case AML_CONCAT_RES_OP:
@@ -380,6 +380,8 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state)
 
                        return_desc->reference.target_type =
                            ACPI_TYPE_BUFFER_FIELD;
+                       return_desc->reference.index_pointer =
+                           &(operand[0]->buffer.pointer[index]);
                        break;
 
                case ACPI_TYPE_BUFFER:
@@ -391,6 +393,8 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state)
 
                        return_desc->reference.target_type =
                            ACPI_TYPE_BUFFER_FIELD;
+                       return_desc->reference.index_pointer =
+                           &(operand[0]->buffer.pointer[index]);
                        break;
 
                case ACPI_TYPE_PACKAGE:
index 1c64a988cbee538634ab5eef9be21d7b7c315145..fa100b3b92ee8a4180c5ce8fe052e274ac26bce7 100644 (file)
@@ -237,8 +237,8 @@ acpi_status acpi_ex_opcode_3A_1T_1R(struct acpi_walk_state *walk_state)
 
                        /* We have a buffer, copy the portion requested */
 
-                       ACPI_MEMCPY(buffer, operand[0]->string.pointer + index,
-                                   length);
+                       memcpy(buffer, operand[0]->string.pointer + index,
+                              length);
                }
 
                /* Set the length of the new String/Buffer */
index f6c2f5499935c7dffc1c53655b7f1f45da0a7073..b4a5e44c00dd05df639b9d56d06ee44f04dcaf1a 100644 (file)
@@ -517,15 +517,14 @@ acpi_ex_data_table_space_handler(u32 function,
        switch (function) {
        case ACPI_READ:
 
-               ACPI_MEMCPY(ACPI_CAST_PTR(char, value),
-                           ACPI_PHYSADDR_TO_PTR(address),
-                           ACPI_DIV_8(bit_width));
+               memcpy(ACPI_CAST_PTR(char, value),
+                      ACPI_PHYSADDR_TO_PTR(address), ACPI_DIV_8(bit_width));
                break;
 
        case ACPI_WRITE:
 
-               ACPI_MEMCPY(ACPI_PHYSADDR_TO_PTR(address),
-                           ACPI_CAST_PTR(char, value), ACPI_DIV_8(bit_width));
+               memcpy(ACPI_PHYSADDR_TO_PTR(address),
+                      ACPI_CAST_PTR(char, value), ACPI_DIV_8(bit_width));
                break;
 
        default:
index 6fa3c8d8fc5f9fd08e8df6f37c781134e7dab842..e1d4f4d51b97a41703546fdd71d6efb5770e3544 100644 (file)
@@ -100,9 +100,9 @@ acpi_ex_store_buffer_to_buffer(union acpi_operand_object *source_desc,
 
                /* Clear existing buffer and copy in the new one */
 
-               ACPI_MEMSET(target_desc->buffer.pointer, 0,
-                           target_desc->buffer.length);
-               ACPI_MEMCPY(target_desc->buffer.pointer, buffer, length);
+               memset(target_desc->buffer.pointer, 0,
+                      target_desc->buffer.length);
+               memcpy(target_desc->buffer.pointer, buffer, length);
 
 #ifdef ACPI_OBSOLETE_BEHAVIOR
                /*
@@ -129,8 +129,8 @@ acpi_ex_store_buffer_to_buffer(union acpi_operand_object *source_desc,
        } else {
                /* Truncate the source, copy only what will fit */
 
-               ACPI_MEMCPY(target_desc->buffer.pointer, buffer,
-                           target_desc->buffer.length);
+               memcpy(target_desc->buffer.pointer, buffer,
+                      target_desc->buffer.length);
 
                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                  "Truncating source buffer from %X to %X\n",
@@ -187,9 +187,9 @@ acpi_ex_store_string_to_string(union acpi_operand_object *source_desc,
                 * String will fit in existing non-static buffer.
                 * Clear old string and copy in the new one
                 */
-               ACPI_MEMSET(target_desc->string.pointer, 0,
-                           (acpi_size) target_desc->string.length + 1);
-               ACPI_MEMCPY(target_desc->string.pointer, buffer, length);
+               memset(target_desc->string.pointer, 0,
+                      (acpi_size) target_desc->string.length + 1);
+               memcpy(target_desc->string.pointer, buffer, length);
        } else {
                /*
                 * Free the current buffer, then allocate a new buffer
@@ -210,7 +210,7 @@ acpi_ex_store_string_to_string(union acpi_operand_object *source_desc,
                }
 
                target_desc->common.flags &= ~AOPOBJ_STATIC_POINTER;
-               ACPI_MEMCPY(target_desc->string.pointer, buffer, length);
+               memcpy(target_desc->string.pointer, buffer, length);
        }
 
        /* Set the new target length */
index 3f4225e95d9311ffe5a915f5081f686b0619ca4f..30c3f464fda5bcae5e98c7dd323fb963acea217d 100644 (file)
@@ -378,6 +378,38 @@ void acpi_ex_integer_to_string(char *out_string, u64 value)
        }
 }
 
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ex_pci_cls_to_string
+ *
+ * PARAMETERS:  out_string      - Where to put the converted string (7 bytes)
+ *              class_code      - PCI class code to be converted (3 bytes)
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Convert 3-byte PCI class code to string representation.
+ *              Return buffer must be large enough to hold the string. The
+ *              string returned is always exactly of length
+ *              ACPI_PCICLS_STRING_SIZE (includes null terminator).
+ *
+ ******************************************************************************/
+
+void acpi_ex_pci_cls_to_string(char *out_string, u8 class_code[3])
+{
+
+       ACPI_FUNCTION_ENTRY();
+
+       /* All 3 bytes are hexadecimal */
+
+       out_string[0] = acpi_ut_hex_to_ascii_char((u64)class_code[0], 4);
+       out_string[1] = acpi_ut_hex_to_ascii_char((u64)class_code[0], 0);
+       out_string[2] = acpi_ut_hex_to_ascii_char((u64)class_code[1], 4);
+       out_string[3] = acpi_ut_hex_to_ascii_char((u64)class_code[1], 0);
+       out_string[4] = acpi_ut_hex_to_ascii_char((u64)class_code[2], 4);
+       out_string[5] = acpi_ut_hex_to_ascii_char((u64)class_code[2], 0);
+       out_string[6] = 0;
+}
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_is_valid_space_id
index 3b3767698827f9d2553af49a3bdbdbae9f331613..52dfd0d050fa30b09077446e55cd7b27f054452d 100644 (file)
 ACPI_MODULE_NAME("hwxfsleep")
 
 /* Local prototypes */
+#if (!ACPI_REDUCED_HARDWARE)
+static acpi_status
+acpi_hw_set_firmware_waking_vectors(struct acpi_table_facs *facs,
+                                   acpi_physical_address physical_address,
+                                   acpi_physical_address physical_address64);
+#endif
+
 static acpi_status acpi_hw_sleep_dispatch(u8 sleep_state, u32 function_id);
 
 /*
@@ -72,6 +79,7 @@ static struct acpi_sleep_functions acpi_sleep_dispatch[] = {
 
 /*
  * These functions are removed for the ACPI_REDUCED_HARDWARE case:
+ *      acpi_set_firmware_waking_vectors
  *      acpi_set_firmware_waking_vector
  *      acpi_set_firmware_waking_vector64
  *      acpi_enter_sleep_state_s4bios
@@ -80,20 +88,26 @@ static struct acpi_sleep_functions acpi_sleep_dispatch[] = {
 #if (!ACPI_REDUCED_HARDWARE)
 /*******************************************************************************
  *
- * FUNCTION:    acpi_set_firmware_waking_vector
+ * FUNCTION:    acpi_hw_set_firmware_waking_vectors
  *
- * PARAMETERS:  physical_address    - 32-bit physical address of ACPI real mode
+ * PARAMETERS:  facs                - Pointer to FACS table
+ *              physical_address    - 32-bit physical address of ACPI real mode
  *                                    entry point.
+ *              physical_address64  - 64-bit physical address of ACPI protected
+ *                                    mode entry point.
  *
  * RETURN:      Status
  *
- * DESCRIPTION: Sets the 32-bit firmware_waking_vector field of the FACS
+ * DESCRIPTION: Sets the firmware_waking_vector fields of the FACS
  *
  ******************************************************************************/
 
-acpi_status acpi_set_firmware_waking_vector(u32 physical_address)
+static acpi_status
+acpi_hw_set_firmware_waking_vectors(struct acpi_table_facs *facs,
+                                   acpi_physical_address physical_address,
+                                   acpi_physical_address physical_address64)
 {
-       ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector);
+       ACPI_FUNCTION_TRACE(acpi_hw_set_firmware_waking_vectors);
 
 
        /*
@@ -106,17 +120,92 @@ acpi_status acpi_set_firmware_waking_vector(u32 physical_address)
 
        /* Set the 32-bit vector */
 
-       acpi_gbl_FACS->firmware_waking_vector = physical_address;
+       facs->firmware_waking_vector = (u32)physical_address;
 
-       /* Clear the 64-bit vector if it exists */
+       if (facs->length > 32) {
+               if (facs->version >= 1) {
 
-       if ((acpi_gbl_FACS->length > 32) && (acpi_gbl_FACS->version >= 1)) {
-               acpi_gbl_FACS->xfirmware_waking_vector = 0;
+                       /* Set the 64-bit vector */
+
+                       facs->xfirmware_waking_vector = physical_address64;
+               } else {
+                       /* Clear the 64-bit vector if it exists */
+
+                       facs->xfirmware_waking_vector = 0;
+               }
        }
 
        return_ACPI_STATUS(AE_OK);
 }
 
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_set_firmware_waking_vectors
+ *
+ * PARAMETERS:  physical_address    - 32-bit physical address of ACPI real mode
+ *                                    entry point.
+ *              physical_address64  - 64-bit physical address of ACPI protected
+ *                                    mode entry point.
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Sets the firmware_waking_vector fields of the FACS
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_set_firmware_waking_vectors(acpi_physical_address physical_address,
+                                acpi_physical_address physical_address64)
+{
+
+       ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vectors);
+
+       /* If Hardware Reduced flag is set, there is no FACS */
+
+       if (acpi_gbl_reduced_hardware) {
+               return_ACPI_STATUS (AE_OK);
+       }
+
+       if (acpi_gbl_facs32) {
+               (void)acpi_hw_set_firmware_waking_vectors(acpi_gbl_facs32,
+                                                         physical_address,
+                                                         physical_address64);
+       }
+       if (acpi_gbl_facs64) {
+               (void)acpi_hw_set_firmware_waking_vectors(acpi_gbl_facs64,
+                                                         physical_address,
+                                                         physical_address64);
+       }
+
+       return_ACPI_STATUS(AE_OK);
+}
+
+ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vectors)
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_set_firmware_waking_vector
+ *
+ * PARAMETERS:  physical_address    - 32-bit physical address of ACPI real mode
+ *                                    entry point.
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Sets the 32-bit firmware_waking_vector field of the FACS
+ *
+ ******************************************************************************/
+acpi_status acpi_set_firmware_waking_vector(u32 physical_address)
+{
+       acpi_status status;
+
+       ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector);
+
+       status = acpi_set_firmware_waking_vectors((acpi_physical_address)
+                                                 physical_address, 0);
+
+       return_ACPI_STATUS(status);
+}
+
 ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector)
 
 #if ACPI_MACHINE_WIDTH == 64
@@ -136,25 +225,19 @@ ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector)
  ******************************************************************************/
 acpi_status acpi_set_firmware_waking_vector64(u64 physical_address)
 {
-       ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector64);
-
-
-       /* Determine if the 64-bit vector actually exists */
+       acpi_status status;
 
-       if ((acpi_gbl_FACS->length <= 32) || (acpi_gbl_FACS->version < 1)) {
-               return_ACPI_STATUS(AE_NOT_EXIST);
-       }
+       ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector64);
 
-       /* Clear 32-bit vector, set the 64-bit X_ vector */
+       status = acpi_set_firmware_waking_vectors(0,
+                                                 (acpi_physical_address)
+                                                 physical_address);
 
-       acpi_gbl_FACS->firmware_waking_vector = 0;
-       acpi_gbl_FACS->xfirmware_waking_vector = physical_address;
-       return_ACPI_STATUS(AE_OK);
+       return_ACPI_STATUS(status);
 }
 
 ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector64)
 #endif
-
 /*******************************************************************************
  *
  * FUNCTION:    acpi_enter_sleep_state_s4bios
index 24fa19a76d704102b89ada50aa836dbf95d0171e..c687b9979fb2a29930c822e7b9bf5f3acb359d30 100644 (file)
@@ -102,7 +102,7 @@ acpi_status acpi_ns_root_initialize(void)
 
                /* _OSI is optional for now, will be permanent later */
 
-               if (!ACPI_STRCMP(init_val->name, "_OSI")
+               if (!strcmp(init_val->name, "_OSI")
                    && !acpi_gbl_create_osi_method) {
                        continue;
                }
@@ -180,7 +180,7 @@ acpi_status acpi_ns_root_initialize(void)
 
                                /* Build an object around the static string */
 
-                               obj_desc->string.length = (u32)ACPI_STRLEN(val);
+                               obj_desc->string.length = (u32)strlen(val);
                                obj_desc->string.pointer = val;
                                obj_desc->common.flags |= AOPOBJ_STATIC_POINTER;
                                break;
@@ -203,7 +203,7 @@ acpi_status acpi_ns_root_initialize(void)
 
                                /* Special case for ACPI Global Lock */
 
-                               if (ACPI_STRCMP(init_val->name, "_GL_") == 0) {
+                               if (strcmp(init_val->name, "_GL_") == 0) {
                                        acpi_gbl_global_lock_mutex = obj_desc;
 
                                        /* Create additional counting semaphore for global lock */
@@ -304,7 +304,9 @@ acpi_ns_lookup(union acpi_generic_state *scope_info,
                return_ACPI_STATUS(AE_BAD_PARAMETER);
        }
 
-       local_flags = flags & ~(ACPI_NS_ERROR_IF_FOUND | ACPI_NS_SEARCH_PARENT);
+       local_flags = flags &
+           ~(ACPI_NS_ERROR_IF_FOUND | ACPI_NS_OVERRIDE_IF_FOUND |
+             ACPI_NS_SEARCH_PARENT);
        *return_node = ACPI_ENTRY_NOT_FOUND;
        acpi_gbl_ns_lookup_count++;
 
@@ -547,6 +549,12 @@ acpi_ns_lookup(union acpi_generic_state *scope_info,
                        if (flags & ACPI_NS_ERROR_IF_FOUND) {
                                local_flags |= ACPI_NS_ERROR_IF_FOUND;
                        }
+
+                       /* Set override flag according to caller */
+
+                       if (flags & ACPI_NS_OVERRIDE_IF_FOUND) {
+                               local_flags |= ACPI_NS_OVERRIDE_IF_FOUND;
+                       }
                }
 
                /* Extract one ACPI name from the front of the pathname */
index 1a8b39c8d969d9567654c23cff984285b241f0c1..da55a1c60da180cf4f2aeec66f1dd0f41e22d974 100644 (file)
@@ -187,8 +187,8 @@ acpi_ns_convert_to_string(union acpi_operand_object *original_object,
                 * Copy the raw buffer data with no transform. String is already NULL
                 * terminated at Length+1.
                 */
-               ACPI_MEMCPY(new_object->string.pointer,
-                           original_object->buffer.pointer, length);
+               memcpy(new_object->string.pointer,
+                      original_object->buffer.pointer, length);
                break;
 
        default:
@@ -251,9 +251,9 @@ acpi_ns_convert_to_buffer(union acpi_operand_object *original_object,
                        return (AE_NO_MEMORY);
                }
 
-               ACPI_MEMCPY(new_object->buffer.pointer,
-                           original_object->string.pointer,
-                           original_object->string.length);
+               memcpy(new_object->buffer.pointer,
+                      original_object->string.pointer,
+                      original_object->string.length);
                break;
 
        case ACPI_TYPE_PACKAGE:
index d259393505fa9bb9230ef51a152a01c317c5350c..0f1daba640e7a5f70c5a6f0196398f65551219d8 100644 (file)
@@ -101,7 +101,7 @@ void acpi_ns_print_pathname(u32 num_segments, char *pathname)
 
        while (num_segments) {
                for (i = 0; i < 4; i++) {
-                       ACPI_IS_PRINT(pathname[i]) ?
+                       isprint((int)pathname[i]) ?
                            acpi_os_printf("%c", pathname[i]) :
                            acpi_os_printf("?");
                }
index 7bcc68f57afa61d24d6d170e04bb9c2db8271900..80670cb32b5a3fe9438e735d26ef1b269be1e3f7 100644 (file)
@@ -59,15 +59,14 @@ acpi_ns_exec_module_code(union acpi_operand_object *method_obj,
  *
  * FUNCTION:    acpi_ns_evaluate
  *
- * PARAMETERS:  info            - Evaluation info block, contains:
+ * PARAMETERS:  info            - Evaluation info block, contains these fields
+ *                                and more:
  *                  prefix_node     - Prefix or Method/Object Node to execute
  *                  relative_path   - Name of method to execute, If NULL, the
  *                                    Node is the object to execute
  *                  parameters      - List of parameters to pass to the method,
  *                                    terminated by NULL. Params itself may be
  *                                    NULL if no parameters are being passed.
- *                  return_object   - Where to put method's return value (if
- *                                    any). If NULL, no value is returned.
  *                  parameter_type  - Type of Parameter list
  *                  return_object   - Where to put method's return value (if
  *                                    any). If NULL, no value is returned.
@@ -440,7 +439,7 @@ acpi_ns_exec_module_code(union acpi_operand_object *method_obj,
 
        /* Initialize the evaluation information block */
 
-       ACPI_MEMSET(info, 0, sizeof(struct acpi_evaluate_info));
+       memset(info, 0, sizeof(struct acpi_evaluate_info));
        info->prefix_node = parent_node;
 
        /*
index 4a85c45179883b14421f324d4fcc2b9f923f0309..b744a53618eb3977663ab75a6a31e808e96b5aeb 100644 (file)
@@ -90,7 +90,7 @@ acpi_status acpi_ns_initialize_objects(void)
 
        /* Set all init info to zero */
 
-       ACPI_MEMSET(&info, 0, sizeof(struct acpi_init_walk_info));
+       memset(&info, 0, sizeof(struct acpi_init_walk_info));
 
        /* Walk entire namespace from the supplied root */
 
@@ -566,7 +566,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle,
        ACPI_DEBUG_EXEC(acpi_ut_display_init_pathname
                        (ACPI_TYPE_METHOD, device_node, METHOD_NAME__INI));
 
-       ACPI_MEMSET(info, 0, sizeof(struct acpi_evaluate_info));
+       memset(info, 0, sizeof(struct acpi_evaluate_info));
        info->prefix_node = device_node;
        info->relative_pathname = METHOD_NAME__INI;
        info->parameters = NULL;
index c95a119767b56fad8569b31d904c42edb643361f..57a4cfe547e4921d09cd41c1a2005cd769b22f83 100644 (file)
@@ -117,6 +117,13 @@ acpi_ns_one_complete_parse(u32 pass_number,
                                               (u8) pass_number);
        }
 
+       /* Found OSDT table, enable the namespace override feature */
+
+       if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_OSDT) &&
+           pass_number == ACPI_IMODE_LOAD_PASS1) {
+               walk_state->namespace_override = TRUE;
+       }
+
        if (ACPI_FAILURE(status)) {
                acpi_ds_delete_walk_state(walk_state);
                goto cleanup;
index c30672d238789668fdfd70ea7ad512838e3e7f55..0515a70f42a4fb6f27a78466b004d34517c8b51c 100644 (file)
@@ -580,7 +580,7 @@ acpi_ns_repair_HID(struct acpi_evaluate_info *info,
         * # is a hex digit.
         */
        for (dest = new_string->string.pointer; *source; dest++, source++) {
-               *dest = (char)ACPI_TOUPPER(*source);
+               *dest = (char)toupper((int)*source);
        }
 
        acpi_ut_remove_reference(return_object);
index 4a9d4a66016e51eefcb851979489bb871edab2a2..d7390401383043d7af82cf51233cdfea4f825566 100644 (file)
@@ -325,8 +325,41 @@ acpi_ns_search_and_enter(u32 target_name,
                 * If we found it AND the request specifies that a find is an error,
                 * return the error
                 */
-               if ((status == AE_OK) && (flags & ACPI_NS_ERROR_IF_FOUND)) {
-                       status = AE_ALREADY_EXISTS;
+               if (status == AE_OK) {
+
+                       /* The node was found in the namespace */
+
+                       /*
+                        * If the namespace override feature is enabled for this node,
+                        * delete any existing attached sub-object and make the node
+                        * look like a new node that is owned by the override table.
+                        */
+                       if (flags & ACPI_NS_OVERRIDE_IF_FOUND) {
+                               ACPI_DEBUG_PRINT((ACPI_DB_NAMES,
+                                                 "Namespace override: %4.4s pass %u type %X Owner %X\n",
+                                                 ACPI_CAST_PTR(char,
+                                                               &target_name),
+                                                 interpreter_mode,
+                                                 (*return_node)->type,
+                                                 walk_state->owner_id));
+
+                               acpi_ns_delete_children(*return_node);
+                               if (acpi_gbl_runtime_namespace_override) {
+                                       acpi_ut_remove_reference((*return_node)->object);
+                                       (*return_node)->object = NULL;
+                                       (*return_node)->owner_id =
+                                           walk_state->owner_id;
+                               } else {
+                                       acpi_ns_remove_node(*return_node);
+                                       *return_node = ACPI_ENTRY_NOT_FOUND;
+                               }
+                       }
+
+                       /* Return an error if we don't expect to find the object */
+
+                       else if (flags & ACPI_NS_ERROR_IF_FOUND) {
+                               status = AE_ALREADY_EXISTS;
+                       }
                }
 #ifdef ACPI_ASL_COMPILER
                if (*return_node && (*return_node)->type == ACPI_TYPE_ANY) {
index 6ad02008c0c23ae9cb7bbd3f4d8fa86b348f2dc6..8d8104b8bd28affdf70a35c38295116c25ec423c 100644 (file)
@@ -292,8 +292,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info)
                        } else {
                                /* Convert the character to uppercase and save it */
 
-                               result[i] =
-                                   (char)ACPI_TOUPPER((int)*external_name);
+                               result[i] = (char)toupper((int)*external_name);
                                external_name++;
                        }
                }
index b6030a2deee1248ab9b783bc3d30ce7ac9fbaa75..6ee1e52b903d344d31ed5b140f8385204cb5c94c 100644 (file)
@@ -696,7 +696,7 @@ acpi_ns_get_device_callback(acpi_handle obj_handle,
                        return (AE_CTRL_DEPTH);
                }
 
-               no_match = ACPI_STRCMP(hid->string, info->hid);
+               no_match = strcmp(hid->string, info->hid);
                ACPI_FREE(hid);
 
                if (no_match) {
@@ -715,8 +715,7 @@ acpi_ns_get_device_callback(acpi_handle obj_handle,
 
                        found = FALSE;
                        for (i = 0; i < cid->count; i++) {
-                               if (ACPI_STRCMP(cid->ids[i].string, info->hid)
-                                   == 0) {
+                               if (strcmp(cid->ids[i].string, info->hid) == 0) {
 
                                        /* Found a matching CID */
 
index d66c326485d82e769d5c93d4692ac0b02ca277f1..9ff643b9553fe9a4d97a803b4a0b0e7def2b299f 100644 (file)
@@ -114,7 +114,7 @@ acpi_get_handle(acpi_handle parent,
 
                /* Special case for root-only, since we can't search for it */
 
-               if (!ACPI_STRCMP(pathname, ACPI_NS_ROOT_PATH)) {
+               if (!strcmp(pathname, ACPI_NS_ROOT_PATH)) {
                        *ret_handle =
                            ACPI_CAST_PTR(acpi_handle, acpi_gbl_root_node);
                        return (AE_OK);
@@ -242,7 +242,7 @@ static char *acpi_ns_copy_device_id(struct acpi_pnp_device_id *dest,
 
        /* Copy actual string and return a pointer to the next string area */
 
-       ACPI_MEMCPY(string_area, source->string, source->length);
+       memcpy(string_area, source->string, source->length);
        return (string_area + source->length);
 }
 
@@ -260,7 +260,7 @@ static char *acpi_ns_copy_device_id(struct acpi_pnp_device_id *dest,
  *              control methods (Such as in the case of a device.)
  *
  * For Device and Processor objects, run the Device _HID, _UID, _CID, _SUB,
- * _STA, _ADR, _sx_w, and _sx_d methods.
+ * _CLS, _STA, _ADR, _sx_w, and _sx_d methods.
  *
  * Note: Allocates the return buffer, must be freed by the caller.
  *
@@ -276,11 +276,12 @@ acpi_get_object_info(acpi_handle handle,
        struct acpi_pnp_device_id *hid = NULL;
        struct acpi_pnp_device_id *uid = NULL;
        struct acpi_pnp_device_id *sub = NULL;
+       struct acpi_pnp_device_id *cls = NULL;
        char *next_id_string;
        acpi_object_type type;
        acpi_name name;
        u8 param_count = 0;
-       u8 valid = 0;
+       u16 valid = 0;
        u32 info_size;
        u32 i;
        acpi_status status;
@@ -320,7 +321,7 @@ acpi_get_object_info(acpi_handle handle,
        if ((type == ACPI_TYPE_DEVICE) || (type == ACPI_TYPE_PROCESSOR)) {
                /*
                 * Get extra info for ACPI Device/Processor objects only:
-                * Run the Device _HID, _UID, _SUB, and _CID methods.
+                * Run the Device _HID, _UID, _SUB, _CID, and _CLS methods.
                 *
                 * Note: none of these methods are required, so they may or may
                 * not be present for this device. The Info->Valid bitfield is used
@@ -363,6 +364,14 @@ acpi_get_object_info(acpi_handle handle,
                             sizeof(struct acpi_pnp_device_id_list));
                        valid |= ACPI_VALID_CID;
                }
+
+               /* Execute the Device._CLS method */
+
+               status = acpi_ut_execute_CLS(node, &cls);
+               if (ACPI_SUCCESS(status)) {
+                       info_size += cls->length;
+                       valid |= ACPI_VALID_CLS;
+               }
        }
 
        /*
@@ -486,6 +495,11 @@ acpi_get_object_info(acpi_handle handle,
                }
        }
 
+       if (cls) {
+               next_id_string = acpi_ns_copy_device_id(&info->class_code,
+                                                       cls, next_id_string);
+       }
+
        /* Copy the fixed-length data */
 
        info->info_size = info_size;
@@ -510,6 +524,9 @@ cleanup:
        if (cid_list) {
                ACPI_FREE(cid_list);
        }
+       if (cls) {
+               ACPI_FREE(cls);
+       }
        return (status);
 }
 
@@ -620,7 +637,7 @@ acpi_status acpi_install_method(u8 *buffer)
 
        /* Copy the method AML to the local buffer */
 
-       ACPI_MEMCPY(aml_buffer, aml_start, aml_length);
+       memcpy(aml_buffer, aml_start, aml_length);
 
        /* Initialize the method object with the new method's information */
 
index 960505ab409a8b3958336058bb26837b6c9109c0..32440912023a7d4a8ab65bf84de8df7beb37d633 100644 (file)
@@ -93,10 +93,9 @@ void acpi_ps_init_op(union acpi_parse_object *op, u16 opcode)
        op->common.descriptor_type = ACPI_DESC_TYPE_PARSER;
        op->common.aml_opcode = opcode;
 
-       ACPI_DISASM_ONLY_MEMBERS(ACPI_STRNCPY(op->common.aml_op_name,
-                                             (acpi_ps_get_opcode_info
-                                              (opcode))->name,
-                                             sizeof(op->common.aml_op_name)));
+       ACPI_DISASM_ONLY_MEMBERS(strncpy(op->common.aml_op_name,
+                                        (acpi_ps_get_opcode_info(opcode))->
+                                        name, sizeof(op->common.aml_op_name)));
 }
 
 /*******************************************************************************
index 15434e4c9b344411f6b3c0ec8dbb5193f81b9e41..3fa829e96c2a0de77e079fb8d95a3a7f25240a4f 100644 (file)
@@ -353,13 +353,13 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object,
                                /* +1 to include null terminator */
 
                                user_prt->length +=
-                                   (u32) ACPI_STRLEN(user_prt->source) + 1;
+                                   (u32)strlen(user_prt->source) + 1;
                                break;
 
                        case ACPI_TYPE_STRING:
 
-                               ACPI_STRCPY(user_prt->source,
-                                           obj_desc->string.pointer);
+                               strcpy(user_prt->source,
+                                      obj_desc->string.pointer);
 
                                /*
                                 * Add to the Length field the length of the string
index 1fe49d22366333b2172e82d622bfe7f6289d8f0d..ac37852e082173869fef4cc872e6734451d0419f 100644 (file)
@@ -119,7 +119,7 @@ acpi_rs_convert_aml_to_resource(struct acpi_resource *resource,
                        /*
                         * Get the resource type and the initial (minimum) length
                         */
-                       ACPI_MEMSET(resource, 0, INIT_RESOURCE_LENGTH(info));
+                       memset(resource, 0, INIT_RESOURCE_LENGTH(info));
                        resource->type = INIT_RESOURCE_TYPE(info);
                        resource->length = INIT_RESOURCE_LENGTH(info);
                        break;
@@ -324,13 +324,13 @@ acpi_rs_convert_aml_to_resource(struct acpi_resource *resource,
 
                case ACPI_RSC_SET8:
 
-                       ACPI_MEMSET(destination, info->aml_offset, info->value);
+                       memset(destination, info->aml_offset, info->value);
                        break;
 
                case ACPI_RSC_DATA8:
 
                        target = ACPI_ADD_PTR(char, resource, info->value);
-                       ACPI_MEMCPY(destination, source, ACPI_GET16(target));
+                       memcpy(destination, source, ACPI_GET16(target));
                        break;
 
                case ACPI_RSC_ADDRESS:
@@ -502,7 +502,7 @@ acpi_rs_convert_resource_to_aml(struct acpi_resource *resource,
                switch (info->opcode) {
                case ACPI_RSC_INITSET:
 
-                       ACPI_MEMSET(aml, 0, INIT_RESOURCE_LENGTH(info));
+                       memset(aml, 0, INIT_RESOURCE_LENGTH(info));
                        aml_length = INIT_RESOURCE_LENGTH(info);
                        acpi_rs_set_resource_header(INIT_RESOURCE_TYPE(info),
                                                    aml_length, aml);
index ece3cd60cc6a0664608cbe97b1110845ce26330d..52b024df00524bffae3bb1117cffa0d3dc31eb3b 100644 (file)
@@ -148,7 +148,7 @@ acpi_rs_move_data(void *destination, void *source, u16 item_count, u8 move_type)
                case ACPI_RSC_MOVE_SERIAL_VEN:
                case ACPI_RSC_MOVE_SERIAL_RES:
 
-                       ACPI_MEMCPY(destination, source, item_count);
+                       memcpy(destination, source, item_count);
                        return;
 
                        /*
@@ -364,12 +364,11 @@ acpi_rs_get_resource_source(acpi_rs_length resource_length,
                 * Zero the entire area of the buffer.
                 */
                total_length =
-                   (u32)
-                   ACPI_STRLEN(ACPI_CAST_PTR(char, &aml_resource_source[1])) +
+                   (u32)strlen(ACPI_CAST_PTR(char, &aml_resource_source[1])) +
                    1;
-               total_length = (u32) ACPI_ROUND_UP_TO_NATIVE_WORD(total_length);
+               total_length = (u32)ACPI_ROUND_UP_TO_NATIVE_WORD(total_length);
 
-               ACPI_MEMSET(resource_source->string_ptr, 0, total_length);
+               memset(resource_source->string_ptr, 0, total_length);
 
                /* Copy the resource_source string to the destination */
 
@@ -432,8 +431,8 @@ acpi_rs_set_resource_source(union aml_resource * aml,
 
                /* Copy the resource_source string */
 
-               ACPI_STRCPY(ACPI_CAST_PTR(char, &aml_resource_source[1]),
-                           resource_source->string_ptr);
+               strcpy(ACPI_CAST_PTR(char, &aml_resource_source[1]),
+                      resource_source->string_ptr);
 
                /*
                 * Add the length of the string (+ 1 for null terminator) to the
index 8e6276df0226ef5ff5e5297afa0d5708e471c738..de51f836ef68e23280897bda0aca300b5341db90 100644 (file)
@@ -398,8 +398,8 @@ acpi_resource_to_address64(struct acpi_resource *resource,
 
                /* Simple copy for 64 bit source */
 
-               ACPI_MEMCPY(out, &resource->data,
-                           sizeof(struct acpi_resource_address64));
+               memcpy(out, &resource->data,
+                      sizeof(struct acpi_resource_address64));
                break;
 
        default:
@@ -499,7 +499,7 @@ acpi_rs_match_vendor_resource(struct acpi_resource *resource, void *context)
         */
        if ((vendor->byte_length < (ACPI_UUID_LENGTH + 1)) ||
            (vendor->uuid_subtype != info->uuid->subtype) ||
-           (ACPI_MEMCMP(vendor->uuid, info->uuid->data, ACPI_UUID_LENGTH))) {
+           (memcmp(vendor->uuid, info->uuid->data, ACPI_UUID_LENGTH))) {
                return (AE_OK);
        }
 
@@ -513,7 +513,7 @@ acpi_rs_match_vendor_resource(struct acpi_resource *resource, void *context)
 
        /* Found the correct resource, copy and return it */
 
-       ACPI_MEMCPY(buffer->pointer, resource, resource->length);
+       memcpy(buffer->pointer, resource, resource->length);
        buffer->length = resource->length;
 
        /* Found the desired descriptor, terminate resource walk */
index d7f8386455bdc0e2ce35a36b7f80c188d376b86f..5c9d5abf15887e4e5d42f3e8651eee0876bb1697 100644 (file)
@@ -73,7 +73,7 @@ acpi_tb_init_table_descriptor(struct acpi_table_desc *table_desc,
         * Initialize the table descriptor. Set the pointer to NULL, since the
         * table is not fully mapped at this time.
         */
-       ACPI_MEMSET(table_desc, 0, sizeof(struct acpi_table_desc));
+       memset(table_desc, 0, sizeof(struct acpi_table_desc));
        table_desc->address = address;
        table_desc->length = table->length;
        table_desc->flags = flags;
@@ -465,9 +465,9 @@ acpi_status acpi_tb_resize_root_table_list(void)
        /* Copy and free the previous table array */
 
        if (acpi_gbl_root_table_list.tables) {
-               ACPI_MEMCPY(tables, acpi_gbl_root_table_list.tables,
-                           (acpi_size) table_count *
-                           sizeof(struct acpi_table_desc));
+               memcpy(tables, acpi_gbl_root_table_list.tables,
+                      (acpi_size) table_count *
+                      sizeof(struct acpi_table_desc));
 
                if (acpi_gbl_root_table_list.flags & ACPI_ROOT_ORIGIN_ALLOCATED) {
                        ACPI_FREE(acpi_gbl_root_table_list.tables);
index 7d2486005e3f24fe0ca38d61d8dc36d21362066e..6253001b6375d16629b7066f23377460ecbf0fce 100644 (file)
@@ -350,9 +350,18 @@ void acpi_tb_parse_fadt(u32 table_index)
        /* If Hardware Reduced flag is set, there is no FACS */
 
        if (!acpi_gbl_reduced_hardware) {
-               acpi_tb_install_fixed_table((acpi_physical_address)
-                                           acpi_gbl_FADT.Xfacs, ACPI_SIG_FACS,
-                                           ACPI_TABLE_INDEX_FACS);
+               if (acpi_gbl_FADT.facs) {
+                       acpi_tb_install_fixed_table((acpi_physical_address)
+                                                   acpi_gbl_FADT.facs,
+                                                   ACPI_SIG_FACS,
+                                                   ACPI_TABLE_INDEX_FACS);
+               }
+               if (acpi_gbl_FADT.Xfacs) {
+                       acpi_tb_install_fixed_table((acpi_physical_address)
+                                                   acpi_gbl_FADT.Xfacs,
+                                                   ACPI_SIG_FACS,
+                                                   ACPI_TABLE_INDEX_X_FACS);
+               }
        }
 }
 
@@ -389,12 +398,12 @@ void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length)
 
        /* Clear the entire local FADT */
 
-       ACPI_MEMSET(&acpi_gbl_FADT, 0, sizeof(struct acpi_table_fadt));
+       memset(&acpi_gbl_FADT, 0, sizeof(struct acpi_table_fadt));
 
        /* Copy the original FADT, up to sizeof (struct acpi_table_fadt) */
 
-       ACPI_MEMCPY(&acpi_gbl_FADT, table,
-                   ACPI_MIN(length, sizeof(struct acpi_table_fadt)));
+       memcpy(&acpi_gbl_FADT, table,
+              ACPI_MIN(length, sizeof(struct acpi_table_fadt)));
 
        /* Take a copy of the Hardware Reduced flag */
 
@@ -491,13 +500,9 @@ static void acpi_tb_convert_fadt(void)
        acpi_gbl_FADT.header.length = sizeof(struct acpi_table_fadt);
 
        /*
-        * Expand the 32-bit FACS and DSDT addresses to 64-bit as necessary.
+        * Expand the 32-bit DSDT addresses to 64-bit as necessary.
         * Later ACPICA code will always use the X 64-bit field.
         */
-       acpi_gbl_FADT.Xfacs = acpi_tb_select_address("FACS",
-                                                    acpi_gbl_FADT.facs,
-                                                    acpi_gbl_FADT.Xfacs);
-
        acpi_gbl_FADT.Xdsdt = acpi_tb_select_address("DSDT",
                                                     acpi_gbl_FADT.dsdt,
                                                     acpi_gbl_FADT.Xdsdt);
index 0b879fcfef670c535f30ac57cb403ff87dcbf266..119c84ad98334e2404fb72e585c4ec30ee49c684 100644 (file)
@@ -76,16 +76,16 @@ acpi_tb_find_table(char *signature,
 
        /* Normalize the input strings */
 
-       ACPI_MEMSET(&header, 0, sizeof(struct acpi_table_header));
+       memset(&header, 0, sizeof(struct acpi_table_header));
        ACPI_MOVE_NAME(header.signature, signature);
-       ACPI_STRNCPY(header.oem_id, oem_id, ACPI_OEM_ID_SIZE);
-       ACPI_STRNCPY(header.oem_table_id, oem_table_id, ACPI_OEM_TABLE_ID_SIZE);
+       strncpy(header.oem_id, oem_id, ACPI_OEM_ID_SIZE);
+       strncpy(header.oem_table_id, oem_table_id, ACPI_OEM_TABLE_ID_SIZE);
 
        /* Search for the table */
 
        for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) {
-               if (ACPI_MEMCMP(&(acpi_gbl_root_table_list.tables[i].signature),
-                               header.signature, ACPI_NAME_SIZE)) {
+               if (memcmp(&(acpi_gbl_root_table_list.tables[i].signature),
+                          header.signature, ACPI_NAME_SIZE)) {
 
                        /* Not the requested table */
 
@@ -112,21 +112,20 @@ acpi_tb_find_table(char *signature,
 
                /* Check for table match on all IDs */
 
-               if (!ACPI_MEMCMP
+               if (!memcmp
                    (acpi_gbl_root_table_list.tables[i].pointer->signature,
                     header.signature, ACPI_NAME_SIZE) && (!oem_id[0]
                                                           ||
-                                                          !ACPI_MEMCMP
+                                                          !memcmp
                                                           (acpi_gbl_root_table_list.
                                                            tables[i].pointer->
                                                            oem_id,
                                                            header.oem_id,
                                                            ACPI_OEM_ID_SIZE))
                    && (!oem_table_id[0]
-                       || !ACPI_MEMCMP(acpi_gbl_root_table_list.tables[i].
-                                       pointer->oem_table_id,
-                                       header.oem_table_id,
-                                       ACPI_OEM_TABLE_ID_SIZE))) {
+                       || !memcmp(acpi_gbl_root_table_list.tables[i].pointer->
+                                  oem_table_id, header.oem_table_id,
+                                  ACPI_OEM_TABLE_ID_SIZE))) {
                        *table_index = i;
 
                        ACPI_DEBUG_PRINT((ACPI_DB_TABLES,
index 008a251780f4c955194c7193dd943bf955028b34..15ea98e0068d80971834a5d15bd5c7ef30356f41 100644 (file)
@@ -87,8 +87,8 @@ acpi_tb_compare_tables(struct acpi_table_desc *table_desc, u32 table_index)
         * not just the header.
         */
        is_identical = (u8)((table_desc->length != table_length ||
-                            ACPI_MEMCMP(table_desc->pointer, table,
-                                        table_length)) ? FALSE : TRUE);
+                            memcmp(table_desc->pointer, table, table_length)) ?
+                           FALSE : TRUE);
 
        /* Release the acquired table */
 
@@ -289,8 +289,7 @@ acpi_tb_install_standard_table(acpi_physical_address address,
                if ((new_table_desc.signature.ascii[0] != 0x00) &&
                    (!ACPI_COMPARE_NAME
                     (&new_table_desc.signature, ACPI_SIG_SSDT))
-                   && (ACPI_STRNCMP(new_table_desc.signature.ascii, "OEM", 3)))
-               {
+                   && (strncmp(new_table_desc.signature.ascii, "OEM", 3))) {
                        ACPI_BIOS_ERROR((AE_INFO,
                                         "Table has invalid signature [%4.4s] (0x%8.8X), "
                                         "must be SSDT or OEMx",
index 77ba5c71c6e787e88a7f7923e44e1bc3adfeb7ca..709d5112fc1679db4a6ff28412a27bbe5c334d81 100644 (file)
@@ -73,7 +73,7 @@ static void acpi_tb_fix_string(char *string, acpi_size length)
 {
 
        while (length && *string) {
-               if (!ACPI_IS_PRINT(*string)) {
+               if (!isprint((int)*string)) {
                        *string = '?';
                }
                string++;
@@ -100,7 +100,7 @@ acpi_tb_cleanup_table_header(struct acpi_table_header *out_header,
                             struct acpi_table_header *header)
 {
 
-       ACPI_MEMCPY(out_header, header, sizeof(struct acpi_table_header));
+       memcpy(out_header, header, sizeof(struct acpi_table_header));
 
        acpi_tb_fix_string(out_header->signature, ACPI_NAME_SIZE);
        acpi_tb_fix_string(out_header->oem_id, ACPI_OEM_ID_SIZE);
@@ -138,9 +138,9 @@ acpi_tb_print_table_header(acpi_physical_address address,
 
                /* RSDP has no common fields */
 
-               ACPI_MEMCPY(local_header.oem_id,
-                           ACPI_CAST_PTR(struct acpi_table_rsdp,
-                                         header)->oem_id, ACPI_OEM_ID_SIZE);
+               memcpy(local_header.oem_id,
+                      ACPI_CAST_PTR(struct acpi_table_rsdp, header)->oem_id,
+                      ACPI_OEM_ID_SIZE);
                acpi_tb_fix_string(local_header.oem_id, ACPI_OEM_ID_SIZE);
 
                ACPI_INFO((AE_INFO, "RSDP 0x%8.8X%8.8X %06X (v%.2d %-6.6s)",
index 6559a58439c5dfadb0adbb19ac8c016d4197573b..568ac0e4a3c6a784efe38349213c26146888f6d5 100644 (file)
@@ -68,7 +68,6 @@ acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size);
 
 acpi_status acpi_tb_initialize_facs(void)
 {
-       acpi_status status;
 
        /* If Hardware Reduced flag is set, there is no FACS */
 
@@ -77,11 +76,25 @@ acpi_status acpi_tb_initialize_facs(void)
                return (AE_OK);
        }
 
-       status = acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS,
-                                        ACPI_CAST_INDIRECT_PTR(struct
-                                                               acpi_table_header,
-                                                               &acpi_gbl_FACS));
-       return (status);
+       (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS,
+                                     ACPI_CAST_INDIRECT_PTR(struct
+                                                            acpi_table_header,
+                                                            &acpi_gbl_facs32));
+       (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_X_FACS,
+                                     ACPI_CAST_INDIRECT_PTR(struct
+                                                            acpi_table_header,
+                                                            &acpi_gbl_facs64));
+
+       if (acpi_gbl_facs64
+           && (!acpi_gbl_facs32 || !acpi_gbl_use32_bit_facs_addresses)) {
+               acpi_gbl_FACS = acpi_gbl_facs64;
+       } else if (acpi_gbl_facs32) {
+               acpi_gbl_FACS = acpi_gbl_facs32;
+       }
+
+       /* If there is no FACS, just continue. There was already an error msg */
+
+       return (AE_OK);
 }
 #endif                         /* !ACPI_REDUCED_HARDWARE */
 
@@ -101,7 +114,7 @@ acpi_status acpi_tb_initialize_facs(void)
 u8 acpi_tb_tables_loaded(void)
 {
 
-       if (acpi_gbl_root_table_list.current_table_count >= 3) {
+       if (acpi_gbl_root_table_list.current_table_count >= 4) {
                return (TRUE);
        }
 
@@ -175,7 +188,7 @@ struct acpi_table_header *acpi_tb_copy_dsdt(u32 table_index)
                return (NULL);
        }
 
-       ACPI_MEMCPY(new_table, table_desc->pointer, table_desc->length);
+       memcpy(new_table, table_desc->pointer, table_desc->length);
        acpi_tb_uninstall_table(table_desc);
 
        acpi_tb_init_table_descriptor(&acpi_gbl_root_table_list.
@@ -357,11 +370,11 @@ acpi_status __init acpi_tb_parse_root_table(acpi_physical_address rsdp_address)
        table_entry = ACPI_ADD_PTR(u8, table, sizeof(struct acpi_table_header));
 
        /*
-        * First two entries in the table array are reserved for the DSDT
-        * and FACS, which are not actually present in the RSDT/XSDT - they
-        * come from the FADT
+        * First three entries in the table array are reserved for the DSDT
+        * and 32bit/64bit FACS, which are not actually present in the
+        * RSDT/XSDT - they come from the FADT
         */
-       acpi_gbl_root_table_list.current_table_count = 2;
+       acpi_gbl_root_table_list.current_table_count = 3;
 
        /* Initialize the root table array from the RSDT/XSDT */
 
index 60e94f87f27aeea917c9705358ea4675666363d9..5559e2c70b15634384fa907b1934d1df4c97bdf3 100644 (file)
@@ -119,9 +119,9 @@ acpi_initialize_tables(struct acpi_table_desc * initial_table_array,
        } else {
                /* Root Table Array has been statically allocated by the host */
 
-               ACPI_MEMSET(initial_table_array, 0,
-                           (acpi_size) initial_table_count *
-                           sizeof(struct acpi_table_desc));
+               memset(initial_table_array, 0,
+                      (acpi_size) initial_table_count *
+                      sizeof(struct acpi_table_desc));
 
                acpi_gbl_root_table_list.tables = initial_table_array;
                acpi_gbl_root_table_list.max_table_count = initial_table_count;
@@ -242,8 +242,9 @@ acpi_get_table_header(char *signature,
                                if (!header) {
                                        return (AE_NO_MEMORY);
                                }
-                               ACPI_MEMCPY(out_table_header, header,
-                                           sizeof(struct acpi_table_header));
+
+                               memcpy(out_table_header, header,
+                                      sizeof(struct acpi_table_header));
                                acpi_os_unmap_memory(header,
                                                     sizeof(struct
                                                            acpi_table_header));
@@ -251,9 +252,9 @@ acpi_get_table_header(char *signature,
                                return (AE_NOT_FOUND);
                        }
                } else {
-                       ACPI_MEMCPY(out_table_header,
-                                   acpi_gbl_root_table_list.tables[i].pointer,
-                                   sizeof(struct acpi_table_header));
+                       memcpy(out_table_header,
+                              acpi_gbl_root_table_list.tables[i].pointer,
+                              sizeof(struct acpi_table_header));
                }
                return (AE_OK);
        }
index aadb3002a2ddd9cfb020d2cbcf49919313c1c589..9682d40ca6ffe70f8c50078fc4f89172b7b37702 100644 (file)
@@ -150,8 +150,8 @@ static acpi_status acpi_tb_load_namespace(void)
         * Save the original DSDT header for detection of table corruption
         * and/or replacement of the DSDT from outside the OS.
         */
-       ACPI_MEMCPY(&acpi_gbl_original_dsdt_header, acpi_gbl_DSDT,
-                   sizeof(struct acpi_table_header));
+       memcpy(&acpi_gbl_original_dsdt_header, acpi_gbl_DSDT,
+              sizeof(struct acpi_table_header));
 
        (void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
 
@@ -166,13 +166,18 @@ static acpi_status acpi_tb_load_namespace(void)
 
        (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
        for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) {
-               if ((!ACPI_COMPARE_NAME
+               if (!acpi_gbl_root_table_list.tables[i].address ||
+                   (!ACPI_COMPARE_NAME
                     (&(acpi_gbl_root_table_list.tables[i].signature),
                      ACPI_SIG_SSDT)
                     &&
                     !ACPI_COMPARE_NAME(&
                                        (acpi_gbl_root_table_list.tables[i].
-                                        signature), ACPI_SIG_PSDT))
+                                        signature), ACPI_SIG_PSDT)
+                    &&
+                    !ACPI_COMPARE_NAME(&
+                                       (acpi_gbl_root_table_list.tables[i].
+                                        signature), ACPI_SIG_OSDT))
                    ||
                    ACPI_FAILURE(acpi_tb_validate_table
                                 (&acpi_gbl_root_table_list.tables[i]))) {
@@ -219,9 +224,9 @@ acpi_install_table(acpi_physical_address address, u8 physical)
        ACPI_FUNCTION_TRACE(acpi_install_table);
 
        if (physical) {
-               flags = ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL;
-       } else {
                flags = ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL;
+       } else {
+               flags = ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL;
        }
 
        status = acpi_tb_install_standard_table(address, flags,
index 61d8f6d186d11c15c0f6f6c9489cf2b6d7fdae32..7a4101f0685e90d748fc8910255abf2df517331c 100644 (file)
@@ -73,7 +73,7 @@ void *acpi_os_allocate_zeroed(acpi_size size)
 
                /* Clear the memory block */
 
-               ACPI_MEMSET(allocation, 0, size);
+               memset(allocation, 0, size);
        }
 
        return (allocation);
@@ -181,7 +181,7 @@ acpi_status acpi_ut_delete_caches(void)
        char buffer[7];
 
        if (acpi_gbl_display_final_mem_stats) {
-               ACPI_STRCPY(buffer, "MEMORY");
+               strcpy(buffer, "MEMORY");
                (void)acpi_db_display_statistics(buffer);
        }
 #endif
@@ -337,6 +337,6 @@ acpi_ut_initialize_buffer(struct acpi_buffer * buffer,
 
        /* Have a valid buffer, clear it */
 
-       ACPI_MEMSET(buffer->pointer, 0, required_length);
+       memset(buffer->pointer, 0, required_length);
        return (AE_OK);
 }
index a8c39643e6181fbebff0aa047b1391551720082a..01c8709ca58694a17c4e766fd852a14898ac19d8 100644 (file)
@@ -159,7 +159,7 @@ void acpi_ut_dump_buffer(u8 *buffer, u32 count, u32 display, u32 base_offset)
                        }
 
                        buf_char = buffer[(acpi_size) i + j];
-                       if (ACPI_IS_PRINT(buf_char)) {
+                       if (isprint(buf_char)) {
                                acpi_os_printf("%c", buf_char);
                        } else {
                                acpi_os_printf(".");
@@ -319,7 +319,7 @@ acpi_ut_dump_buffer_to_file(ACPI_FILE file,
                        }
 
                        buf_char = buffer[(acpi_size) i + j];
-                       if (ACPI_IS_PRINT(buf_char)) {
+                       if (isprint(buf_char)) {
                                acpi_ut_file_printf(file, "%c", buf_char);
                        } else {
                                acpi_ut_file_printf(file, ".");
index eacc5eee362ebd9e7c5290ff6054ee5009ca633f..0d21fbd993633f774d78ca93f7bef2a33f2ff72f 100644 (file)
@@ -84,7 +84,7 @@ acpi_os_create_cache(char *cache_name,
 
        /* Populate the cache object and return it */
 
-       ACPI_MEMSET(cache, 0, sizeof(struct acpi_memory_list));
+       memset(cache, 0, sizeof(struct acpi_memory_list));
        cache->list_name = cache_name;
        cache->object_size = object_size;
        cache->max_depth = max_depth;
@@ -212,7 +212,7 @@ acpi_os_release_object(struct acpi_memory_list * cache, void *object)
 
                /* Mark the object as cached */
 
-               ACPI_MEMSET(object, 0xCA, cache->object_size);
+               memset(object, 0xCA, cache->object_size);
                ACPI_SET_DESCRIPTOR_TYPE(object, ACPI_DESC_TYPE_CACHED);
 
                /* Put the object at the head of the cache list */
@@ -281,7 +281,7 @@ void *acpi_os_acquire_object(struct acpi_memory_list *cache)
 
                /* Clear (zero) the previously used Object */
 
-               ACPI_MEMSET(object, 0, cache->object_size);
+               memset(object, 0, cache->object_size);
        } else {
                /* The cache is empty, create a new object */
 
index c37ec5035f4c5f48e207cbd5082db3f55780550d..257221d452c8839262faf80465969b0ec75d8a54 100644 (file)
@@ -129,7 +129,7 @@ acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object,
 
        /* Always clear the external object */
 
-       ACPI_MEMSET(external_object, 0, sizeof(union acpi_object));
+       memset(external_object, 0, sizeof(union acpi_object));
 
        /*
         * In general, the external object will be the same type as
@@ -149,9 +149,9 @@ acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object,
                                                                  string.
                                                                  length + 1);
 
-               ACPI_MEMCPY((void *)data_space,
-                           (void *)internal_object->string.pointer,
-                           (acpi_size) internal_object->string.length + 1);
+               memcpy((void *)data_space,
+                      (void *)internal_object->string.pointer,
+                      (acpi_size) internal_object->string.length + 1);
                break;
 
        case ACPI_TYPE_BUFFER:
@@ -162,9 +162,9 @@ acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object,
                    ACPI_ROUND_UP_TO_NATIVE_WORD(internal_object->string.
                                                 length);
 
-               ACPI_MEMCPY((void *)data_space,
-                           (void *)internal_object->buffer.pointer,
-                           internal_object->buffer.length);
+               memcpy((void *)data_space,
+                      (void *)internal_object->buffer.pointer,
+                      internal_object->buffer.length);
                break;
 
        case ACPI_TYPE_INTEGER:
@@ -502,9 +502,9 @@ acpi_ut_copy_esimple_to_isimple(union acpi_object *external_object,
                        goto error_exit;
                }
 
-               ACPI_MEMCPY(internal_object->string.pointer,
-                           external_object->string.pointer,
-                           external_object->string.length);
+               memcpy(internal_object->string.pointer,
+                      external_object->string.pointer,
+                      external_object->string.length);
 
                internal_object->string.length = external_object->string.length;
                break;
@@ -517,9 +517,9 @@ acpi_ut_copy_esimple_to_isimple(union acpi_object *external_object,
                        goto error_exit;
                }
 
-               ACPI_MEMCPY(internal_object->buffer.pointer,
-                           external_object->buffer.pointer,
-                           external_object->buffer.length);
+               memcpy(internal_object->buffer.pointer,
+                      external_object->buffer.pointer,
+                      external_object->buffer.length);
 
                internal_object->buffer.length = external_object->buffer.length;
 
@@ -694,8 +694,8 @@ acpi_ut_copy_simple_object(union acpi_operand_object *source_desc,
                copy_size = sizeof(struct acpi_namespace_node);
        }
 
-       ACPI_MEMCPY(ACPI_CAST_PTR(char, dest_desc),
-                   ACPI_CAST_PTR(char, source_desc), copy_size);
+       memcpy(ACPI_CAST_PTR(char, dest_desc),
+              ACPI_CAST_PTR(char, source_desc), copy_size);
 
        /* Restore the saved fields */
 
@@ -725,9 +725,9 @@ acpi_ut_copy_simple_object(union acpi_operand_object *source_desc,
 
                        /* Copy the actual buffer data */
 
-                       ACPI_MEMCPY(dest_desc->buffer.pointer,
-                                   source_desc->buffer.pointer,
-                                   source_desc->buffer.length);
+                       memcpy(dest_desc->buffer.pointer,
+                              source_desc->buffer.pointer,
+                              source_desc->buffer.length);
                }
                break;
 
@@ -747,9 +747,9 @@ acpi_ut_copy_simple_object(union acpi_operand_object *source_desc,
 
                        /* Copy the actual string data */
 
-                       ACPI_MEMCPY(dest_desc->string.pointer,
-                                   source_desc->string.pointer,
-                                   (acpi_size) source_desc->string.length + 1);
+                       memcpy(dest_desc->string.pointer,
+                              source_desc->string.pointer,
+                              (acpi_size) source_desc->string.length + 1);
                }
                break;
 
index 4f3f888d33bb189793adc849a9c3217f981927d0..cd02693841db0bdeaa51dcf747c5f06fbf1d1e2f 100644 (file)
@@ -111,8 +111,8 @@ void acpi_ut_track_stack_ptr(void)
  * RETURN:      Updated pointer to the function name
  *
  * DESCRIPTION: Remove the "Acpi" prefix from the function name, if present.
- *              This allows compiler macros such as __func__ to be used with no
- *              change to the debug output.
+ *              This allows compiler macros such as __func__ to be used
+ *              with no change to the debug output.
  *
  ******************************************************************************/
 
index 5e8df9177da44781ac07942d146e47b97e954cda..a72685c1e819660768933e84abe9d1076c953bff 100644 (file)
@@ -102,12 +102,19 @@ const struct acpi_predefined_names acpi_gbl_pre_defined_names[] = {
        {"_SB_", ACPI_TYPE_DEVICE, NULL},
        {"_SI_", ACPI_TYPE_LOCAL_SCOPE, NULL},
        {"_TZ_", ACPI_TYPE_DEVICE, NULL},
-       {"_REV", ACPI_TYPE_INTEGER, (char *)ACPI_CA_SUPPORT_LEVEL},
+       /*
+        * March, 2015:
+        * The _REV object is in the process of being deprecated, because
+        * other ACPI implementations permanently return 2. Thus, it
+        * has little or no value. Return 2 for compatibility with
+        * other ACPI implementations.
+        */
+       {"_REV", ACPI_TYPE_INTEGER, ACPI_CAST_PTR(char, 2)},
        {"_OS_", ACPI_TYPE_STRING, ACPI_OS_NAME},
-       {"_GL_", ACPI_TYPE_MUTEX, (char *)1},
+       {"_GL_", ACPI_TYPE_MUTEX, ACPI_CAST_PTR(char, 1)},
 
 #if !defined (ACPI_NO_METHOD_EXECUTION) || defined (ACPI_CONSTANT_EVAL_ONLY)
-       {"_OSI", ACPI_TYPE_METHOD, (char *)1},
+       {"_OSI", ACPI_TYPE_METHOD, ACPI_CAST_PTR(char, 1)},
 #endif
 
        /* Table terminator */
index 27431cfc1c4476c6128c90f240bb8d090831486c..7956df1e263c1cb1271614cdba965d1592d9a18a 100644 (file)
@@ -1,6 +1,6 @@
 /******************************************************************************
  *
- * Module Name: utids - support for device Ids - HID, UID, CID
+ * Module Name: utids - support for device Ids - HID, UID, CID, SUB, CLS
  *
  *****************************************************************************/
 
@@ -111,7 +111,7 @@ acpi_ut_execute_HID(struct acpi_namespace_node *device_node,
        if (obj_desc->common.type == ACPI_TYPE_INTEGER) {
                acpi_ex_eisa_id_to_string(hid->string, obj_desc->integer.value);
        } else {
-               ACPI_STRCPY(hid->string, obj_desc->string.pointer);
+               strcpy(hid->string, obj_desc->string.pointer);
        }
 
        hid->length = length;
@@ -180,7 +180,7 @@ acpi_ut_execute_SUB(struct acpi_namespace_node *device_node,
 
        /* Simply copy existing string */
 
-       ACPI_STRCPY(sub->string, obj_desc->string.pointer);
+       strcpy(sub->string, obj_desc->string.pointer);
        sub->length = length;
        *return_id = sub;
 
@@ -256,7 +256,7 @@ acpi_ut_execute_UID(struct acpi_namespace_node *device_node,
        if (obj_desc->common.type == ACPI_TYPE_INTEGER) {
                acpi_ex_integer_to_string(uid->string, obj_desc->integer.value);
        } else {
-               ACPI_STRCPY(uid->string, obj_desc->string.pointer);
+               strcpy(uid->string, obj_desc->string.pointer);
        }
 
        uid->length = length;
@@ -393,8 +393,7 @@ acpi_ut_execute_CID(struct acpi_namespace_node *device_node,
 
                        /* Copy the String CID from the returned object */
 
-                       ACPI_STRCPY(next_id_string,
-                                   cid_objects[i]->string.pointer);
+                       strcpy(next_id_string, cid_objects[i]->string.pointer);
                        length = cid_objects[i]->string.length + 1;
                }
 
@@ -416,3 +415,92 @@ cleanup:
        acpi_ut_remove_reference(obj_desc);
        return_ACPI_STATUS(status);
 }
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_execute_CLS
+ *
+ * PARAMETERS:  device_node         - Node for the device
+ *              return_id           - Where the _CLS is returned
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Executes the _CLS control method that returns PCI-defined
+ *              class code of the device. The _CLS value is always a package
+ *              containing PCI class information as a list of integers.
+ *              The returned string has format "BBSSPP", where:
+ *                BB = Base-class code
+ *                SS = Sub-class code
+ *                PP = Programming Interface code
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ut_execute_CLS(struct acpi_namespace_node *device_node,
+                   struct acpi_pnp_device_id **return_id)
+{
+       union acpi_operand_object *obj_desc;
+       union acpi_operand_object **cls_objects;
+       u32 count;
+       struct acpi_pnp_device_id *cls;
+       u32 length;
+       acpi_status status;
+       u8 class_code[3] = { 0, 0, 0 };
+
+       ACPI_FUNCTION_TRACE(ut_execute_CLS);
+
+       status = acpi_ut_evaluate_object(device_node, METHOD_NAME__CLS,
+                                        ACPI_BTYPE_PACKAGE, &obj_desc);
+       if (ACPI_FAILURE(status)) {
+               return_ACPI_STATUS(status);
+       }
+
+       /* Get the size of the String to be returned, includes null terminator */
+
+       length = ACPI_PCICLS_STRING_SIZE;
+       cls_objects = obj_desc->package.elements;
+       count = obj_desc->package.count;
+
+       if (obj_desc->common.type == ACPI_TYPE_PACKAGE) {
+               if (count > 0
+                   && cls_objects[0]->common.type == ACPI_TYPE_INTEGER) {
+                       class_code[0] = (u8)cls_objects[0]->integer.value;
+               }
+               if (count > 1
+                   && cls_objects[1]->common.type == ACPI_TYPE_INTEGER) {
+                       class_code[1] = (u8)cls_objects[1]->integer.value;
+               }
+               if (count > 2
+                   && cls_objects[2]->common.type == ACPI_TYPE_INTEGER) {
+                       class_code[2] = (u8)cls_objects[2]->integer.value;
+               }
+       }
+
+       /* Allocate a buffer for the CLS */
+
+       cls =
+           ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_pnp_device_id) +
+                                (acpi_size) length);
+       if (!cls) {
+               status = AE_NO_MEMORY;
+               goto cleanup;
+       }
+
+       /* Area for the string starts after PNP_DEVICE_ID struct */
+
+       cls->string =
+           ACPI_ADD_PTR(char, cls, sizeof(struct acpi_pnp_device_id));
+
+       /* Convert the PCI class code bytes to the "BBSSPP" string form */
+
+       acpi_ex_pci_cls_to_string(cls->string, class_code);
+       cls->length = length;
+       *return_id = cls;
+
+cleanup:
+
+       /* On exit, we must delete the return object */
+
+       acpi_ut_remove_reference(obj_desc);
+       return_ACPI_STATUS(status);
+}
index cbb7034d28d89d10944096ed8359caa5b6274b94..71b66537f8260daf7e09ceae9747790bcf71bd8c 100644 (file)
@@ -66,9 +66,9 @@ u8 acpi_ut_is_pci_root_bridge(char *id)
         * Check if this is a PCI root bridge.
         * ACPI 3.0+: check for a PCI Express root also.
         */
-       if (!(ACPI_STRCMP(id,
-                         PCI_ROOT_HID_STRING)) ||
-           !(ACPI_STRCMP(id, PCI_EXPRESS_ROOT_HID_STRING))) {
+       if (!(strcmp(id,
+                    PCI_ROOT_HID_STRING)) ||
+           !(strcmp(id, PCI_EXPRESS_ROOT_HID_STRING))) {
                return (TRUE);
        }
 
@@ -97,7 +97,8 @@ u8 acpi_ut_is_aml_table(struct acpi_table_header *table)
 
        if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_DSDT) ||
            ACPI_COMPARE_NAME(table->signature, ACPI_SIG_PSDT) ||
-           ACPI_COMPARE_NAME(table->signature, ACPI_SIG_SSDT)) {
+           ACPI_COMPARE_NAME(table->signature, ACPI_SIG_SSDT) ||
+           ACPI_COMPARE_NAME(table->signature, ACPI_SIG_OSDT)) {
                return (TRUE);
        }
 
index 44035abdbf2948000ae2ffd3861d07ff08746b6e..8f3d203aed79844ee78859ae954492f09e9fce95 100644 (file)
@@ -232,8 +232,7 @@ acpi_status acpi_ut_install_interface(acpi_string interface_name)
                return (AE_NO_MEMORY);
        }
 
-       interface_info->name =
-           ACPI_ALLOCATE_ZEROED(ACPI_STRLEN(interface_name) + 1);
+       interface_info->name = ACPI_ALLOCATE_ZEROED(strlen(interface_name) + 1);
        if (!interface_info->name) {
                ACPI_FREE(interface_info);
                return (AE_NO_MEMORY);
@@ -241,7 +240,7 @@ acpi_status acpi_ut_install_interface(acpi_string interface_name)
 
        /* Initialize new info and insert at the head of the global list */
 
-       ACPI_STRCPY(interface_info->name, interface_name);
+       strcpy(interface_info->name, interface_name);
        interface_info->flags = ACPI_OSI_DYNAMIC;
        interface_info->next = acpi_gbl_supported_interfaces;
 
@@ -269,7 +268,7 @@ acpi_status acpi_ut_remove_interface(acpi_string interface_name)
 
        previous_interface = next_interface = acpi_gbl_supported_interfaces;
        while (next_interface) {
-               if (!ACPI_STRCMP(interface_name, next_interface->name)) {
+               if (!strcmp(interface_name, next_interface->name)) {
 
                        /* Found: name is in either the static list or was added at runtime */
 
@@ -373,7 +372,7 @@ struct acpi_interface_info *acpi_ut_get_interface(acpi_string interface_name)
 
        next_interface = acpi_gbl_supported_interfaces;
        while (next_interface) {
-               if (!ACPI_STRCMP(interface_name, next_interface->name)) {
+               if (!strcmp(interface_name, next_interface->name)) {
                        return (next_interface);
                }
 
index 29e449935a82e5f802a96f8e3c09d257a2bb2529..97898ed71b4b3a9a9a9e8d510b83fdbe52546a71 100644 (file)
@@ -148,7 +148,7 @@ void acpi_ut_get_expected_return_types(char *buffer, u32 expected_btypes)
        u32 j;
 
        if (!expected_btypes) {
-               ACPI_STRCPY(buffer, "NONE");
+               strcpy(buffer, "NONE");
                return;
        }
 
@@ -161,7 +161,7 @@ void acpi_ut_get_expected_return_types(char *buffer, u32 expected_btypes)
                /* If one of the expected types, concatenate the name of this type */
 
                if (expected_btypes & this_rtype) {
-                       ACPI_STRCAT(buffer, &ut_rtype_names[i][j]);
+                       strcat(buffer, &ut_rtype_names[i][j]);
                        j = 0;  /* Use name separator from now on */
                }
 
index 2be6bd4bdc09d7662c15a9ee3ab733310be3d077..b26297c5de49d528f19e8d0aacba119f94c69a71 100644 (file)
@@ -180,7 +180,7 @@ const char *acpi_ut_scan_number(const char *string, u64 *number_ptr)
 {
        u64 number = 0;
 
-       while (ACPI_IS_DIGIT(*string)) {
+       while (isdigit((int)*string)) {
                number *= 10;
                number += *(string++) - '0';
        }
@@ -405,7 +405,7 @@ acpi_ut_vsnprintf(char *string,
                /* Process width */
 
                width = -1;
-               if (ACPI_IS_DIGIT(*format)) {
+               if (isdigit((int)*format)) {
                        format = acpi_ut_scan_number(format, &number);
                        width = (s32) number;
                } else if (*format == '*') {
@@ -422,7 +422,7 @@ acpi_ut_vsnprintf(char *string,
                precision = -1;
                if (*format == '.') {
                        ++format;
-                       if (ACPI_IS_DIGIT(*format)) {
+                       if (isdigit((int)*format)) {
                                format = acpi_ut_scan_number(format, &number);
                                precision = (s32) number;
                        } else if (*format == '*') {
index 83b6c52490dc06097d945fb0245189543adb3835..8f3c883dfe0ec305aa167bf0db0970d7cc60cf82 100644 (file)
@@ -79,7 +79,7 @@ void acpi_ut_strlwr(char *src_string)
        /* Walk entire string, lowercasing the letters */
 
        for (string = src_string; *string; string++) {
-               *string = (char)ACPI_TOLOWER(*string);
+               *string = (char)tolower((int)*string);
        }
 
        return;
@@ -145,7 +145,7 @@ void acpi_ut_strupr(char *src_string)
        /* Walk entire string, uppercasing the letters */
 
        for (string = src_string; *string; string++) {
-               *string = (char)ACPI_TOUPPER(*string);
+               *string = (char)toupper((int)*string);
        }
 
        return;
@@ -202,7 +202,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer)
 
        /* Skip over any white space in the buffer */
 
-       while ((*string) && (ACPI_IS_SPACE(*string) || *string == '\t')) {
+       while ((*string) && (isspace((int)*string) || *string == '\t')) {
                string++;
        }
 
@@ -211,7 +211,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer)
                 * Base equal to ACPI_ANY_BASE means 'ToInteger operation case'.
                 * We need to determine if it is decimal or hexadecimal.
                 */
-               if ((*string == '0') && (ACPI_TOLOWER(*(string + 1)) == 'x')) {
+               if ((*string == '0') && (tolower((int)*(string + 1)) == 'x')) {
                        sign_of0x = 1;
                        base = 16;
 
@@ -224,7 +224,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer)
 
        /* Any string left? Check that '0x' is not followed by white space. */
 
-       if (!(*string) || ACPI_IS_SPACE(*string) || *string == '\t') {
+       if (!(*string) || isspace((int)*string) || *string == '\t') {
                if (to_integer_op) {
                        goto error_exit;
                } else {
@@ -241,7 +241,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer)
        /* Main loop: convert the string to a 32- or 64-bit integer */
 
        while (*string) {
-               if (ACPI_IS_DIGIT(*string)) {
+               if (isdigit((int)*string)) {
 
                        /* Convert ASCII 0-9 to Decimal value */
 
@@ -252,8 +252,8 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer)
 
                        term = 1;
                } else {
-                       this_digit = (u8)ACPI_TOUPPER(*string);
-                       if (ACPI_IS_XDIGIT((char)this_digit)) {
+                       this_digit = (u8)toupper((int)*string);
+                       if (isxdigit((int)this_digit)) {
 
                                /* Convert ASCII Hex char to value */
 
@@ -404,7 +404,7 @@ void acpi_ut_print_string(char *string, u16 max_length)
 
                        /* Check for printable character or hex escape */
 
-                       if (ACPI_IS_PRINT(string[i])) {
+                       if (isprint((int)string[i])) {
                                /* This is a normal character */
 
                                acpi_os_printf("%c", (int)string[i]);
@@ -609,22 +609,22 @@ void ut_convert_backslashes(char *pathname)
 u8 acpi_ut_safe_strcpy(char *dest, acpi_size dest_size, char *source)
 {
 
-       if (ACPI_STRLEN(source) >= dest_size) {
+       if (strlen(source) >= dest_size) {
                return (TRUE);
        }
 
-       ACPI_STRCPY(dest, source);
+       strcpy(dest, source);
        return (FALSE);
 }
 
 u8 acpi_ut_safe_strcat(char *dest, acpi_size dest_size, char *source)
 {
 
-       if ((ACPI_STRLEN(dest) + ACPI_STRLEN(source)) >= dest_size) {
+       if ((strlen(dest) + strlen(source)) >= dest_size) {
                return (TRUE);
        }
 
-       ACPI_STRCAT(dest, source);
+       strcat(dest, source);
        return (FALSE);
 }
 
@@ -635,14 +635,13 @@ acpi_ut_safe_strncat(char *dest,
 {
        acpi_size actual_transfer_length;
 
-       actual_transfer_length =
-           ACPI_MIN(max_transfer_length, ACPI_STRLEN(source));
+       actual_transfer_length = ACPI_MIN(max_transfer_length, strlen(source));
 
-       if ((ACPI_STRLEN(dest) + actual_transfer_length) >= dest_size) {
+       if ((strlen(dest) + actual_transfer_length) >= dest_size) {
                return (TRUE);
        }
 
-       ACPI_STRNCAT(dest, source, max_transfer_length);
+       strncat(dest, source, max_transfer_length);
        return (FALSE);
 }
 #endif
index 130dd9f96f0fe72ee03f79f6fa3631d70e608086..9a7dc8196a5da76779f10c855d17fc0e5f4430c8 100644 (file)
@@ -100,7 +100,7 @@ acpi_ut_create_list(char *list_name,
                return (AE_NO_MEMORY);
        }
 
-       ACPI_MEMSET(cache, 0, sizeof(struct acpi_memory_list));
+       memset(cache, 0, sizeof(struct acpi_memory_list));
 
        cache->list_name = list_name;
        cache->object_size = object_size;
@@ -402,7 +402,7 @@ acpi_ut_track_allocation(struct acpi_debug_mem_block *allocation,
        allocation->component = component;
        allocation->line = line;
 
-       ACPI_STRNCPY(allocation->module, module, ACPI_MAX_MODULE_NAME);
+       strncpy(allocation->module, module, ACPI_MAX_MODULE_NAME);
        allocation->module[ACPI_MAX_MODULE_NAME - 1] = 0;
 
        if (!element) {
@@ -497,7 +497,7 @@ acpi_ut_remove_allocation(struct acpi_debug_mem_block *allocation,
 
        /* Mark the segment as deleted */
 
-       ACPI_MEMSET(&allocation->user_space, 0xEA, allocation->size);
+       memset(&allocation->user_space, 0xEA, allocation->size);
 
        status = acpi_ut_release_mutex(ACPI_MTX_MEMORY);
        return (status);
@@ -595,7 +595,7 @@ void acpi_ut_dump_allocations(u32 component, const char *module)
        while (element) {
                if ((element->component & component) &&
                    ((module == NULL)
-                    || (0 == ACPI_STRCMP(module, element->module)))) {
+                    || (0 == strcmp(module, element->module)))) {
                        descriptor =
                            ACPI_CAST_PTR(union acpi_descriptor,
                                          &element->user_space);
index 0929187bdce09c74f07e8d1a68f48eed4f2880e6..51cf52d52243c99233728a2d00d73e6171b38a28 100644 (file)
@@ -234,8 +234,8 @@ acpi_status acpi_get_statistics(struct acpi_statistics *stats)
        stats->sci_count = acpi_sci_count;
        stats->gpe_count = acpi_gpe_count;
 
-       ACPI_MEMCPY(stats->fixed_event_count, acpi_fixed_event_count,
-                   sizeof(acpi_fixed_event_count));
+       memcpy(stats->fixed_event_count, acpi_fixed_event_count,
+              sizeof(acpi_fixed_event_count));
 
        /* Other counters */
 
@@ -322,7 +322,7 @@ acpi_status acpi_install_interface(acpi_string interface_name)
 
        /* Parameter validation */
 
-       if (!interface_name || (ACPI_STRLEN(interface_name) == 0)) {
+       if (!interface_name || (strlen(interface_name) == 0)) {
                return (AE_BAD_PARAMETER);
        }
 
@@ -374,7 +374,7 @@ acpi_status acpi_remove_interface(acpi_string interface_name)
 
        /* Parameter validation */
 
-       if (!interface_name || (ACPI_STRLEN(interface_name) == 0)) {
+       if (!interface_name || (strlen(interface_name) == 0)) {
                return (AE_BAD_PARAMETER);
        }
 
index 083a76891889244596453059885ef3007aab8320..42a32a66ef22a9fc87558aa1e8901259146519be 100644 (file)
@@ -179,10 +179,12 @@ acpi_status __init acpi_enable_subsystem(u32 flags)
         * Obtain a permanent mapping for the FACS. This is required for the
         * Global Lock and the Firmware Waking Vector
         */
-       status = acpi_tb_initialize_facs();
-       if (ACPI_FAILURE(status)) {
-               ACPI_WARNING((AE_INFO, "Could not map the FACS table"));
-               return_ACPI_STATUS(status);
+       if (!(flags & ACPI_NO_FACS_INIT)) {
+               status = acpi_tb_initialize_facs();
+               if (ACPI_FAILURE(status)) {
+                       ACPI_WARNING((AE_INFO, "Could not map the FACS table"));
+                       return_ACPI_STATUS(status);
+               }
        }
 #endif                         /* !ACPI_REDUCED_HARDWARE */
 
index 1d1791935c318c71148a5da573effe51f9031a15..278dc4be992a49b7663223a5c6bf2215699f4c20 100644 (file)
@@ -162,6 +162,15 @@ static int __init dmi_disable_osi_win8(const struct dmi_system_id *d)
        acpi_osi_setup("!Windows 2012");
        return 0;
 }
+#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE
+static int __init dmi_enable_rev_override(const struct dmi_system_id *d)
+{
+       printk(KERN_NOTICE PREFIX "DMI detected: %s (force ACPI _REV to 5)\n",
+              d->ident);
+       acpi_rev_override_setup(NULL);
+       return 0;
+}
+#endif
 
 static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
        {
@@ -325,6 +334,23 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
                     DMI_MATCH(DMI_PRODUCT_NAME, "1015PX"),
                },
        },
+
+#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE
+       /*
+        * DELL XPS 13 (2015) switches sound between HDA and I2S
+        * depending on the ACPI _REV callback. If userspace supports
+        * I2S sufficiently (or if you do not care about sound), you
+        * can safely disable this quirk.
+        */
+       {
+        .callback = dmi_enable_rev_override,
+        .ident = "DELL XPS 13 (2015)",
+        .matches = {
+                     DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                     DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9343"),
+               },
+       },
+#endif
        {}
 };
 
index 787c629bc9b41e83ac11496f76bb646437d3b077..4683a96932b917fcc5f3efc04e290eb07012efad 100644 (file)
@@ -58,6 +58,7 @@ void acpi_cmos_rtc_init(void);
 #else
 static inline void acpi_cmos_rtc_init(void) {}
 #endif
+int acpi_rev_override_setup(char *str);
 
 extern bool acpi_force_hot_remove;
 
index a5dc9034efeeda6f06044521600518f70bd322d3..c262e4acd68d827cba1273d79e28515c6ebe95fa 100644 (file)
@@ -530,6 +530,19 @@ acpi_os_get_physical_address(void *virt, acpi_physical_address * phys)
 }
 #endif
 
+#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE
+static bool acpi_rev_override;
+
+int __init acpi_rev_override_setup(char *str)
+{
+       acpi_rev_override = true;
+       return 1;
+}
+__setup("acpi_rev_override", acpi_rev_override_setup);
+#else
+#define acpi_rev_override      false
+#endif
+
 #define ACPI_MAX_OVERRIDE_LEN 100
 
 static char acpi_os_name[ACPI_MAX_OVERRIDE_LEN];
@@ -548,6 +561,11 @@ acpi_os_predefined_override(const struct acpi_predefined_names *init_val,
                *new_val = acpi_os_name;
        }
 
+       if (!memcmp(init_val->name, "_REV", 4) && acpi_rev_override) {
+               printk(KERN_INFO PREFIX "Overriding _REV return value to 5\n");
+               *new_val = (char *)5;
+       }
+
        return AE_OK;
 }
 
index ec6c5c6e1ac94b2bcbe0619a7fe62b9e7d0ce4a5..d94529d5c8e951378eaf62d74b708edf271a550f 100644 (file)
@@ -346,6 +346,7 @@ struct rbd_device {
        struct rbd_image_header header;
        unsigned long           flags;          /* possibly lock protected */
        struct rbd_spec         *spec;
+       struct rbd_options      *opts;
 
        char                    *header_name;
 
@@ -724,34 +725,36 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
 }
 
 /*
- * mount options
+ * (Per device) rbd map options
  */
 enum {
+       Opt_queue_depth,
        Opt_last_int,
        /* int args above */
        Opt_last_string,
        /* string args above */
        Opt_read_only,
        Opt_read_write,
-       /* Boolean args above */
-       Opt_last_bool,
+       Opt_err
 };
 
 static match_table_t rbd_opts_tokens = {
+       {Opt_queue_depth, "queue_depth=%d"},
        /* int args above */
        /* string args above */
        {Opt_read_only, "read_only"},
        {Opt_read_only, "ro"},          /* Alternate spelling */
        {Opt_read_write, "read_write"},
        {Opt_read_write, "rw"},         /* Alternate spelling */
-       /* Boolean args above */
-       {-1, NULL}
+       {Opt_err, NULL}
 };
 
 struct rbd_options {
+       int     queue_depth;
        bool    read_only;
 };
 
+#define RBD_QUEUE_DEPTH_DEFAULT        BLKDEV_MAX_RQ
 #define RBD_READ_ONLY_DEFAULT  false
 
 static int parse_rbd_opts_token(char *c, void *private)
@@ -761,27 +764,27 @@ static int parse_rbd_opts_token(char *c, void *private)
        int token, intval, ret;
 
        token = match_token(c, rbd_opts_tokens, argstr);
-       if (token < 0)
-               return -EINVAL;
-
        if (token < Opt_last_int) {
                ret = match_int(&argstr[0], &intval);
                if (ret < 0) {
-                       pr_err("bad mount option arg (not int) "
-                              "at '%s'\n", c);
+                       pr_err("bad mount option arg (not int) at '%s'\n", c);
                        return ret;
                }
                dout("got int token %d val %d\n", token, intval);
        } else if (token > Opt_last_int && token < Opt_last_string) {
-               dout("got string token %d val %s\n", token,
-                    argstr[0].from);
-       } else if (token > Opt_last_string && token < Opt_last_bool) {
-               dout("got Boolean token %d\n", token);
+               dout("got string token %d val %s\n", token, argstr[0].from);
        } else {
                dout("got token %d\n", token);
        }
 
        switch (token) {
+       case Opt_queue_depth:
+               if (intval < 1) {
+                       pr_err("queue_depth out of range\n");
+                       return -EINVAL;
+               }
+               rbd_opts->queue_depth = intval;
+               break;
        case Opt_read_only:
                rbd_opts->read_only = true;
                break;
@@ -789,9 +792,10 @@ static int parse_rbd_opts_token(char *c, void *private)
                rbd_opts->read_only = false;
                break;
        default:
-               rbd_assert(false);
-               break;
+               /* libceph prints "bad option" msg */
+               return -EINVAL;
        }
+
        return 0;
 }
 
@@ -1563,22 +1567,39 @@ static void rbd_obj_request_end(struct rbd_obj_request *obj_request)
 /*
  * Wait for an object request to complete.  If interrupted, cancel the
  * underlying osd request.
+ *
+ * @timeout: in jiffies, 0 means "wait forever"
  */
-static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
+static int __rbd_obj_request_wait(struct rbd_obj_request *obj_request,
+                                 unsigned long timeout)
 {
-       int ret;
+       long ret;
 
        dout("%s %p\n", __func__, obj_request);
-
-       ret = wait_for_completion_interruptible(&obj_request->completion);
-       if (ret < 0) {
-               dout("%s %p interrupted\n", __func__, obj_request);
+       ret = wait_for_completion_interruptible_timeout(
+                                       &obj_request->completion,
+                                       ceph_timeout_jiffies(timeout));
+       if (ret <= 0) {
+               if (ret == 0)
+                       ret = -ETIMEDOUT;
                rbd_obj_request_end(obj_request);
-               return ret;
+       } else {
+               ret = 0;
        }
 
-       dout("%s %p done\n", __func__, obj_request);
-       return 0;
+       dout("%s %p ret %d\n", __func__, obj_request, (int)ret);
+       return ret;
+}
+
+static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
+{
+       return __rbd_obj_request_wait(obj_request, 0);
+}
+
+static int rbd_obj_request_wait_timeout(struct rbd_obj_request *obj_request,
+                                       unsigned long timeout)
+{
+       return __rbd_obj_request_wait(obj_request, timeout);
 }
 
 static void rbd_img_request_complete(struct rbd_img_request *img_request)
@@ -2001,11 +2022,11 @@ static struct rbd_obj_request *rbd_obj_request_create(const char *object_name,
        rbd_assert(obj_request_type_valid(type));
 
        size = strlen(object_name) + 1;
-       name = kmalloc(size, GFP_KERNEL);
+       name = kmalloc(size, GFP_NOIO);
        if (!name)
                return NULL;
 
-       obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL);
+       obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO);
        if (!obj_request) {
                kfree(name);
                return NULL;
@@ -2376,7 +2397,7 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
        }
 
        if (opcode == CEPH_OSD_OP_DELETE)
-               osd_req_op_init(osd_request, num_ops, opcode);
+               osd_req_op_init(osd_request, num_ops, opcode, 0);
        else
                osd_req_op_extent_init(osd_request, num_ops, opcode,
                                       offset, length, 0, 0);
@@ -2848,7 +2869,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
                goto out;
        stat_request->callback = rbd_img_obj_exists_callback;
 
-       osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT);
+       osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0);
        osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
                                        false, false);
        rbd_osd_req_format_read(stat_request);
@@ -3122,6 +3143,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper(
                                                bool watch)
 {
        struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+       struct ceph_options *opts = osdc->client->options;
        struct rbd_obj_request *obj_request;
        int ret;
 
@@ -3148,7 +3170,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper(
        if (ret)
                goto out;
 
-       ret = rbd_obj_request_wait(obj_request);
+       ret = rbd_obj_request_wait_timeout(obj_request, opts->mount_timeout);
        if (ret)
                goto out;
 
@@ -3750,10 +3772,9 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 
        memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
        rbd_dev->tag_set.ops = &rbd_mq_ops;
-       rbd_dev->tag_set.queue_depth = BLKDEV_MAX_RQ;
+       rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth;
        rbd_dev->tag_set.numa_node = NUMA_NO_NODE;
-       rbd_dev->tag_set.flags =
-               BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+       rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
        rbd_dev->tag_set.nr_hw_queues = 1;
        rbd_dev->tag_set.cmd_size = sizeof(struct work_struct);
 
@@ -3773,6 +3794,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
        /* set io sizes to object size */
        segment_size = rbd_obj_bytes(&rbd_dev->header);
        blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
+       blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
        blk_queue_max_segment_size(q, segment_size);
        blk_queue_io_min(q, segment_size);
        blk_queue_io_opt(q, segment_size);
@@ -4044,7 +4066,8 @@ static void rbd_spec_free(struct kref *kref)
 }
 
 static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
-                               struct rbd_spec *spec)
+                                        struct rbd_spec *spec,
+                                        struct rbd_options *opts)
 {
        struct rbd_device *rbd_dev;
 
@@ -4058,8 +4081,9 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
        INIT_LIST_HEAD(&rbd_dev->node);
        init_rwsem(&rbd_dev->header_rwsem);
 
-       rbd_dev->spec = spec;
        rbd_dev->rbd_client = rbdc;
+       rbd_dev->spec = spec;
+       rbd_dev->opts = opts;
 
        /* Initialize the layout used for all rbd requests */
 
@@ -4075,6 +4099,7 @@ static void rbd_dev_destroy(struct rbd_device *rbd_dev)
 {
        rbd_put_client(rbd_dev->rbd_client);
        rbd_spec_put(rbd_dev->spec);
+       kfree(rbd_dev->opts);
        kfree(rbd_dev);
 }
 
@@ -4933,6 +4958,7 @@ static int rbd_add_parse_args(const char *buf,
                goto out_mem;
 
        rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
+       rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
 
        copts = ceph_parse_options(options, mon_addrs,
                                        mon_addrs + mon_addrs_size - 1,
@@ -4963,8 +4989,8 @@ out_err:
  */
 static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
 {
+       struct ceph_options *opts = rbdc->client->options;
        u64 newest_epoch;
-       unsigned long timeout = rbdc->client->options->mount_timeout * HZ;
        int tries = 0;
        int ret;
 
@@ -4979,7 +5005,8 @@ again:
                if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
                        ceph_monc_request_next_osdmap(&rbdc->client->monc);
                        (void) ceph_monc_wait_osdmap(&rbdc->client->monc,
-                                                    newest_epoch, timeout);
+                                                    newest_epoch,
+                                                    opts->mount_timeout);
                        goto again;
                } else {
                        /* the osdmap we have is new enough */
@@ -5148,7 +5175,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
        rbdc = __rbd_get_client(rbd_dev->rbd_client);
 
        ret = -ENOMEM;
-       parent = rbd_dev_create(rbdc, parent_spec);
+       parent = rbd_dev_create(rbdc, parent_spec, NULL);
        if (!parent)
                goto out_err;
 
@@ -5394,9 +5421,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
        rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
        if (rc < 0)
                goto err_out_module;
-       read_only = rbd_opts->read_only;
-       kfree(rbd_opts);
-       rbd_opts = NULL;        /* done with this */
 
        rbdc = rbd_get_client(ceph_opts);
        if (IS_ERR(rbdc)) {
@@ -5422,11 +5446,12 @@ static ssize_t do_rbd_add(struct bus_type *bus,
                goto err_out_client;
        }
 
-       rbd_dev = rbd_dev_create(rbdc, spec);
+       rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
        if (!rbd_dev)
                goto err_out_client;
        rbdc = NULL;            /* rbd_dev now owns this */
        spec = NULL;            /* rbd_dev now owns this */
+       rbd_opts = NULL;        /* rbd_dev now owns this */
 
        rc = rbd_dev_image_probe(rbd_dev, true);
        if (rc < 0)
@@ -5434,6 +5459,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 
        /* If we are mapping a snapshot it must be marked read-only */
 
+       read_only = rbd_dev->opts->read_only;
        if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
                read_only = true;
        rbd_dev->mapping.read_only = read_only;
@@ -5458,6 +5484,7 @@ err_out_client:
        rbd_put_client(rbdc);
 err_out_args:
        rbd_spec_put(spec);
+       kfree(rbd_opts);
 err_out_module:
        module_put(THIS_MODULE);
 
index 0b4188b9af7cd055571851d9f0e0e25e7d03033c..c6dea3f6917bdcfc144fc70540cbbd26ea1918ee 100644 (file)
@@ -581,7 +581,7 @@ static inline int needs_ilk_vtd_wa(void)
        /* Query intel_iommu to see if we need the workaround. Presumably that
         * was loaded first.
         */
-       if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB ||
+       if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG ||
             gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG) &&
             intel_iommu_gfx_mapped)
                return 1;
index 86cdb3a28629ab09394292e2b210eb5b689b4504..446c2fe76dc28635b28b0fb116aaf5cd7e79e30e 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/max77686.h>
 #include <linux/mfd/max77686-private.h>
index 0729dc723a8ff81099bc9e0071a9784f6fd9e4d3..74c49b93a6eba5c40f28342f7d4ab4225c477795 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/max77686-private.h>
 #include <linux/clk-provider.h>
index 05e04ce0f1488f7301ad5153f687860bf872f8bc..c9487179f25f46d79e511ace153e3a3e7078e49e 100644 (file)
@@ -503,8 +503,7 @@ static int __init nomadik_src_clk_init_debugfs(void)
                            NULL, NULL, &nomadik_src_clk_debugfs_ops);
        return 0;
 }
-
-module_init(nomadik_src_clk_init_debugfs);
+device_initcall(nomadik_src_clk_init_debugfs);
 
 #endif
 
index ec8f5a1fca09f4240c433a1de2ea8207e6e369e5..9d028aec58e5d8addc3a080ced48594db57c1657 100644 (file)
@@ -128,7 +128,7 @@ static struct platform_driver sun4i_a10_mod0_clk_driver = {
        },
        .probe = sun4i_a10_mod0_clk_probe,
 };
-module_platform_driver(sun4i_a10_mod0_clk_driver);
+builtin_platform_driver(sun4i_a10_mod0_clk_driver);
 
 static const struct factors_data sun9i_a80_mod0_data __initconst = {
        .enable = 31,
index fb24aaf4adcf8099c04a4f2de5eb558a56292707..ae5b2bd3a9785c63646e3e922fbe17330678b481 100644 (file)
@@ -10,6 +10,7 @@
 */
 
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
index b0dac7d6ba31475b9902eefaff8c9dab557f34d2..9e231f52150c404ebd92e6d74ea6a24b5642576a 100644 (file)
@@ -659,4 +659,4 @@ static struct platform_driver s5pv210_cpufreq_platdrv = {
        },
        .probe = s5pv210_cpufreq_probe,
 };
-module_platform_driver(s5pv210_cpufreq_platdrv);
+builtin_platform_driver(s5pv210_cpufreq_platdrv);
index f2446c78d87cf14eda9cec13ab05d9bd0b9198a9..9c5853b6ca4a2f47039dce98ab13e722dfdc05f4 100644 (file)
@@ -62,5 +62,4 @@ static struct platform_driver at91_cpuidle_driver = {
        },
        .probe = at91_cpuidle_probe,
 };
-
-module_platform_driver(at91_cpuidle_driver);
+builtin_platform_driver(at91_cpuidle_driver);
index 9445e6cc02be4336b396d8125d95db5577b0ad13..c13feec89ea1d68c9734553110c56aeee2b812c2 100644 (file)
@@ -75,5 +75,4 @@ static struct platform_driver calxeda_cpuidle_plat_driver = {
         },
         .probe = calxeda_cpuidle_probe,
 };
-
-module_platform_driver(calxeda_cpuidle_plat_driver);
+builtin_platform_driver(calxeda_cpuidle_plat_driver);
index 543292b1d38ea045e9d9504c59d75ba4678a0ce2..6f4257fc56e5192f0b743a00d7bafcbb3b5566c3 100644 (file)
@@ -73,5 +73,4 @@ static struct platform_driver zynq_cpuidle_driver = {
        },
        .probe = zynq_cpuidle_probe,
 };
-
-module_platform_driver(zynq_cpuidle_driver);
+builtin_platform_driver(zynq_cpuidle_driver);
index 5bcd575fa96f1b80d3e9a15cabc2898c79d50fdf..e6b658faef63a37df57e497b45637f7815e7dbf5 100644 (file)
@@ -1034,8 +1034,8 @@ static int mv_cesa_get_sram(struct platform_device *pdev,
                             &sram_size);
 
        cp->sram_size = sram_size;
-       cp->sram_pool = of_get_named_gen_pool(pdev->dev.of_node,
-                                             "marvell,crypto-srams", 0);
+       cp->sram_pool = of_gen_pool_get(pdev->dev.of_node,
+                                       "marvell,crypto-srams", 0);
        if (cp->sram_pool) {
                cp->sram = gen_pool_dma_alloc(cp->sram_pool, sram_size,
                                              &cp->sram_dma);
index 7e98084d36451efe894f5f33c1a3b77748183af3..afea7fc625ccb30bec2c3f69e1dadf1b74dadac2 100644 (file)
@@ -151,7 +151,7 @@ static int octeon_l2c_probe(struct platform_device *pdev)
        l2c->ctl_name = "octeon_l2c_err";
 
 
-       if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) {
+       if (OCTEON_IS_OCTEON1PLUS()) {
                union cvmx_l2t_err l2t_err;
                union cvmx_l2d_err l2d_err;
 
index bb19e0732681ce6a4af6c2e980c0c4bb382889f9..cda6dab5067a57709393fad7913a2e76a531a23b 100644 (file)
@@ -234,7 +234,7 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev)
        layers[0].size = 1;
        layers[0].is_virt_csrow = false;
 
-       if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) {
+       if (OCTEON_IS_OCTEON1PLUS()) {
                union cvmx_lmcx_mem_cfg0 cfg0;
 
                cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(0));
index 0f83c33a7d1fcbb08180d60609aafc92e76a6425..2ab6cf24c9598f0be36dba02cb6f024495e667da 100644 (file)
@@ -73,7 +73,7 @@ static int  co_cache_error_event(struct notifier_block *this,
                        edac_device_handle_ce(p->ed, cpu, 0, "dcache");
 
                /* Clear the error indication */
-               if (OCTEON_IS_MODEL(OCTEON_FAM_2))
+               if (OCTEON_IS_OCTEON2())
                        write_octeon_c0_dcacheerr(1);
                else
                        write_octeon_c0_dcacheerr(0);
index 22866d1c3d69c196bdb332f7c056d9e4a2a5005f..01657830b470a49e8209fd39fa829d4a1fbb3610 100644 (file)
@@ -425,6 +425,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
                                   unsigned irq_type);
 int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
                      struct amdgpu_fence **fence);
+int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner,
+                         uint64_t seq, struct amdgpu_fence **fence);
 void amdgpu_fence_process(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
@@ -435,9 +437,6 @@ int amdgpu_fence_wait(struct amdgpu_fence *fence, bool interruptible);
 int amdgpu_fence_wait_any(struct amdgpu_device *adev,
                          struct amdgpu_fence **fences,
                          bool intr);
-long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev,
-                                  u64 *target_seq, bool intr,
-                                  long timeout);
 struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
 void amdgpu_fence_unref(struct amdgpu_fence **fence);
 
@@ -1622,6 +1621,7 @@ struct amdgpu_vce {
        unsigned                fb_version;
        atomic_t                handles[AMDGPU_MAX_VCE_HANDLES];
        struct drm_file         *filp[AMDGPU_MAX_VCE_HANDLES];
+       uint32_t                img_size[AMDGPU_MAX_VCE_HANDLES];
        struct delayed_work     idle_work;
        const struct firmware   *fw;    /* VCE firmware */
        struct amdgpu_ring      ring[AMDGPU_MAX_VCE_RINGS];
index 36d34e0afbc3a5cacb51d590af6506c62a7d58ee..f82a2dd83874dea20c7e7b2a6ddf8aee74e0fe1d 100644 (file)
@@ -30,6 +30,7 @@
 
 #include <drm/drmP.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 
 static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
                                 struct amdgpu_bo_list **result,
@@ -124,6 +125,8 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
                        gws_obj = entry->robj;
                if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
                        oa_obj = entry->robj;
+
+               trace_amdgpu_bo_list_set(list, entry->robj);
        }
 
        for (i = 0; i < list->num_entries; ++i)
index f09b2cba40ca505649decf23a27b60850b62407a..d63135bf29c0c258f72025fa6f41f34677576ec4 100644 (file)
@@ -181,8 +181,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                }
                p->chunks[i].chunk_id = user_chunk.chunk_id;
                p->chunks[i].length_dw = user_chunk.length_dw;
-               if (p->chunks[i].chunk_id == AMDGPU_CHUNK_ID_IB)
-                       p->num_ibs++;
 
                size = p->chunks[i].length_dw;
                cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
@@ -199,7 +197,12 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                        goto out;
                }
 
-               if (p->chunks[i].chunk_id == AMDGPU_CHUNK_ID_FENCE) {
+               switch (p->chunks[i].chunk_id) {
+               case AMDGPU_CHUNK_ID_IB:
+                       p->num_ibs++;
+                       break;
+
+               case AMDGPU_CHUNK_ID_FENCE:
                        size = sizeof(struct drm_amdgpu_cs_chunk_fence);
                        if (p->chunks[i].length_dw * sizeof(uint32_t) >= size) {
                                uint32_t handle;
@@ -221,6 +224,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                                r = -EINVAL;
                                goto out;
                        }
+                       break;
+
+               case AMDGPU_CHUNK_ID_DEPENDENCIES:
+                       break;
+
+               default:
+                       r = -EINVAL;
+                       goto out;
                }
        }
 
@@ -445,8 +456,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
        for (i = 0; i < parser->nchunks; i++)
                drm_free_large(parser->chunks[i].kdata);
        kfree(parser->chunks);
-       for (i = 0; i < parser->num_ibs; i++)
-               amdgpu_ib_free(parser->adev, &parser->ibs[i]);
+       if (parser->ibs)
+               for (i = 0; i < parser->num_ibs; i++)
+                       amdgpu_ib_free(parser->adev, &parser->ibs[i]);
        kfree(parser->ibs);
        if (parser->uf.bo)
                drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
@@ -654,6 +666,55 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
        return 0;
 }
 
+static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+                                 struct amdgpu_cs_parser *p)
+{
+       struct amdgpu_ib *ib;
+       int i, j, r;
+
+       if (!p->num_ibs)
+               return 0;
+
+       /* Add dependencies to first IB */
+       ib = &p->ibs[0];
+       for (i = 0; i < p->nchunks; ++i) {
+               struct drm_amdgpu_cs_chunk_dep *deps;
+               struct amdgpu_cs_chunk *chunk;
+               unsigned num_deps;
+
+               chunk = &p->chunks[i];
+
+               if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
+                       continue;
+
+               deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
+               num_deps = chunk->length_dw * 4 /
+                       sizeof(struct drm_amdgpu_cs_chunk_dep);
+
+               for (j = 0; j < num_deps; ++j) {
+                       struct amdgpu_fence *fence;
+                       struct amdgpu_ring *ring;
+
+                       r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
+                                              deps[j].ip_instance,
+                                              deps[j].ring, &ring);
+                       if (r)
+                               return r;
+
+                       r = amdgpu_fence_recreate(ring, p->filp,
+                                                 deps[j].handle,
+                                                 &fence);
+                       if (r)
+                               return r;
+
+                       amdgpu_sync_fence(&ib->sync, fence);
+                       amdgpu_fence_unref(&fence);
+               }
+       }
+
+       return 0;
+}
+
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
        struct amdgpu_device *adev = dev->dev_private;
@@ -688,11 +749,16 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                        else
                                DRM_ERROR("Failed to process the buffer list %d!\n", r);
                }
-       } else {
+       }
+
+       if (!r) {
                reserved_buffers = true;
                r = amdgpu_cs_ib_fill(adev, &parser);
        }
 
+       if (!r)
+               r = amdgpu_cs_dependencies(adev, &parser);
+
        if (r) {
                amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
                up_read(&adev->exclusive_lock);
@@ -730,9 +796,9 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 {
        union drm_amdgpu_wait_cs *wait = data;
        struct amdgpu_device *adev = dev->dev_private;
-       uint64_t seq[AMDGPU_MAX_RINGS] = {0};
-       struct amdgpu_ring *ring = NULL;
        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
+       struct amdgpu_fence *fence = NULL;
+       struct amdgpu_ring *ring = NULL;
        struct amdgpu_ctx *ctx;
        long r;
 
@@ -745,9 +811,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
        if (r)
                return r;
 
-       seq[ring->idx] = wait->in.handle;
+       r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence);
+       if (r)
+               return r;
 
-       r = amdgpu_fence_wait_seq_timeout(adev, seq, true, timeout);
+       r = fence_wait_timeout(&fence->base, true, timeout);
+       amdgpu_fence_unref(&fence);
        amdgpu_ctx_put(ctx);
        if (r < 0)
                return r;
index fec487d1c870ae27ea586ddda2def8b6c45272e4..ba46be361c9b2c9f40bf0acb751e65b156d4b171 100644 (file)
@@ -1191,7 +1191,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
                return -EINVAL;
        }
 
-
+       adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL);
+       if (adev->ip_block_enabled == NULL)
+               return -ENOMEM;
 
        if (adev->ip_blocks == NULL) {
                DRM_ERROR("No IP blocks found!\n");
@@ -1575,8 +1577,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
        amdgpu_fence_driver_fini(adev);
        amdgpu_fbdev_fini(adev);
        r = amdgpu_fini(adev);
-       if (adev->ip_block_enabled)
-               kfree(adev->ip_block_enabled);
+       kfree(adev->ip_block_enabled);
        adev->ip_block_enabled = NULL;
        adev->accel_working = false;
        /* free i2c buses */
@@ -2000,4 +2001,10 @@ int amdgpu_debugfs_init(struct drm_minor *minor)
 void amdgpu_debugfs_cleanup(struct drm_minor *minor)
 {
 }
+#else
+static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
+{
+       return 0;
+}
+static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { }
 #endif
index 5c9918d01bf984b75e2fae95daaabf7d46cf6414..a7189a1fa6a17dc308075535c6ddc0fcf3403270 100644 (file)
@@ -135,6 +135,38 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
        return 0;
 }
 
+/**
+ * amdgpu_fence_recreate - recreate a fence from a user fence
+ *
+ * @ring: ring the fence is associated with
+ * @owner: creator of the fence
+ * @seq: user fence sequence number
+ * @fence: resulting amdgpu fence object
+ *
+ * Recreates a fence object from the user fence sequence number (all asics).
+ * Returns 0 on success, -EINVAL if @seq > sync_seq[ring->idx], -ENOMEM on OOM.
+ */
+int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner,
+                         uint64_t seq, struct amdgpu_fence **fence)
+{
+       struct amdgpu_device *adev = ring->adev;
+
+       if (seq > ring->fence_drv.sync_seq[ring->idx])
+               return -EINVAL;
+
+       *fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+       if ((*fence) == NULL)
+               return -ENOMEM;
+
+       (*fence)->seq = seq;
+       (*fence)->ring = ring;
+       (*fence)->owner = owner;
+       fence_init(&(*fence)->base, &amdgpu_fence_ops,
+               &adev->fence_queue.lock, adev->fence_context + ring->idx,
+               (*fence)->seq);
+       return 0;
+}
+
 /**
  * amdgpu_fence_check_signaled - callback from fence_queue
  *
@@ -517,12 +549,14 @@ static bool amdgpu_fence_any_seq_signaled(struct amdgpu_device *adev, u64 *seq)
  * the wait timeout, or an error for all other cases.
  * -EDEADLK is returned when a GPU lockup has been detected.
  */
-long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, u64 *target_seq,
-                                  bool intr, long timeout)
+static long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev,
+                                         u64 *target_seq, bool intr,
+                                         long timeout)
 {
        uint64_t last_seq[AMDGPU_MAX_RINGS];
        bool signaled;
-       int i, r;
+       int i;
+       long r;
 
        if (timeout == 0) {
                return amdgpu_fence_any_seq_signaled(adev, target_seq);
@@ -1023,7 +1057,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
 
                amdgpu_fence_process(ring);
 
-               seq_printf(m, "--- ring %d ---\n", i);
+               seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
                seq_printf(m, "Last signaled fence 0x%016llx\n",
                           (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
                seq_printf(m, "Last emitted        0x%016llx\n",
@@ -1031,7 +1065,8 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
 
                for (j = 0; j < AMDGPU_MAX_RINGS; ++j) {
                        struct amdgpu_ring *other = adev->rings[j];
-                       if (i != j && other && other->fence_drv.initialized)
+                       if (i != j && other && other->fence_drv.initialized &&
+                           ring->fence_drv.sync_seq[j])
                                seq_printf(m, "Last sync to ring %d 0x%016llx\n",
                                           j, ring->fence_drv.sync_seq[j]);
                }
index 0ec222295feeb50e2a0798d7c091067ff2c3975b..975edb1000a202e3dcd2b7cbe2cc2c8916aa855f 100644 (file)
@@ -496,7 +496,7 @@ error_unreserve:
 error_free:
        drm_free_large(vm_bos);
 
-       if (r)
+       if (r && r != -ERESTARTSYS)
                DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
 }
 
@@ -525,8 +525,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                return -EINVAL;
        }
 
-       invalid_flags = ~(AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
-                       AMDGPU_VM_PAGE_EXECUTABLE);
+       invalid_flags = ~(AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE |
+                       AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_EXECUTABLE);
        if ((args->flags & invalid_flags)) {
                dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n",
                        args->flags, invalid_flags);
@@ -579,7 +579,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                break;
        }
 
-       if (!r)
+       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
                amdgpu_gem_va_update_vm(adev, bo_va);
 
        drm_gem_object_unreference_unlocked(gobj);
index b56dd64bd4ea78fa4f10dec8ddeb5bdbde0ff3a8..961d7265c286524956e1100b14f084eb6e5341b0 100644 (file)
@@ -30,19 +30,21 @@ TRACE_EVENT(amdgpu_cs,
            TP_PROTO(struct amdgpu_cs_parser *p, int i),
            TP_ARGS(p, i),
            TP_STRUCT__entry(
+                            __field(struct amdgpu_bo_list *, bo_list)
                             __field(u32, ring)
                             __field(u32, dw)
                             __field(u32, fences)
                             ),
 
            TP_fast_assign(
+                          __entry->bo_list = p->bo_list;
                           __entry->ring = p->ibs[i].ring->idx;
                           __entry->dw = p->ibs[i].length_dw;
                           __entry->fences = amdgpu_fence_count_emitted(
                                p->ibs[i].ring);
                           ),
-           TP_printk("ring=%u, dw=%u, fences=%u",
-                     __entry->ring, __entry->dw,
+           TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
+                     __entry->bo_list, __entry->ring, __entry->dw,
                      __entry->fences)
 );
 
@@ -61,6 +63,54 @@ TRACE_EVENT(amdgpu_vm_grab_id,
            TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring)
 );
 
+TRACE_EVENT(amdgpu_vm_bo_map,
+           TP_PROTO(struct amdgpu_bo_va *bo_va,
+                    struct amdgpu_bo_va_mapping *mapping),
+           TP_ARGS(bo_va, mapping),
+           TP_STRUCT__entry(
+                            __field(struct amdgpu_bo *, bo)
+                            __field(long, start)
+                            __field(long, last)
+                            __field(u64, offset)
+                            __field(u32, flags)
+                            ),
+
+           TP_fast_assign(
+                          __entry->bo = bo_va->bo;
+                          __entry->start = mapping->it.start;
+                          __entry->last = mapping->it.last;
+                          __entry->offset = mapping->offset;
+                          __entry->flags = mapping->flags;
+                          ),
+           TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
+                     __entry->bo, __entry->start, __entry->last,
+                     __entry->offset, __entry->flags)
+);
+
+TRACE_EVENT(amdgpu_vm_bo_unmap,
+           TP_PROTO(struct amdgpu_bo_va *bo_va,
+                    struct amdgpu_bo_va_mapping *mapping),
+           TP_ARGS(bo_va, mapping),
+           TP_STRUCT__entry(
+                            __field(struct amdgpu_bo *, bo)
+                            __field(long, start)
+                            __field(long, last)
+                            __field(u64, offset)
+                            __field(u32, flags)
+                            ),
+
+           TP_fast_assign(
+                          __entry->bo = bo_va->bo;
+                          __entry->start = mapping->it.start;
+                          __entry->last = mapping->it.last;
+                          __entry->offset = mapping->offset;
+                          __entry->flags = mapping->flags;
+                          ),
+           TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
+                     __entry->bo, __entry->start, __entry->last,
+                     __entry->offset, __entry->flags)
+);
+
 TRACE_EVENT(amdgpu_vm_bo_update,
            TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
            TP_ARGS(mapping),
@@ -121,6 +171,21 @@ TRACE_EVENT(amdgpu_vm_flush,
                      __entry->pd_addr, __entry->ring, __entry->id)
 );
 
+TRACE_EVENT(amdgpu_bo_list_set,
+           TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
+           TP_ARGS(list, bo),
+           TP_STRUCT__entry(
+                            __field(struct amdgpu_bo_list *, list)
+                            __field(struct amdgpu_bo *, bo)
+                            ),
+
+           TP_fast_assign(
+                          __entry->list = list;
+                          __entry->bo = bo;
+                          ),
+           TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
+);
+
 DECLARE_EVENT_CLASS(amdgpu_fence_request,
 
            TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
index d3706a4982933a35d09e36ec946f3bed959430fc..dd3415d2e45dcbb2f3cba5fa1ca6688ef779cfd5 100644 (file)
@@ -674,7 +674,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
                return 0;
 
        if (gtt && gtt->userptr) {
-               ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL);
+               ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
                if (!ttm->sg)
                        return -ENOMEM;
 
index 1127a504f11854f421ee2e202a96472094745ad4..d3ca73090e39d94f8eaf0762dcb22b4209a07712 100644 (file)
@@ -464,28 +464,42 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
  * @p: parser context
  * @lo: address of lower dword
  * @hi: address of higher dword
+ * @size: minimum size
  *
  * Patch relocation inside command stream with real buffer address
  */
-int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int hi)
+static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
+                              int lo, int hi, unsigned size, uint32_t index)
 {
        struct amdgpu_bo_va_mapping *mapping;
        struct amdgpu_ib *ib = &p->ibs[ib_idx];
        struct amdgpu_bo *bo;
        uint64_t addr;
 
+       if (index == 0xffffffff)
+               index = 0;
+
        addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
               ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
+       addr += ((uint64_t)size) * ((uint64_t)index);
 
        mapping = amdgpu_cs_find_mapping(p, addr, &bo);
        if (mapping == NULL) {
-               DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d\n",
+               DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
+                         addr, lo, hi, size, index);
+               return -EINVAL;
+       }
+
+       if ((addr + (uint64_t)size) >
+           ((uint64_t)mapping->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+               DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n",
                          addr, lo, hi);
                return -EINVAL;
        }
 
        addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
        addr += amdgpu_bo_gpu_offset(bo);
+       addr -= ((uint64_t)size) * ((uint64_t)index);
 
        ib->ptr[lo] = addr & 0xFFFFFFFF;
        ib->ptr[hi] = addr >> 32;
@@ -493,6 +507,48 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int
        return 0;
 }
 
+/**
+ * amdgpu_vce_validate_handle - validate stream handle
+ *
+ * @p: parser context
+ * @handle: handle to validate
+ * @allocated: allocated a new handle?
+ *
+ * Validates the handle and returns the found session index, or -EINVAL if
+ * the handle is in use by a different filp or no free session index is left.
+ */
+static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
+                                     uint32_t handle, bool *allocated)
+{
+       unsigned i;
+
+       *allocated = false;
+
+       /* validate the handle */
+       for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
+               if (atomic_read(&p->adev->vce.handles[i]) == handle) {
+                       if (p->adev->vce.filp[i] != p->filp) {
+                               DRM_ERROR("VCE handle collision detected!\n");
+                               return -EINVAL;
+                       }
+                       return i;
+               }
+       }
+
+       /* handle not found try to alloc a new one */
+       for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
+               if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
+                       p->adev->vce.filp[i] = p->filp;
+                       p->adev->vce.img_size[i] = 0;
+                       *allocated = true;
+                       return i;
+               }
+       }
+
+       DRM_ERROR("No more free VCE handles!\n");
+       return -EINVAL;
+}
+
 /**
  * amdgpu_vce_cs_parse - parse and validate the command stream
  *
@@ -501,10 +557,15 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int
  */
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 {
-       uint32_t handle = 0;
-       bool destroy = false;
-       int i, r, idx = 0;
        struct amdgpu_ib *ib = &p->ibs[ib_idx];
+       unsigned fb_idx = 0, bs_idx = 0;
+       int session_idx = -1;
+       bool destroyed = false;
+       bool created = false;
+       bool allocated = false;
+       uint32_t tmp, handle = 0;
+       uint32_t *size = &tmp;
+       int i, r = 0, idx = 0;
 
        amdgpu_vce_note_usage(p->adev);
 
@@ -514,16 +575,44 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 
                if ((len < 8) || (len & 3)) {
                        DRM_ERROR("invalid VCE command length (%d)!\n", len);
-                       return -EINVAL;
+                       r = -EINVAL;
+                       goto out;
+               }
+
+               if (destroyed) {
+                       DRM_ERROR("No other command allowed after destroy!\n");
+                       r = -EINVAL;
+                       goto out;
                }
 
                switch (cmd) {
                case 0x00000001: // session
                        handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
+                       session_idx = amdgpu_vce_validate_handle(p, handle,
+                                                                &allocated);
+                       if (session_idx < 0)
+                               return session_idx;
+                       size = &p->adev->vce.img_size[session_idx];
                        break;
 
                case 0x00000002: // task info
+                       fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
+                       bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
+                       break;
+
                case 0x01000001: // create
+                       created = true;
+                       if (!allocated) {
+                               DRM_ERROR("Handle already in use!\n");
+                               r = -EINVAL;
+                               goto out;
+                       }
+
+                       *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
+                               amdgpu_get_ib_value(p, ib_idx, idx + 10) *
+                               8 * 3 / 2;
+                       break;
+
                case 0x04000001: // config extension
                case 0x04000002: // pic control
                case 0x04000005: // rate control
@@ -534,60 +623,74 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
                        break;
 
                case 0x03000001: // encode
-                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9);
+                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
+                                               *size, 0);
                        if (r)
-                               return r;
+                               goto out;
 
-                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11);
+                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
+                                               *size / 3, 0);
                        if (r)
-                               return r;
+                               goto out;
                        break;
 
                case 0x02000001: // destroy
-                       destroy = true;
+                       destroyed = true;
                        break;
 
                case 0x05000001: // context buffer
+                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+                                               *size * 2, 0);
+                       if (r)
+                               goto out;
+                       break;
+
                case 0x05000004: // video bitstream buffer
+                       tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
+                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+                                               tmp, bs_idx);
+                       if (r)
+                               goto out;
+                       break;
+
                case 0x05000005: // feedback buffer
-                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2);
+                       r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
+                                               4096, fb_idx);
                        if (r)
-                               return r;
+                               goto out;
                        break;
 
                default:
                        DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
-                       return -EINVAL;
+                       r = -EINVAL;
+                       goto out;
                }
 
-               idx += len / 4;
-       }
-
-       if (destroy) {
-               /* IB contains a destroy msg, free the handle */
-               for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
-                       atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0);
+               if (session_idx == -1) {
+                       DRM_ERROR("no session command at start of IB\n");
+                       r = -EINVAL;
+                       goto out;
+               }
 
-               return 0;
+               idx += len / 4;
        }
 
-       /* create or encode, validate the handle */
-       for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
-               if (atomic_read(&p->adev->vce.handles[i]) == handle)
-                       return 0;
+       if (allocated && !created) {
+               DRM_ERROR("New session without create command!\n");
+               r = -ENOENT;
        }
 
-       /* handle not found try to alloc a new one */
-       for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
-               if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
-                       p->adev->vce.filp[i] = p->filp;
-                       return 0;
-               }
+out:
+       if ((!r && destroyed) || (r && allocated)) {
+               /*
+                * IB contains a destroy msg or we have allocated a
+                * handle and got an error; either way free the handle
+                */
+               for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
+                       atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0);
        }
 
-       DRM_ERROR("No more free VCE handles!\n");
-
-       return -EINVAL;
+       return r;
 }
 
 /**
index b6a9d0956c6060befa3bd0b8a9b7dc69db5568c2..7ccdb5927da5ce4bcc7f1db1009c5b5297bca29b 100644 (file)
@@ -33,7 +33,6 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
                               struct amdgpu_fence **fence);
 void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
-int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int hi);
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
 bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
                                    struct amdgpu_semaphore *semaphore,
index 407882b233c7952c99ed3128fbd9c7aa2adcab58..9a4e3b63f1cb4bf7ca9c73e813a0568f320c6574 100644 (file)
@@ -1001,6 +1001,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
        list_add(&mapping->list, &bo_va->mappings);
        interval_tree_insert(&mapping->it, &vm->va);
+       trace_amdgpu_vm_bo_map(bo_va, mapping);
 
        bo_va->addr = 0;
 
@@ -1058,6 +1059,7 @@ error_free:
        mutex_lock(&vm->mutex);
        list_del(&mapping->list);
        interval_tree_remove(&mapping->it, &vm->va);
+       trace_amdgpu_vm_bo_unmap(bo_va, mapping);
        kfree(mapping);
 
 error_unlock:
@@ -1099,6 +1101,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
        mutex_lock(&vm->mutex);
        list_del(&mapping->list);
        interval_tree_remove(&mapping->it, &vm->va);
+       trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
        if (bo_va->addr) {
                /* clear the old address */
@@ -1139,6 +1142,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
        list_for_each_entry_safe(mapping, next, &bo_va->mappings, list) {
                list_del(&mapping->list);
                interval_tree_remove(&mapping->it, &vm->va);
+               trace_amdgpu_vm_bo_unmap(bo_va, mapping);
                if (bo_va->addr)
                        list_add(&mapping->list, &vm->freed);
                else
index 5dab578d6462ab2949e005b9996612e09f5f1fa5..341c566818419317a0c3d16a3d5b738840e30b46 100644 (file)
@@ -2256,10 +2256,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
                return -EINVAL;
        }
 
-       adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL);
-       if (adev->ip_block_enabled == NULL)
-               return -ENOMEM;
-
        return 0;
 }
 
index 220865a44814a59a1934b4de16a87d1d797a3541..d19085a9706489a00a0e13306af0d6275587b88c 100644 (file)
 #define VCE_CMD_IB_AUTO                0x00000005
 #define VCE_CMD_SEMAPHORE      0x00000006
 
+/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
+enum {
+       MTYPE_CACHED = 0,
+       MTYPE_NONCACHED = 3
+};
+
 #endif
index e4936a452bc6981e91dfc4bfa1c8dc4202992a13..f75a31df30bdb704f93e5dd465a3a74d93b524d8 100644 (file)
@@ -425,7 +425,7 @@ static int cz_dpm_init(struct amdgpu_device *adev)
        pi->mgcg_cgtt_local1 = 0x0;
        pi->clock_slow_down_step = 25000;
        pi->skip_clock_slow_down = 1;
-       pi->enable_nb_ps_policy = 1;
+       pi->enable_nb_ps_policy = 0;
        pi->caps_power_containment = true;
        pi->caps_cac = true;
        pi->didt_enabled = false;
index 782a74107664df05a7d7abff6e503d80389fc275..99e1afc896294c90c13f7e3ed0cc2ceff2aae1d7 100644 (file)
@@ -46,7 +46,7 @@
 
 /* Do not change the following, it is also defined in SMU8.h */
 #define SMU_EnabledFeatureScoreboard_AcpDpmOn          0x00000001
-#define SMU_EnabledFeatureScoreboard_SclkDpmOn         0x00100000
+#define SMU_EnabledFeatureScoreboard_SclkDpmOn         0x00200000
 #define SMU_EnabledFeatureScoreboard_UvdDpmOn          0x00800000
 #define SMU_EnabledFeatureScoreboard_VceDpmOn          0x01000000
 
index 72c27ac915f2a8681f35db4d15e232918405eb95..aaca8d663f2c60e97921e0c06a69a1c7a4549322 100644 (file)
@@ -3379,7 +3379,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
        uint32_t disp_int, mask, int_control, tmp;
        unsigned hpd;
 
-       if (entry->src_data > 6) {
+       if (entry->src_data >= adev->mode_info.num_hpd) {
                DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
                return 0;
        }
index cb7907447b81dd3696312ce65dc58606c52a9ab4..2c188fb9fd22ff1a3528673beb8866639d5ef631 100644 (file)
@@ -2009,6 +2009,46 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev,
        mutex_unlock(&adev->grbm_idx_mutex);
 }
 
+/**
+ * gmc_v7_0_init_compute_vmid - set up sh_mem registers for compute VMIDs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Program SH_MEM_CONFIG, SH_MEM_APE1_BASE/LIMIT and SH_MEM_BASES for each
+ * VMID in [FIRST_COMPUTE_VMID, LAST_COMPUTE_VMID) while holding srbm_mutex.
+ */
+#define DEFAULT_SH_MEM_BASES   (0x6000)
+#define FIRST_COMPUTE_VMID     (8)
+#define LAST_COMPUTE_VMID      (16)
+static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+       int i;
+       uint32_t sh_mem_config;
+       uint32_t sh_mem_bases;
+
+       /*
+        * Configure apertures:
+        * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
+        * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
+        * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB; NOTE(review): span is 2^48 bytes, confirm)
+        */
+       sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+       sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+                       SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+       sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
+       mutex_lock(&adev->srbm_mutex);
+       for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+               cik_srbm_select(adev, 0, 0, 0, i);
+               /* CP and shaders: same config, APE1 base/limit and bases for every compute VMID */
+               WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+               WREG32(mmSH_MEM_APE1_BASE, 1);
+               WREG32(mmSH_MEM_APE1_LIMIT, 0);
+               WREG32(mmSH_MEM_BASES, sh_mem_bases);
+       }
+       cik_srbm_select(adev, 0, 0, 0, 0);
+       mutex_unlock(&adev->srbm_mutex);
+}
+
 /**
  * gfx_v7_0_gpu_init - setup the 3D engine
  *
@@ -2230,6 +2270,8 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
        cik_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
 
+       gmc_v7_0_init_compute_vmid(adev);
+
        WREG32(mmSX_DEBUG_1, 0x20);
 
        WREG32(mmTA_CNTL_AUX, 0x00010000);
index 14242bd33363d3e26b368a1913bb73a2bf931ba9..7b683fb2173c728fff760c926f1204b3897b4eae 100644 (file)
@@ -1894,6 +1894,51 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
        mutex_unlock(&adev->grbm_idx_mutex);
 }
 
+/**
+ * gmc_v8_0_init_compute_vmid - set up sh_mem registers for compute VMIDs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Program SH_MEM_CONFIG, SH_MEM_APE1_BASE/LIMIT and SH_MEM_BASES for each
+ * VMID in [FIRST_COMPUTE_VMID, LAST_COMPUTE_VMID) while holding srbm_mutex.
+ */
+#define DEFAULT_SH_MEM_BASES   (0x6000)
+#define FIRST_COMPUTE_VMID     (8)
+#define LAST_COMPUTE_VMID      (16)
+static void gmc_v8_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+       int i;
+       uint32_t sh_mem_config;
+       uint32_t sh_mem_bases;
+
+       /*
+        * Configure apertures:
+        * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
+        * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
+        * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB; NOTE(review): span is 2^48 bytes, confirm)
+        */
+       sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+
+       sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
+                       SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
+                       SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+                       SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
+                       MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
+                       SH_MEM_CONFIG__PRIVATE_ATC_MASK;
+
+       mutex_lock(&adev->srbm_mutex);
+       for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+               vi_srbm_select(adev, 0, 0, 0, i);
+               /* CP and shaders: same config, APE1 base/limit and bases for every compute VMID */
+               WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+               WREG32(mmSH_MEM_APE1_BASE, 1);
+               WREG32(mmSH_MEM_APE1_LIMIT, 0);
+               WREG32(mmSH_MEM_BASES, sh_mem_bases);
+       }
+       vi_srbm_select(adev, 0, 0, 0, 0);
+       mutex_unlock(&adev->srbm_mutex);
+}
+
 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
 {
        u32 gb_addr_config;
@@ -2113,6 +2158,8 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
        vi_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
 
+       gmc_v8_0_init_compute_vmid(adev);
+
        mutex_lock(&adev->grbm_idx_mutex);
        /*
         * making sure that the following register writes will be broadcasted
@@ -3081,7 +3128,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
                                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
                                       AMDGPU_DOORBELL_KIQ << 2);
                                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
-                                      AMDGPU_DOORBELL_MEC_RING7 << 2);
+                                               0x7FFFF << 2);
                        }
                        tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
@@ -3097,6 +3144,12 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
                WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->cp_hqd_pq_doorbell_control);
 
+               /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+               ring->wptr = 0;
+               mqd->cp_hqd_pq_wptr = ring->wptr;
+               WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
+               mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+
                /* set the vmid for the queue */
                mqd->cp_hqd_vmid = 0;
                WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
index e3c1fde753638de09e9465464bb47339165aa62e..7bb37b93993fb5312eb2d46189bf09bf789c3989 100644 (file)
@@ -438,6 +438,31 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
        /* XXX todo */
 }
 
+/**
+ * sdma_v3_0_ctx_switch_enable - enable/disable the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true sets AUTO_CTXSW_ENABLE, false clears it.
+ *
+ * Read-modify-write SDMA0_CNTL for each of the SDMA_MAX_INSTANCE engines (VI).
+ */
+static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+       u32 f32_cntl;
+       int i;
+
+       for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
+               f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
+               if (enable)
+                       f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+                                       AUTO_CTXSW_ENABLE, 1);
+               else
+                       f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+                                       AUTO_CTXSW_ENABLE, 0);
+               WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl);
+       }
+}
+
 /**
  * sdma_v3_0_enable - stop the async dma engines
  *
@@ -648,6 +673,8 @@ static int sdma_v3_0_start(struct amdgpu_device *adev)
 
        /* unhalt the MEs */
        sdma_v3_0_enable(adev, true);
+       /* enable sdma ring preemption */
+       sdma_v3_0_ctx_switch_enable(adev, true);
 
        /* start the gfx rings and rlc compute queues */
        r = sdma_v3_0_gfx_resume(adev);
@@ -1079,6 +1106,7 @@ static int sdma_v3_0_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       sdma_v3_0_ctx_switch_enable(adev, false);
        sdma_v3_0_enable(adev, false);
 
        return 0;
index 90fc93c2c1d04571e4a694af23e5a7af51267a0a..fa5a4448531dfe9dd307d88b55e051761821a28d 100644 (file)
@@ -1189,10 +1189,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
                return -EINVAL;
        }
 
-       adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL);
-       if (adev->ip_block_enabled == NULL)
-               return -ENOMEM;
-
        return 0;
 }
 
index 619dad1b23863716972a23b12226b16af533a068..9daa2883ac186f73c64baa40ba61aadfd0a94c9a 100644 (file)
@@ -516,17 +516,17 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
                struct page *page_table;
 
                if (WARN_ON(!ppgtt->pdp.page_directory[pdpe]))
-                       continue;
+                       break;
 
                pd = ppgtt->pdp.page_directory[pdpe];
 
                if (WARN_ON(!pd->page_table[pde]))
-                       continue;
+                       break;
 
                pt = pd->page_table[pde];
 
                if (WARN_ON(!pt->page))
-                       continue;
+                       break;
 
                page_table = pt->page;
 
index f5edb3504167ec09b5165d6f54aa760a85f864d1..2030f602cbf8b74366bcb78f9f2ddc4a5f0dd9c6 100644 (file)
@@ -3491,6 +3491,7 @@ enum skl_disp_power_wells {
 #define   BLM_POLARITY_PNV                     (1 << 0) /* pnv only */
 
 #define BLC_HIST_CTL   (dev_priv->info.display_mmio_offset + 0x61260)
+#define  BLM_HISTOGRAM_ENABLE                  (1 << 31)
 
 /* New registers for PCH-split platforms. Safe where new bits show up, the
  * register layout machtes with gen4 BLC_PWM_CTL[12]. */
index dcb1d25d6f051ee88ae84ba7eee7e5fe4f4a2c76..1b61f98103870171e75338595b32e5dd473523e0 100644 (file)
@@ -13303,6 +13303,16 @@ intel_check_primary_plane(struct drm_plane *plane,
                                intel_crtc->atomic.wait_vblank = true;
                }
 
+               /*
+                * FIXME: If any other plane is still enabled on the pipe we
+                * could leave IPS enabled, but for now assume that IPS must
+                * be disabled whenever the primary plane is made invisible
+                * (the update_primary_plane function does this by setting
+                * DSPCNTR to 0).
+                */
+               if (!state->visible || !fb)
+                       intel_crtc->atomic.disable_ips = true;
+
                intel_crtc->atomic.fb_bits |=
                        INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe);
 
@@ -13400,6 +13410,9 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc)
        if (intel_crtc->atomic.disable_fbc)
                intel_fbc_disable(dev);
 
+       if (intel_crtc->atomic.disable_ips)
+               hsw_disable_ips(intel_crtc);
+
        if (intel_crtc->atomic.pre_disable_primary)
                intel_pre_disable_primary(crtc);
 
index 76afc62373d75bc8c6d9349acd42a38cc71a5a02..6e8faa25379240cab60f57631adf42612f28df33 100644 (file)
@@ -1140,6 +1140,9 @@ skl_edp_set_pll_config(struct intel_crtc_state *pipe_config, int link_clock)
 static void
 hsw_dp_set_ddi_pll_sel(struct intel_crtc_state *pipe_config, int link_bw)
 {
+       memset(&pipe_config->dpll_hw_state, 0,
+              sizeof(pipe_config->dpll_hw_state));
+
        switch (link_bw) {
        case DP_LINK_BW_1_62:
                pipe_config->ddi_pll_sel = PORT_CLK_SEL_LCPLL_810;
index 2afb31a4627573a3f97d0a4530231f27451e0793..105928382e216239043faf4e651d10520c92b678 100644 (file)
@@ -485,6 +485,7 @@ struct intel_crtc_atomic_commit {
        /* Sleepable operations to perform before commit */
        bool wait_for_flips;
        bool disable_fbc;
+       bool disable_ips;
        bool pre_disable_primary;
        bool update_wm;
        unsigned disabled_planes;
index 7d83527f95f797fd3e020227753923c6b029d41b..55aad2322e10ec8e7ea168aa84e0b7d98dcf9b91 100644 (file)
@@ -907,6 +907,14 @@ static void i9xx_enable_backlight(struct intel_connector *connector)
 
        /* XXX: combine this into above write? */
        intel_panel_actually_set_backlight(connector, panel->backlight.level);
+
+       /*
+        * Needed to enable backlight on some 855gm models. BLC_HIST_CTL is
+        * 855gm only, but checking for gen2 is safe, as 855gm is the only gen2
+        * that has backlight.
+        */
+       if (IS_GEN2(dev))
+               I915_WRITE(BLC_HIST_CTL, BLM_HISTOGRAM_ENABLE);
 }
 
 static void i965_enable_backlight(struct intel_connector *connector)
index b0688b0c8908f5ba0704657f6aa39d1b47508a54..4ecf5caa8c6d9745f9421710179aa2f81ef3587f 100644 (file)
@@ -4604,6 +4604,31 @@ void cik_compute_set_wptr(struct radeon_device *rdev,
        WDOORBELL32(ring->doorbell_index, ring->wptr);
 }
 
+static void cik_compute_stop(struct radeon_device *rdev,
+                            struct radeon_ring *ring)
+{
+       u32 j, tmp;
+
+       cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
+       /* Disable wptr polling by clearing WPTR_POLL_EN. */
+       tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
+       tmp &= ~WPTR_POLL_EN;
+       WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
+       /* If the HQD is active, request a dequeue and wait for it to go idle. */
+       if (RREG32(CP_HQD_ACTIVE) & 1) {
+               WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
+               for (j = 0; j < rdev->usec_timeout; j++) {
+                       if (!(RREG32(CP_HQD_ACTIVE) & 1))
+                               break;
+                       udelay(1);
+               }
+               WREG32(CP_HQD_DEQUEUE_REQUEST, 0); /* runs even if the poll timed out */
+               WREG32(CP_HQD_PQ_RPTR, 0); /* zero the queue read pointer ... */
+               WREG32(CP_HQD_PQ_WPTR, 0); /* ... and the write pointer */
+       }
+       cik_srbm_select(rdev, 0, 0, 0, 0);
+}
+
 /**
  * cik_cp_compute_enable - enable/disable the compute CP MEs
  *
@@ -4617,6 +4642,15 @@ static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
        if (enable)
                WREG32(CP_MEC_CNTL, 0);
        else {
+               /*
+                * To make hibernation reliable we need to clear compute ring
+                * configuration before halting the compute ring.
+                */
+               mutex_lock(&rdev->srbm_mutex);
+               cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
+               cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
+               mutex_unlock(&rdev->srbm_mutex);
+
                WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
                rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
                rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
index f86eb54e7763d65341006c4ff25f4e8f8076760c..d16f2eebd95e6b2df5412d072023a89d43d32ae2 100644 (file)
@@ -268,6 +268,17 @@ static void cik_sdma_gfx_stop(struct radeon_device *rdev)
        }
        rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
        rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
+
+       /* FIXME: find something gentler than this big hammer. After a few
+        * days of trying, no better combination was found, so reset the SDMA
+        * blocks, since it seems we do not shut them down properly. This
+        * fixes hibernation and does not affect suspend to RAM.
+        */
+       WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
+       (void)RREG32(SRBM_SOFT_RESET);
+       udelay(50);
+       WREG32(SRBM_SOFT_RESET, 0);
+       (void)RREG32(SRBM_SOFT_RESET);
 }
 
 /**
index c89215275053d3168e6deba468b32fc05ba89bf6..fa719c53449bcd90e009e1b59d1b3e1ed5bff6f3 100644 (file)
@@ -469,22 +469,22 @@ void radeon_audio_detect(struct drm_connector *connector,
        dig = radeon_encoder->enc_priv;
 
        if (status == connector_status_connected) {
-               struct radeon_connector *radeon_connector;
-               int sink_type;
-
                if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) {
                        radeon_encoder->audio = NULL;
                        return;
                }
 
-               radeon_connector = to_radeon_connector(connector);
-               sink_type = radeon_dp_getsinktype(radeon_connector);
+               if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) {
+                       struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 
-               if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort &&
-                       sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT)
-                       radeon_encoder->audio = rdev->audio.dp_funcs;
-               else
+                       if (radeon_dp_getsinktype(radeon_connector) ==
+                           CONNECTOR_OBJECT_ID_DISPLAYPORT)
+                               radeon_encoder->audio = rdev->audio.dp_funcs;
+                       else
+                               radeon_encoder->audio = rdev->audio.hdmi_funcs;
+               } else {
                        radeon_encoder->audio = rdev->audio.hdmi_funcs;
+               }
 
                dig->afmt->pin = radeon_audio_get_pin(connector->encoder);
                radeon_audio_enable(rdev, dig->afmt->pin, 0xf);
index aeb676708e60cfb1871326bfc5a689631bb98741..634793ea841889847ac090c32470548a1cae418d 100644 (file)
@@ -257,7 +257,6 @@ static int radeonfb_create(struct drm_fb_helper *helper,
        }
 
        info->par = rfbdev;
-       info->skip_vt_switch = true;
 
        ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
        if (ret) {
index edafd3c2b17028a73ff5128568c73adfaff0f85b..06ac59fe332ab089d21b279aba092f615710c7ca 100644 (file)
@@ -719,7 +719,7 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm)
                return 0;
 
        if (gtt && gtt->userptr) {
-               ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL);
+               ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
                if (!ttm->sg)
                        return -ENOMEM;
 
index 3662157c2b1582b54291b607be1667579eef368b..ec10533a49b87a905aa82eda96e48bddae32d87f 100644 (file)
@@ -1129,12 +1129,12 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
                interval_tree_remove(&bo_va->it, &vm->va);
 
        spin_lock(&vm->status_lock);
-       if (list_empty(&bo_va->vm_status)) {
+       list_del(&bo_va->vm_status);
+       if (bo_va->it.start || bo_va->it.last) {
                bo_va->bo = radeon_bo_ref(bo_va->bo);
                list_add(&bo_va->vm_status, &vm->freed);
        } else {
                radeon_fence_unref(&bo_va->last_pt_update);
-               list_del(&bo_va->vm_status);
                kfree(bo_va);
        }
        spin_unlock(&vm->status_lock);
index 3962176ee71325ca4434fa34643f117c58b2d217..01b558fe369539f447d36493f3bcd7e1bc4f3ded 100644 (file)
@@ -21,6 +21,7 @@
 #include <drm/drm_fb_helper.h>
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
+#include <linux/module.h>
 #include <linux/of_graph.h>
 #include <linux/component.h>
 
index 4557f335a8a56f243ad4aa326a1826572116d3a5..dc65161d7cad20acb2f079bdb6b229cf779b3613 100644 (file)
@@ -19,6 +19,7 @@
 #include <drm/drm_plane_helper.h>
 
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/of.h>
index 9d056417d88c5ee9d70d018c7341f10d7e92fb96..f9aaf37262be4cb120201313c30b167896f66aaf 100644 (file)
@@ -24,6 +24,7 @@
 #define __LINUX_HSI_OMAP_SSI_H__
 
 #include <linux/device.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/hsi/hsi.h>
 #include <linux/gpio.h>
index 2a808822af2163d89d83fb02ce1e67eb5f1474d7..37c16afe007a0524eaacb5edcae9399bebfae897 100644 (file)
@@ -777,7 +777,7 @@ static int __init i8k_init_hwmon(void)
        if (err >= 0)
                i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN2;
 
-       i8k_hwmon_dev = hwmon_device_register_with_groups(NULL, "dell-smm",
+       i8k_hwmon_dev = hwmon_device_register_with_groups(NULL, "dell_smm",
                                                          NULL, i8k_groups);
        if (IS_ERR(i8k_hwmon_dev)) {
                err = PTR_ERR(i8k_hwmon_dev);
index d219c06a857bb5795a176bb88d9e186fd6a4b286..972444a14cca5feb333c5244fd438fbb435f1e7e 100644 (file)
 /* output format */
 #define MCP3021_SAR_SHIFT      2
 #define MCP3021_SAR_MASK       0x3ff
-
 #define MCP3021_OUTPUT_RES     10      /* 10-bit resolution */
-#define MCP3021_OUTPUT_SCALE   4
 
 #define MCP3221_SAR_SHIFT      0
 #define MCP3221_SAR_MASK       0xfff
 #define MCP3221_OUTPUT_RES     12      /* 12-bit resolution */
-#define MCP3221_OUTPUT_SCALE   1
 
 enum chips {
        mcp3021,
@@ -54,7 +51,6 @@ struct mcp3021_data {
        u16 sar_shift;
        u16 sar_mask;
        u8 output_res;
-       u8 output_scale;
 };
 
 static int mcp3021_read16(struct i2c_client *client)
@@ -84,13 +80,7 @@ static int mcp3021_read16(struct i2c_client *client)
 
 static inline u16 volts_from_reg(struct mcp3021_data *data, u16 val)
 {
-       if (val == 0)
-               return 0;
-
-       val = val * data->output_scale - data->output_scale / 2;
-
-       return val * DIV_ROUND_CLOSEST(data->vdd,
-                       (1 << data->output_res) * data->output_scale);
+       return DIV_ROUND_CLOSEST(data->vdd * val, 1 << data->output_res);
 }
 
 static ssize_t show_in_input(struct device *dev, struct device_attribute *attr,
@@ -132,14 +122,12 @@ static int mcp3021_probe(struct i2c_client *client,
                data->sar_shift = MCP3021_SAR_SHIFT;
                data->sar_mask = MCP3021_SAR_MASK;
                data->output_res = MCP3021_OUTPUT_RES;
-               data->output_scale = MCP3021_OUTPUT_SCALE;
                break;
 
        case mcp3221:
                data->sar_shift = MCP3221_SAR_SHIFT;
                data->sar_mask = MCP3221_SAR_MASK;
                data->output_res = MCP3221_OUTPUT_RES;
-               data->output_scale = MCP3221_OUTPUT_SCALE;
                break;
        }
 
index 55765790907b3768eb1c4b23e2e3bd77d4eaf294..28fcb2e246d55a7acc52703e434b98de3e22c45b 100644 (file)
@@ -547,7 +547,7 @@ static umode_t nct7802_temp_is_visible(struct kobject *kobj,
        if (index >= 9 && index < 18 &&
            (reg & 0x0c) != 0x04 && (reg & 0x0c) != 0x08)       /* RD2 */
                return 0;
-       if (index >= 18 && index < 27 && (reg & 0x30) != 0x10)  /* RD3 */
+       if (index >= 18 && index < 27 && (reg & 0x30) != 0x20)  /* RD3 */
                return 0;
        if (index >= 27 && index < 35)                          /* local */
                return attr->mode;
index f3755e0aa935c96a3aa0b0d3e4122b82e8cfe241..f80acb36ff075ca6a25b398861d97993d32fd57b 100644 (file)
@@ -195,4 +195,4 @@ static int __init ipc_init(void)
 {
        return amba_driver_register(&pl320_driver);
 }
-module_init(ipc_init);
+subsys_initcall(ipc_init);
index 9df2b6801f767c9c0da6904b689299c93d031417..b2b411da297b06e73441f8dd51c8bae0b004bcc0 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/regulator/consumer.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/pm_runtime.h>
+#include <linux/pm_wakeirq.h>
 #include <linux/platform_data/hsmmc-omap.h>
 
 /* OMAP HSMMC Host Controller Registers */
@@ -218,7 +219,6 @@ struct omap_hsmmc_host {
        unsigned int            flags;
 #define AUTO_CMD23             (1 << 0)        /* Auto CMD23 support */
 #define HSMMC_SDIO_IRQ_ENABLED (1 << 1)        /* SDIO irq enabled */
-#define HSMMC_WAKE_IRQ_ENABLED (1 << 2)
        struct omap_hsmmc_next  next_data;
        struct  omap_hsmmc_platform_data        *pdata;
 
@@ -1117,22 +1117,6 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-static irqreturn_t omap_hsmmc_wake_irq(int irq, void *dev_id)
-{
-       struct omap_hsmmc_host *host = dev_id;
-
-       /* cirq is level triggered, disable to avoid infinite loop */
-       spin_lock(&host->irq_lock);
-       if (host->flags & HSMMC_WAKE_IRQ_ENABLED) {
-               disable_irq_nosync(host->wake_irq);
-               host->flags &= ~HSMMC_WAKE_IRQ_ENABLED;
-       }
-       spin_unlock(&host->irq_lock);
-       pm_request_resume(host->dev); /* no use counter */
-
-       return IRQ_HANDLED;
-}
-
 static void set_sd_bus_power(struct omap_hsmmc_host *host)
 {
        unsigned long i;
@@ -1665,7 +1649,6 @@ static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
 
 static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host)
 {
-       struct mmc_host *mmc = host->mmc;
        int ret;
 
        /*
@@ -1677,11 +1660,7 @@ static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host)
        if (!host->dev->of_node || !host->wake_irq)
                return -ENODEV;
 
-       /* Prevent auto-enabling of IRQ */
-       irq_set_status_flags(host->wake_irq, IRQ_NOAUTOEN);
-       ret = devm_request_irq(host->dev, host->wake_irq, omap_hsmmc_wake_irq,
-                              IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-                              mmc_hostname(mmc), host);
+       ret = dev_pm_set_dedicated_wake_irq(host->dev, host->wake_irq);
        if (ret) {
                dev_err(mmc_dev(host->mmc), "Unable to request wake IRQ\n");
                goto err;
@@ -1718,7 +1697,7 @@ static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host)
        return 0;
 
 err_free_irq:
-       devm_free_irq(host->dev, host->wake_irq, host);
+       dev_pm_clear_wake_irq(host->dev);
 err:
        dev_warn(host->dev, "no SDIO IRQ support, falling back to polling\n");
        host->wake_irq = 0;
@@ -2007,6 +1986,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
                omap_hsmmc_ops.multi_io_quirk = omap_hsmmc_multi_io_quirk;
        }
 
+       device_init_wakeup(&pdev->dev, true);
        pm_runtime_enable(host->dev);
        pm_runtime_get_sync(host->dev);
        pm_runtime_set_autosuspend_delay(host->dev, MMC_AUTOSUSPEND_DELAY);
@@ -2147,6 +2127,7 @@ err_slot_name:
        if (host->use_reg)
                omap_hsmmc_reg_put(host);
 err_irq:
+       device_init_wakeup(&pdev->dev, false);
        if (host->tx_chan)
                dma_release_channel(host->tx_chan);
        if (host->rx_chan)
@@ -2178,6 +2159,7 @@ static int omap_hsmmc_remove(struct platform_device *pdev)
 
        pm_runtime_put_sync(host->dev);
        pm_runtime_disable(host->dev);
+       device_init_wakeup(&pdev->dev, false);
        if (host->dbclk)
                clk_disable_unprepare(host->dbclk);
 
@@ -2204,11 +2186,6 @@ static int omap_hsmmc_suspend(struct device *dev)
                                OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
        }
 
-       /* do not wake up due to sdio irq */
-       if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
-           !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ))
-               disable_irq(host->wake_irq);
-
        if (host->dbclk)
                clk_disable_unprepare(host->dbclk);
 
@@ -2233,11 +2210,6 @@ static int omap_hsmmc_resume(struct device *dev)
                omap_hsmmc_conf_bus_power(host);
 
        omap_hsmmc_protect_card(host);
-
-       if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
-           !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ))
-               enable_irq(host->wake_irq);
-
        pm_runtime_mark_last_busy(host->dev);
        pm_runtime_put_autosuspend(host->dev);
        return 0;
@@ -2277,10 +2249,6 @@ static int omap_hsmmc_runtime_suspend(struct device *dev)
                }
 
                pinctrl_pm_select_idle_state(dev);
-
-               WARN_ON(host->flags & HSMMC_WAKE_IRQ_ENABLED);
-               enable_irq(host->wake_irq);
-               host->flags |= HSMMC_WAKE_IRQ_ENABLED;
        } else {
                pinctrl_pm_select_idle_state(dev);
        }
@@ -2302,11 +2270,6 @@ static int omap_hsmmc_runtime_resume(struct device *dev)
        spin_lock_irqsave(&host->irq_lock, flags);
        if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
            (host->flags & HSMMC_SDIO_IRQ_ENABLED)) {
-               /* sdio irq flag can't change while in runtime suspend */
-               if (host->flags & HSMMC_WAKE_IRQ_ENABLED) {
-                       disable_irq_nosync(host->wake_irq);
-                       host->flags &= ~HSMMC_WAKE_IRQ_ENABLED;
-               }
 
                pinctrl_pm_select_default_state(host->dev);
 
index 4c04360f378bb94037b77c1283bc47bd245b7b7c..b2a189507fc35edfdc3057385df5c750d2ff2e8f 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/io.h>
 #include <linux/ioport.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/resource.h>
index 8c43589c3edba9248c13760c8e92025b4a5cf599..1f52462f4cdd4b7e1431723ca37b32c267da68a1 100644 (file)
@@ -220,20 +220,10 @@ free_resources:
        return ret;
 }
 
-static int goldfish_pdev_bus_remove(struct platform_device *pdev)
-{
-       iounmap(pdev_bus_base);
-       free_irq(pdev_bus_irq, pdev);
-       release_mem_region(pdev_bus_addr, pdev_bus_len);
-       return 0;
-}
-
 static struct platform_driver goldfish_pdev_bus_driver = {
        .probe = goldfish_pdev_bus_probe,
-       .remove = goldfish_pdev_bus_remove,
        .driver = {
                .name = "goldfish_pdev_bus"
        }
 };
-
-module_platform_driver(goldfish_pdev_bus_driver);
+builtin_platform_driver(goldfish_pdev_bus_driver);
index d3c7d245ae63d93c5ec36617e69331b25f9b570c..7d0d269a0837c0b84e9f72eeabbab354159a3b8e 100644 (file)
@@ -88,4 +88,4 @@ static struct platform_driver syscon_reboot_driver = {
                .of_match_table = syscon_reboot_of_match,
        },
 };
-module_platform_driver(syscon_reboot_driver);
+builtin_platform_driver(syscon_reboot_driver);
index 6af41abccacb473921dbeba078147d067ea8e0cd..c07ee13bd47047e1e930a16e1d76229ae0b7b027 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/gpio.h>
 #include <linux/slab.h>
 #include <linux/gpio/consumer.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
index b562af816c0a5cd1fa0fb525b6d99e12e18fb034..b04b05a0904eec086c49250c3dc4e27cb84a3927 100644 (file)
@@ -260,7 +260,7 @@ static int __init qcom_cpuidle_init(struct device_node *cpu_node, int cpu)
                /* We have atleast one power down mode */
                cpumask_clear(&mask);
                cpumask_set_cpu(cpu, &mask);
-               qcom_scm_set_warm_boot_addr(cpu_resume, &mask);
+               qcom_scm_set_warm_boot_addr(cpu_resume_arm, &mask);
        }
 
        per_cpu(qcom_idle_ops, cpu) = fns;
index cc119d15dd1616feeef9821ecf98c94fc19ecfec..75d0457a77b72ade791df16e6193662028e35847 100644 (file)
@@ -1021,7 +1021,7 @@ static struct platform_driver tegra_pmc_driver = {
        },
        .probe = tegra_pmc_probe,
 };
-module_platform_driver(tegra_pmc_driver);
+builtin_platform_driver(tegra_pmc_driver);
 
 /*
  * Early initialization to allow access to registers in the very early boot
index 1a07bf540fecc384ef44112e0798c1afd798e9dd..e642c4540dda123a39b9ff42437f34f64f3bad06 100644 (file)
@@ -142,4 +142,4 @@ static struct platform_driver realview_soc_driver = {
                .of_match_table = realview_soc_of_match,
        },
 };
-module_platform_driver(realview_soc_driver);
+builtin_platform_driver(realview_soc_driver);
index 3774600741d844f7d21ab98d3186d4fde25c0db3..9325262289f9e191a1c3ecb5d8024df56b7e0d21 100644 (file)
@@ -640,25 +640,7 @@ err_destroy_ports:
        put_tty_driver(channel_driver);
        return ret;
 }
-
-static void dashtty_exit(void)
-{
-       int nport;
-       struct dashtty_port *dport;
-
-       del_timer_sync(&put_timer);
-       kthread_stop(dashtty_thread);
-       del_timer_sync(&poll_timer);
-       tty_unregister_driver(channel_driver);
-       for (nport = 0; nport < NUM_TTY_CHANNELS; nport++) {
-               dport = &dashtty_ports[nport];
-               tty_port_destroy(&dport->port);
-       }
-       put_tty_driver(channel_driver);
-}
-
-module_init(dashtty_init);
-module_exit(dashtty_exit);
+device_initcall(dashtty_init);
 
 #ifdef CONFIG_DA_CONSOLE
 
index 978204333c94b364b843c723c45dff353a5dbdfb..d75a66c7275098184b53344d0729c9dd2ac50a85 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/console.h>
 #include <linux/pm_qos.h>
+#include <linux/pm_wakeirq.h>
 #include <linux/dma-mapping.h>
 
 #include "8250.h"
@@ -552,17 +553,6 @@ static void omap8250_uart_qos_work(struct work_struct *work)
        pm_qos_update_request(&priv->pm_qos_request, priv->latency);
 }
 
-static irqreturn_t omap_wake_irq(int irq, void *dev_id)
-{
-       struct uart_port *port = dev_id;
-       int ret;
-
-       ret = port->handle_irq(port);
-       if (ret)
-               return IRQ_HANDLED;
-       return IRQ_NONE;
-}
-
 #ifdef CONFIG_SERIAL_8250_DMA
 static int omap_8250_dma_handle_irq(struct uart_port *port);
 #endif
@@ -596,11 +586,9 @@ static int omap_8250_startup(struct uart_port *port)
        int ret;
 
        if (priv->wakeirq) {
-               ret = request_irq(priv->wakeirq, omap_wake_irq,
-                                 port->irqflags, "uart wakeup irq", port);
+               ret = dev_pm_set_dedicated_wake_irq(port->dev, priv->wakeirq);
                if (ret)
                        return ret;
-               disable_irq(priv->wakeirq);
        }
 
        pm_runtime_get_sync(port->dev);
@@ -649,8 +637,7 @@ static int omap_8250_startup(struct uart_port *port)
 err:
        pm_runtime_mark_last_busy(port->dev);
        pm_runtime_put_autosuspend(port->dev);
-       if (priv->wakeirq)
-               free_irq(priv->wakeirq, port);
+       dev_pm_clear_wake_irq(port->dev);
        return ret;
 }
 
@@ -682,10 +669,8 @@ static void omap_8250_shutdown(struct uart_port *port)
 
        pm_runtime_mark_last_busy(port->dev);
        pm_runtime_put_autosuspend(port->dev);
-
        free_irq(port->irq, port);
-       if (priv->wakeirq)
-               free_irq(priv->wakeirq, port);
+       dev_pm_clear_wake_irq(port->dev);
 }
 
 static void omap_8250_throttle(struct uart_port *port)
@@ -1226,31 +1211,6 @@ static int omap8250_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM
-
-static inline void omap8250_enable_wakeirq(struct omap8250_priv *priv,
-                                          bool enable)
-{
-       if (!priv->wakeirq)
-               return;
-
-       if (enable)
-               enable_irq(priv->wakeirq);
-       else
-               disable_irq_nosync(priv->wakeirq);
-}
-
-static void omap8250_enable_wakeup(struct omap8250_priv *priv,
-                                  bool enable)
-{
-       if (enable == priv->wakeups_enabled)
-               return;
-
-       omap8250_enable_wakeirq(priv, enable);
-       priv->wakeups_enabled = enable;
-}
-#endif
-
 #ifdef CONFIG_PM_SLEEP
 static int omap8250_prepare(struct device *dev)
 {
@@ -1277,11 +1237,6 @@ static int omap8250_suspend(struct device *dev)
 
        serial8250_suspend_port(priv->line);
        flush_work(&priv->qos_work);
-
-       if (device_may_wakeup(dev))
-               omap8250_enable_wakeup(priv, true);
-       else
-               omap8250_enable_wakeup(priv, false);
        return 0;
 }
 
@@ -1289,9 +1244,6 @@ static int omap8250_resume(struct device *dev)
 {
        struct omap8250_priv *priv = dev_get_drvdata(dev);
 
-       if (device_may_wakeup(dev))
-               omap8250_enable_wakeup(priv, false);
-
        serial8250_resume_port(priv->line);
        return 0;
 }
@@ -1333,7 +1285,6 @@ static int omap8250_runtime_suspend(struct device *dev)
                        return -EBUSY;
        }
 
-       omap8250_enable_wakeup(priv, true);
        if (up->dma)
                omap_8250_rx_dma(up, UART_IIR_RX_TIMEOUT);
 
@@ -1354,7 +1305,6 @@ static int omap8250_runtime_resume(struct device *dev)
                return 0;
 
        up = serial8250_get_port(priv->line);
-       omap8250_enable_wakeup(priv, false);
        loss_cntx = omap8250_lost_context(up);
 
        if (loss_cntx)
index 7f49172ccd8673b316b3b82ab21282885ef983eb..7a2172b5e93cd296674c9b78a5fd31594db0f492 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/serial_core.h>
 #include <linux/irq.h>
 #include <linux/pm_runtime.h>
+#include <linux/pm_wakeirq.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/gpio.h>
@@ -160,7 +161,6 @@ struct uart_omap_port {
        unsigned long           port_activity;
        int                     context_loss_cnt;
        u32                     errata;
-       u8                      wakeups_enabled;
        u32                     features;
 
        int                     rts_gpio;
@@ -209,28 +209,11 @@ static int serial_omap_get_context_loss_count(struct uart_omap_port *up)
        return pdata->get_context_loss_count(up->dev);
 }
 
-static inline void serial_omap_enable_wakeirq(struct uart_omap_port *up,
-                                      bool enable)
-{
-       if (!up->wakeirq)
-               return;
-
-       if (enable)
-               enable_irq(up->wakeirq);
-       else
-               disable_irq_nosync(up->wakeirq);
-}
-
+/* REVISIT: Remove this when omap3 boots in device tree only mode */
 static void serial_omap_enable_wakeup(struct uart_omap_port *up, bool enable)
 {
        struct omap_uart_port_info *pdata = dev_get_platdata(up->dev);
 
-       if (enable == up->wakeups_enabled)
-               return;
-
-       serial_omap_enable_wakeirq(up, enable);
-       up->wakeups_enabled = enable;
-
        if (!pdata || !pdata->enable_wakeup)
                return;
 
@@ -750,13 +733,11 @@ static int serial_omap_startup(struct uart_port *port)
 
        /* Optional wake-up IRQ */
        if (up->wakeirq) {
-               retval = request_irq(up->wakeirq, serial_omap_irq,
-                                    up->port.irqflags, up->name, up);
+               retval = dev_pm_set_dedicated_wake_irq(up->dev, up->wakeirq);
                if (retval) {
                        free_irq(up->port.irq, up);
                        return retval;
                }
-               disable_irq(up->wakeirq);
        }
 
        dev_dbg(up->port.dev, "serial_omap_startup+%d\n", up->port.line);
@@ -845,8 +826,7 @@ static void serial_omap_shutdown(struct uart_port *port)
        pm_runtime_mark_last_busy(up->dev);
        pm_runtime_put_autosuspend(up->dev);
        free_irq(up->port.irq, up);
-       if (up->wakeirq)
-               free_irq(up->wakeirq, up);
+       dev_pm_clear_wake_irq(up->dev);
 }
 
 static void serial_omap_uart_qos_work(struct work_struct *work)
@@ -1139,13 +1119,6 @@ serial_omap_pm(struct uart_port *port, unsigned int state,
        serial_out(up, UART_EFR, efr);
        serial_out(up, UART_LCR, 0);
 
-       if (!device_may_wakeup(up->dev)) {
-               if (!state)
-                       pm_runtime_forbid(up->dev);
-               else
-                       pm_runtime_allow(up->dev);
-       }
-
        pm_runtime_mark_last_busy(up->dev);
        pm_runtime_put_autosuspend(up->dev);
 }
index 612b093831d52be7040bcefeb279eaf9f4dd9700..9200a8668b498ff3dc76136760496524a0b3cad4 100644 (file)
@@ -1225,6 +1225,15 @@ static int dss_add_child_component(struct device *dev, void *data)
 {
        struct component_match **match = data;
 
+       /*
+        * HACK
+        * We don't have a working driver for rfbi, so skip it here always.
+        * Otherwise dss will never get probed successfully, as it will wait
+        * for rfbi to get probed.
+        */
+       if (strstr(dev_name(dev), "rfbi"))
+               return 0;
+
        component_match_add(dev->parent, match, dss_component_compare, dev);
 
        return 0;
index 64fa248343f65461db232ee4ae0939beff0fc05c..8f84646f10e9560ade100c1dafa180768f1d76de 100644 (file)
@@ -187,10 +187,10 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
                val_size2 = posix_acl_xattr_size(default_acl->a_count);
 
        err = -ENOMEM;
-       tmp_buf = kmalloc(max(val_size1, val_size2), GFP_NOFS);
+       tmp_buf = kmalloc(max(val_size1, val_size2), GFP_KERNEL);
        if (!tmp_buf)
                goto out_err;
-       pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_NOFS);
+       pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_KERNEL);
        if (!pagelist)
                goto out_err;
        ceph_pagelist_init(pagelist);
index e162bcd105ee2d98c2dcd12e80c75ee729a7a951..890c50971a690472f6dc795b00fd0bcfbcdf1a39 100644 (file)
@@ -87,17 +87,21 @@ static int ceph_set_page_dirty(struct page *page)
        inode = mapping->host;
        ci = ceph_inode(inode);
 
-       /*
-        * Note that we're grabbing a snapc ref here without holding
-        * any locks!
-        */
-       snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
-
        /* dirty the head */
        spin_lock(&ci->i_ceph_lock);
-       if (ci->i_head_snapc == NULL)
-               ci->i_head_snapc = ceph_get_snap_context(snapc);
-       ++ci->i_wrbuffer_ref_head;
+       BUG_ON(ci->i_wr_ref == 0); // caller should hold Fw reference
+       if (__ceph_have_pending_cap_snap(ci)) {
+               struct ceph_cap_snap *capsnap =
+                               list_last_entry(&ci->i_cap_snaps,
+                                               struct ceph_cap_snap,
+                                               ci_item);
+               snapc = ceph_get_snap_context(capsnap->context);
+               capsnap->dirty_pages++;
+       } else {
+               BUG_ON(!ci->i_head_snapc);
+               snapc = ceph_get_snap_context(ci->i_head_snapc);
+               ++ci->i_wrbuffer_ref_head;
+       }
        if (ci->i_wrbuffer_ref == 0)
                ihold(inode);
        ++ci->i_wrbuffer_ref;
@@ -346,7 +350,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
 
        /* build page vector */
        nr_pages = calc_pages_for(0, len);
-       pages = kmalloc(sizeof(*pages) * nr_pages, GFP_NOFS);
+       pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL);
        ret = -ENOMEM;
        if (!pages)
                goto out;
@@ -358,7 +362,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
                dout("start_read %p adding %p idx %lu\n", inode, page,
                     page->index);
                if (add_to_page_cache_lru(page, &inode->i_data, page->index,
-                                         GFP_NOFS)) {
+                                         GFP_KERNEL)) {
                        ceph_fscache_uncache_page(inode, page);
                        page_cache_release(page);
                        dout("start_read %p add_to_page_cache failed %p\n",
@@ -436,7 +440,7 @@ out:
  * only snap context we are allowed to write back.
  */
 static struct ceph_snap_context *get_oldest_context(struct inode *inode,
-                                                   u64 *snap_size)
+                                                   loff_t *snap_size)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_snap_context *snapc = NULL;
@@ -476,8 +480,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        struct ceph_osd_client *osdc;
        struct ceph_snap_context *snapc, *oldest;
        loff_t page_off = page_offset(page);
+       loff_t snap_size = -1;
        long writeback_stat;
-       u64 truncate_size, snap_size = 0;
+       u64 truncate_size;
        u32 truncate_seq;
        int err = 0, len = PAGE_CACHE_SIZE;
 
@@ -512,7 +517,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        spin_lock(&ci->i_ceph_lock);
        truncate_seq = ci->i_truncate_seq;
        truncate_size = ci->i_truncate_size;
-       if (!snap_size)
+       if (snap_size == -1)
                snap_size = i_size_read(inode);
        spin_unlock(&ci->i_ceph_lock);
 
@@ -695,7 +700,8 @@ static int ceph_writepages_start(struct address_space *mapping,
        unsigned wsize = 1 << inode->i_blkbits;
        struct ceph_osd_request *req = NULL;
        int do_sync = 0;
-       u64 truncate_size, snap_size;
+       loff_t snap_size, i_size;
+       u64 truncate_size;
        u32 truncate_seq;
 
        /*
@@ -741,7 +747,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 retry:
        /* find oldest snap context with dirty data */
        ceph_put_snap_context(snapc);
-       snap_size = 0;
+       snap_size = -1;
        snapc = get_oldest_context(inode, &snap_size);
        if (!snapc) {
                /* hmm, why does writepages get called when there
@@ -749,16 +755,13 @@ retry:
                dout(" no snap context with dirty data?\n");
                goto out;
        }
-       if (snap_size == 0)
-               snap_size = i_size_read(inode);
        dout(" oldest snapc is %p seq %lld (%d snaps)\n",
             snapc, snapc->seq, snapc->num_snaps);
 
        spin_lock(&ci->i_ceph_lock);
        truncate_seq = ci->i_truncate_seq;
        truncate_size = ci->i_truncate_size;
-       if (!snap_size)
-               snap_size = i_size_read(inode);
+       i_size = i_size_read(inode);
        spin_unlock(&ci->i_ceph_lock);
 
        if (last_snapc && snapc != last_snapc) {
@@ -828,8 +831,10 @@ get_more_pages:
                                dout("waiting on writeback %p\n", page);
                                wait_on_page_writeback(page);
                        }
-                       if (page_offset(page) >= snap_size) {
-                               dout("%p page eof %llu\n", page, snap_size);
+                       if (page_offset(page) >=
+                           (snap_size == -1 ? i_size : snap_size)) {
+                               dout("%p page eof %llu\n", page,
+                                    (snap_size == -1 ? i_size : snap_size));
                                done = 1;
                                unlock_page(page);
                                break;
@@ -884,7 +889,8 @@ get_more_pages:
                                }
 
                                if (do_sync)
-                                       osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
+                                       osd_req_op_init(req, 1,
+                                                       CEPH_OSD_OP_STARTSYNC, 0);
 
                                req->r_callback = writepages_finish;
                                req->r_inode = inode;
@@ -944,10 +950,18 @@ get_more_pages:
                }
 
                /* Format the osd request message and submit the write */
-
                offset = page_offset(pages[0]);
-               len = min(snap_size - offset,
-                         (u64)locked_pages << PAGE_CACHE_SHIFT);
+               len = (u64)locked_pages << PAGE_CACHE_SHIFT;
+               if (snap_size == -1) {
+                       len = min(len, (u64)i_size_read(inode) - offset);
+                        /* writepages_finish() clears writeback pages
+                         * according to the data length, so make sure
+                         * data length covers all locked pages */
+                       len = max(len, 1 +
+                               ((u64)(locked_pages - 1) << PAGE_CACHE_SHIFT));
+               } else {
+                       len = min(len, snap_size - offset);
+               }
                dout("writepages got %d pages at %llu~%llu\n",
                     locked_pages, offset, len);
 
@@ -1032,7 +1046,6 @@ static int ceph_update_writeable_page(struct file *file,
 {
        struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
        loff_t page_off = pos & PAGE_CACHE_MASK;
        int pos_in_page = pos & ~PAGE_CACHE_MASK;
        int end_in_page = pos_in_page + len;
@@ -1044,10 +1057,6 @@ retry_locked:
        /* writepages currently holds page lock, but if we change that later, */
        wait_on_page_writeback(page);
 
-       /* check snap context */
-       BUG_ON(!ci->i_snap_realm);
-       down_read(&mdsc->snap_rwsem);
-       BUG_ON(!ci->i_snap_realm->cached_context);
        snapc = page_snap_context(page);
        if (snapc && snapc != ci->i_head_snapc) {
                /*
@@ -1055,7 +1064,6 @@ retry_locked:
                 * context!  is it writeable now?
                 */
                oldest = get_oldest_context(inode, NULL);
-               up_read(&mdsc->snap_rwsem);
 
                if (snapc->seq > oldest->seq) {
                        ceph_put_snap_context(oldest);
@@ -1112,7 +1120,6 @@ retry_locked:
        }
 
        /* we need to read it. */
-       up_read(&mdsc->snap_rwsem);
        r = readpage_nounlock(file, page);
        if (r < 0)
                goto fail_nosnap;
@@ -1157,16 +1164,13 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
 
 /*
  * we don't do anything in here that simple_write_end doesn't do
- * except adjust dirty page accounting and drop read lock on
- * mdsc->snap_rwsem.
+ * except adjust dirty page accounting
  */
 static int ceph_write_end(struct file *file, struct address_space *mapping,
                          loff_t pos, unsigned len, unsigned copied,
                          struct page *page, void *fsdata)
 {
        struct inode *inode = file_inode(file);
-       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-       struct ceph_mds_client *mdsc = fsc->mdsc;
        unsigned from = pos & (PAGE_CACHE_SIZE - 1);
        int check_cap = 0;
 
@@ -1188,7 +1192,6 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
        set_page_dirty(page);
 
        unlock_page(page);
-       up_read(&mdsc->snap_rwsem);
        page_cache_release(page);
 
        if (check_cap)
@@ -1314,13 +1317,17 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        struct inode *inode = file_inode(vma->vm_file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_file_info *fi = vma->vm_file->private_data;
-       struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+       struct ceph_cap_flush *prealloc_cf;
        struct page *page = vmf->page;
        loff_t off = page_offset(page);
        loff_t size = i_size_read(inode);
        size_t len;
        int want, got, ret;
 
+       prealloc_cf = ceph_alloc_cap_flush();
+       if (!prealloc_cf)
+               return VM_FAULT_SIGBUS;
+
        if (ci->i_inline_version != CEPH_INLINE_NONE) {
                struct page *locked_page = NULL;
                if (off == 0) {
@@ -1330,8 +1337,10 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
                ret = ceph_uninline_data(vma->vm_file, locked_page);
                if (locked_page)
                        unlock_page(locked_page);
-               if (ret < 0)
-                       return VM_FAULT_SIGBUS;
+               if (ret < 0) {
+                       ret = VM_FAULT_SIGBUS;
+                       goto out_free;
+               }
        }
 
        if (off + PAGE_CACHE_SIZE <= size)
@@ -1353,7 +1362,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
                        break;
                if (ret != -ERESTARTSYS) {
                        WARN_ON(1);
-                       return VM_FAULT_SIGBUS;
+                       ret = VM_FAULT_SIGBUS;
+                       goto out_free;
                }
        }
        dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
@@ -1373,7 +1383,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        if (ret == 0) {
                /* success.  we'll keep the page locked. */
                set_page_dirty(page);
-               up_read(&mdsc->snap_rwsem);
                ret = VM_FAULT_LOCKED;
        } else {
                if (ret == -ENOMEM)
@@ -1389,7 +1398,8 @@ out:
                int dirty;
                spin_lock(&ci->i_ceph_lock);
                ci->i_inline_version = CEPH_INLINE_NONE;
-               dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+               dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
+                                              &prealloc_cf);
                spin_unlock(&ci->i_ceph_lock);
                if (dirty)
                        __mark_inode_dirty(inode, dirty);
@@ -1398,6 +1408,8 @@ out:
        dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n",
             inode, off, len, ceph_cap_string(got), ret);
        ceph_put_cap_refs(ci, got);
+out_free:
+       ceph_free_cap_flush(prealloc_cf);
 
        return ret;
 }
@@ -1509,8 +1521,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
                                    ceph_vino(inode), 0, &len, 0, 1,
                                    CEPH_OSD_OP_CREATE,
                                    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
-                                   ci->i_snap_realm->cached_context,
-                                   0, 0, false);
+                                   ceph_empty_snapc, 0, 0, false);
        if (IS_ERR(req)) {
                err = PTR_ERR(req);
                goto out;
@@ -1528,7 +1539,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
                                    ceph_vino(inode), 0, &len, 1, 3,
                                    CEPH_OSD_OP_WRITE,
                                    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
-                                   ci->i_snap_realm->cached_context,
+                                   ceph_empty_snapc,
                                    ci->i_truncate_seq, ci->i_truncate_size,
                                    false);
        if (IS_ERR(req)) {
@@ -1597,3 +1608,206 @@ int ceph_mmap(struct file *file, struct vm_area_struct *vma)
        vma->vm_ops = &ceph_vmops;
        return 0;
 }
+
+enum {
+       POOL_READ       = 1,
+       POOL_WRITE      = 2,
+};
+
+static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool)
+{
+       struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
+       struct ceph_osd_request *rd_req = NULL, *wr_req = NULL;
+       struct rb_node **p, *parent;
+       struct ceph_pool_perm *perm;
+       struct page **pages;
+       int err = 0, err2 = 0, have = 0;
+
+       down_read(&mdsc->pool_perm_rwsem);
+       p = &mdsc->pool_perm_tree.rb_node;
+       while (*p) {
+               perm = rb_entry(*p, struct ceph_pool_perm, node);
+               if (pool < perm->pool)
+                       p = &(*p)->rb_left;
+               else if (pool > perm->pool)
+                       p = &(*p)->rb_right;
+               else {
+                       have = perm->perm;
+                       break;
+               }
+       }
+       up_read(&mdsc->pool_perm_rwsem);
+       if (*p)
+               goto out;
+
+       dout("__ceph_pool_perm_get pool %u no perm cached\n", pool);
+
+       down_write(&mdsc->pool_perm_rwsem);
+       parent = NULL;
+       while (*p) {
+               parent = *p;
+               perm = rb_entry(parent, struct ceph_pool_perm, node);
+               if (pool < perm->pool)
+                       p = &(*p)->rb_left;
+               else if (pool > perm->pool)
+                       p = &(*p)->rb_right;
+               else {
+                       have = perm->perm;
+                       break;
+               }
+       }
+       if (*p) {
+               up_write(&mdsc->pool_perm_rwsem);
+               goto out;
+       }
+
+       rd_req = ceph_osdc_alloc_request(&fsc->client->osdc,
+                                        ceph_empty_snapc,
+                                        1, false, GFP_NOFS);
+       if (!rd_req) {
+               err = -ENOMEM;
+               goto out_unlock;
+       }
+
+       rd_req->r_flags = CEPH_OSD_FLAG_READ;
+       osd_req_op_init(rd_req, 0, CEPH_OSD_OP_STAT, 0);
+       rd_req->r_base_oloc.pool = pool;
+       snprintf(rd_req->r_base_oid.name, sizeof(rd_req->r_base_oid.name),
+                "%llx.00000000", ci->i_vino.ino);
+       rd_req->r_base_oid.name_len = strlen(rd_req->r_base_oid.name);
+
+       wr_req = ceph_osdc_alloc_request(&fsc->client->osdc,
+                                        ceph_empty_snapc,
+                                        1, false, GFP_NOFS);
+       if (!wr_req) {
+               err = -ENOMEM;
+               goto out_unlock;
+       }
+
+       wr_req->r_flags = CEPH_OSD_FLAG_WRITE |
+                         CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK;
+       osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL);
+       wr_req->r_base_oloc.pool = pool;
+       wr_req->r_base_oid = rd_req->r_base_oid;
+
+       /* one page should be large enough for STAT data */
+       pages = ceph_alloc_page_vector(1, GFP_KERNEL);
+       if (IS_ERR(pages)) {
+               err = PTR_ERR(pages);
+               goto out_unlock;
+       }
+
+       osd_req_op_raw_data_in_pages(rd_req, 0, pages, PAGE_SIZE,
+                                    0, false, true);
+       ceph_osdc_build_request(rd_req, 0, NULL, CEPH_NOSNAP,
+                               &ci->vfs_inode.i_mtime);
+       err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
+
+       ceph_osdc_build_request(wr_req, 0, NULL, CEPH_NOSNAP,
+                               &ci->vfs_inode.i_mtime);
+       err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
+
+       if (!err)
+               err = ceph_osdc_wait_request(&fsc->client->osdc, rd_req);
+       if (!err2)
+               err2 = ceph_osdc_wait_request(&fsc->client->osdc, wr_req);
+
+       if (err >= 0 || err == -ENOENT)
+               have |= POOL_READ;
+       else if (err != -EPERM)
+               goto out_unlock;
+
+       if (err2 == 0 || err2 == -EEXIST)
+               have |= POOL_WRITE;
+       else if (err2 != -EPERM) {
+               err = err2;
+               goto out_unlock;
+       }
+
+       perm = kmalloc(sizeof(*perm), GFP_NOFS);
+       if (!perm) {
+               err = -ENOMEM;
+               goto out_unlock;
+       }
+
+       perm->pool = pool;
+       perm->perm = have;
+       rb_link_node(&perm->node, parent, p);
+       rb_insert_color(&perm->node, &mdsc->pool_perm_tree);
+       err = 0;
+out_unlock:
+       up_write(&mdsc->pool_perm_rwsem);
+
+       if (rd_req)
+               ceph_osdc_put_request(rd_req);
+       if (wr_req)
+               ceph_osdc_put_request(wr_req);
+out:
+       if (!err)
+               err = have;
+       dout("__ceph_pool_perm_get pool %u result = %d\n", pool, err);
+       return err;
+}
+
+int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
+{
+       u32 pool;
+       int ret, flags;
+
+       if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
+                               NOPOOLPERM))
+               return 0;
+
+       spin_lock(&ci->i_ceph_lock);
+       flags = ci->i_ceph_flags;
+       pool = ceph_file_layout_pg_pool(ci->i_layout);
+       spin_unlock(&ci->i_ceph_lock);
+check:
+       if (flags & CEPH_I_POOL_PERM) {
+               if ((need & CEPH_CAP_FILE_RD) && !(flags & CEPH_I_POOL_RD)) {
+                       dout("ceph_pool_perm_check pool %u no read perm\n",
+                            pool);
+                       return -EPERM;
+               }
+               if ((need & CEPH_CAP_FILE_WR) && !(flags & CEPH_I_POOL_WR)) {
+                       dout("ceph_pool_perm_check pool %u no write perm\n",
+                            pool);
+                       return -EPERM;
+               }
+               return 0;
+       }
+
+       ret = __ceph_pool_perm_get(ci, pool);
+       if (ret < 0)
+               return ret;
+
+       flags = CEPH_I_POOL_PERM;
+       if (ret & POOL_READ)
+               flags |= CEPH_I_POOL_RD;
+       if (ret & POOL_WRITE)
+               flags |= CEPH_I_POOL_WR;
+
+       spin_lock(&ci->i_ceph_lock);
+       if (pool == ceph_file_layout_pg_pool(ci->i_layout)) {
+               ci->i_ceph_flags = flags;
+        } else {
+               pool = ceph_file_layout_pg_pool(ci->i_layout);
+               flags = ci->i_ceph_flags;
+       }
+       spin_unlock(&ci->i_ceph_lock);
+       goto check;
+}
+
+void ceph_pool_perm_destroy(struct ceph_mds_client *mdsc)
+{
+       struct ceph_pool_perm *perm;
+       struct rb_node *n;
+
+       while (!RB_EMPTY_ROOT(&mdsc->pool_perm_tree)) {
+               n = rb_first(&mdsc->pool_perm_tree);
+               perm = rb_entry(n, struct ceph_pool_perm, node);
+               rb_erase(n, &mdsc->pool_perm_tree);
+               kfree(perm);
+       }
+}
index be5ea6af8366479b675e81e1d13e96139abed2c4..dc10c9dd36c1a2ac6264ed21d3248e5f62f1e330 100644 (file)
@@ -833,7 +833,9 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
                used |= CEPH_CAP_PIN;
        if (ci->i_rd_ref)
                used |= CEPH_CAP_FILE_RD;
-       if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
+       if (ci->i_rdcache_ref ||
+           (!S_ISDIR(ci->vfs_inode.i_mode) && /* ignore readdir cache */
+            ci->vfs_inode.i_data.nrpages))
                used |= CEPH_CAP_FILE_CACHE;
        if (ci->i_wr_ref)
                used |= CEPH_CAP_FILE_WR;
@@ -926,16 +928,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 
        /* remove from session list */
        spin_lock(&session->s_cap_lock);
-       /*
-        * s_cap_reconnect is protected by s_cap_lock. no one changes
-        * s_cap_gen while session is in the reconnect state.
-        */
-       if (queue_release &&
-           (!session->s_cap_reconnect ||
-            cap->cap_gen == session->s_cap_gen))
-               __queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
-                                   cap->mseq, cap->issue_seq);
-
        if (session->s_cap_iterator == cap) {
                /* not yet, we are iterating over this very cap */
                dout("__ceph_remove_cap  delaying %p removal from session %p\n",
@@ -948,6 +940,25 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
        }
        /* protect backpointer with s_cap_lock: see iterate_session_caps */
        cap->ci = NULL;
+
+       /*
+        * s_cap_reconnect is protected by s_cap_lock. no one changes
+        * s_cap_gen while session is in the reconnect state.
+        */
+       if (queue_release &&
+           (!session->s_cap_reconnect || cap->cap_gen == session->s_cap_gen)) {
+               cap->queue_release = 1;
+               if (removed) {
+                       list_add_tail(&cap->session_caps,
+                                     &session->s_cap_releases);
+                       session->s_num_cap_releases++;
+                       removed = 0;
+               }
+       } else {
+               cap->queue_release = 0;
+       }
+       cap->cap_ino = ci->i_vino.ino;
+
        spin_unlock(&session->s_cap_lock);
 
        /* remove from inode list */
@@ -977,8 +988,8 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 static int send_cap_msg(struct ceph_mds_session *session,
                        u64 ino, u64 cid, int op,
                        int caps, int wanted, int dirty,
-                       u32 seq, u64 flush_tid, u32 issue_seq, u32 mseq,
-                       u64 size, u64 max_size,
+                       u32 seq, u64 flush_tid, u64 oldest_flush_tid,
+                       u32 issue_seq, u32 mseq, u64 size, u64 max_size,
                        struct timespec *mtime, struct timespec *atime,
                        u64 time_warp_seq,
                        kuid_t uid, kgid_t gid, umode_t mode,
@@ -992,20 +1003,23 @@ static int send_cap_msg(struct ceph_mds_session *session,
        size_t extra_len;
 
        dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
-            " seq %u/%u mseq %u follows %lld size %llu/%llu"
+            " seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu"
             " xattr_ver %llu xattr_len %d\n", ceph_cap_op_name(op),
             cid, ino, ceph_cap_string(caps), ceph_cap_string(wanted),
             ceph_cap_string(dirty),
-            seq, issue_seq, mseq, follows, size, max_size,
+            seq, issue_seq, flush_tid, oldest_flush_tid,
+            mseq, follows, size, max_size,
             xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
 
-       /* flock buffer size + inline version + inline data size */
-       extra_len = 4 + 8 + 4;
+       /* flock buffer size + inline version + inline data size +
+        * osd_epoch_barrier + oldest_flush_tid */
+       extra_len = 4 + 8 + 4 + 4 + 8;
        msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len,
                           GFP_NOFS, false);
        if (!msg)
                return -ENOMEM;
 
+       msg->hdr.version = cpu_to_le16(6);
        msg->hdr.tid = cpu_to_le64(flush_tid);
 
        fc = msg->front.iov_base;
@@ -1041,6 +1055,10 @@ static int send_cap_msg(struct ceph_mds_session *session,
        ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE);
        /* inline data size */
        ceph_encode_32(&p, 0);
+       /* osd_epoch_barrier */
+       ceph_encode_32(&p, 0);
+       /* oldest_flush_tid */
+       ceph_encode_64(&p, oldest_flush_tid);
 
        fc->xattr_version = cpu_to_le64(xattr_version);
        if (xattrs_buf) {
@@ -1053,44 +1071,6 @@ static int send_cap_msg(struct ceph_mds_session *session,
        return 0;
 }
 
-void __queue_cap_release(struct ceph_mds_session *session,
-                        u64 ino, u64 cap_id, u32 migrate_seq,
-                        u32 issue_seq)
-{
-       struct ceph_msg *msg;
-       struct ceph_mds_cap_release *head;
-       struct ceph_mds_cap_item *item;
-
-       BUG_ON(!session->s_num_cap_releases);
-       msg = list_first_entry(&session->s_cap_releases,
-                              struct ceph_msg, list_head);
-
-       dout(" adding %llx release to mds%d msg %p (%d left)\n",
-            ino, session->s_mds, msg, session->s_num_cap_releases);
-
-       BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
-       head = msg->front.iov_base;
-       le32_add_cpu(&head->num, 1);
-       item = msg->front.iov_base + msg->front.iov_len;
-       item->ino = cpu_to_le64(ino);
-       item->cap_id = cpu_to_le64(cap_id);
-       item->migrate_seq = cpu_to_le32(migrate_seq);
-       item->seq = cpu_to_le32(issue_seq);
-
-       session->s_num_cap_releases--;
-
-       msg->front.iov_len += sizeof(*item);
-       if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
-               dout(" release msg %p full\n", msg);
-               list_move_tail(&msg->list_head, &session->s_cap_releases_done);
-       } else {
-               dout(" release msg %p at %d/%d (%d)\n", msg,
-                    (int)le32_to_cpu(head->num),
-                    (int)CEPH_CAPS_PER_RELEASE,
-                    (int)msg->front.iov_len);
-       }
-}
-
 /*
  * Queue cap releases when an inode is dropped from our cache.  Since
  * inode is about to be destroyed, there is no need for i_ceph_lock.
@@ -1127,7 +1107,7 @@ void ceph_queue_caps_release(struct inode *inode)
  */
 static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
                      int op, int used, int want, int retain, int flushing,
-                     unsigned *pflush_tid)
+                     u64 flush_tid, u64 oldest_flush_tid)
        __releases(cap->ci->i_ceph_lock)
 {
        struct ceph_inode_info *ci = cap->ci;
@@ -1145,8 +1125,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
        u64 xattr_version = 0;
        struct ceph_buffer *xattr_blob = NULL;
        int delayed = 0;
-       u64 flush_tid = 0;
-       int i;
        int ret;
        bool inline_data;
 
@@ -1190,26 +1168,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
        cap->implemented &= cap->issued | used;
        cap->mds_wanted = want;
 
-       if (flushing) {
-               /*
-                * assign a tid for flush operations so we can avoid
-                * flush1 -> dirty1 -> flush2 -> flushack1 -> mark
-                * clean type races.  track latest tid for every bit
-                * so we can handle flush AxFw, flush Fw, and have the
-                * first ack clean Ax.
-                */
-               flush_tid = ++ci->i_cap_flush_last_tid;
-               if (pflush_tid)
-                       *pflush_tid = flush_tid;
-               dout(" cap_flush_tid %d\n", (int)flush_tid);
-               for (i = 0; i < CEPH_CAP_BITS; i++)
-                       if (flushing & (1 << i))
-                               ci->i_cap_flush_tid[i] = flush_tid;
-
-               follows = ci->i_head_snapc->seq;
-       } else {
-               follows = 0;
-       }
+       follows = flushing ? ci->i_head_snapc->seq : 0;
 
        keep = cap->implemented;
        seq = cap->seq;
@@ -1237,7 +1196,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
        spin_unlock(&ci->i_ceph_lock);
 
        ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
-               op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
+               op, keep, want, flushing, seq,
+               flush_tid, oldest_flush_tid, issue_seq, mseq,
                size, max_size, &mtime, &atime, time_warp_seq,
                uid, gid, mode, xattr_version, xattr_blob,
                follows, inline_data);
@@ -1259,14 +1219,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
  * asynchronously back to the MDS once sync writes complete and dirty
  * data is written out.
  *
- * Unless @again is true, skip cap_snaps that were already sent to
+ * Unless @kick is true, skip cap_snaps that were already sent to
  * the MDS (i.e., during this session).
  *
  * Called under i_ceph_lock.  Takes s_mutex as needed.
  */
 void __ceph_flush_snaps(struct ceph_inode_info *ci,
                        struct ceph_mds_session **psession,
-                       int again)
+                       int kick)
                __releases(ci->i_ceph_lock)
                __acquires(ci->i_ceph_lock)
 {
@@ -1297,11 +1257,8 @@ retry:
                if (capsnap->dirty_pages || capsnap->writing)
                        break;
 
-               /*
-                * if cap writeback already occurred, we should have dropped
-                * the capsnap in ceph_put_wrbuffer_cap_refs.
-                */
-               BUG_ON(capsnap->dirty == 0);
+               /* should be removed by ceph_try_drop_cap_snap() */
+               BUG_ON(!capsnap->need_flush);
 
                /* pick mds, take s_mutex */
                if (ci->i_auth_cap == NULL) {
@@ -1310,7 +1267,7 @@ retry:
                }
 
                /* only flush each capsnap once */
-               if (!again && !list_empty(&capsnap->flushing_item)) {
+               if (!kick && !list_empty(&capsnap->flushing_item)) {
                        dout("already flushed %p, skipping\n", capsnap);
                        continue;
                }
@@ -1320,6 +1277,9 @@ retry:
 
                if (session && session->s_mds != mds) {
                        dout("oops, wrong session %p mutex\n", session);
+                       if (kick)
+                               goto out;
+
                        mutex_unlock(&session->s_mutex);
                        ceph_put_mds_session(session);
                        session = NULL;
@@ -1343,20 +1303,22 @@ retry:
                        goto retry;
                }
 
-               capsnap->flush_tid = ++ci->i_cap_flush_last_tid;
+               spin_lock(&mdsc->cap_dirty_lock);
+               capsnap->flush_tid = ++mdsc->last_cap_flush_tid;
+               spin_unlock(&mdsc->cap_dirty_lock);
+
                atomic_inc(&capsnap->nref);
-               if (!list_empty(&capsnap->flushing_item))
-                       list_del_init(&capsnap->flushing_item);
-               list_add_tail(&capsnap->flushing_item,
-                             &session->s_cap_snaps_flushing);
+               if (list_empty(&capsnap->flushing_item))
+                       list_add_tail(&capsnap->flushing_item,
+                                     &session->s_cap_snaps_flushing);
                spin_unlock(&ci->i_ceph_lock);
 
                dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
                     inode, capsnap, capsnap->follows, capsnap->flush_tid);
                send_cap_msg(session, ceph_vino(inode).ino, 0,
                             CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
-                            capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
-                            capsnap->size, 0,
+                            capsnap->dirty, 0, capsnap->flush_tid, 0,
+                            0, mseq, capsnap->size, 0,
                             &capsnap->mtime, &capsnap->atime,
                             capsnap->time_warp_seq,
                             capsnap->uid, capsnap->gid, capsnap->mode,
@@ -1396,7 +1358,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
  * Caller is then responsible for calling __mark_inode_dirty with the
  * returned flags value.
  */
-int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
+int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
+                          struct ceph_cap_flush **pcf)
 {
        struct ceph_mds_client *mdsc =
                ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
@@ -1416,9 +1379,14 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
             ceph_cap_string(was | mask));
        ci->i_dirty_caps |= mask;
        if (was == 0) {
-               if (!ci->i_head_snapc)
+               WARN_ON_ONCE(ci->i_prealloc_cap_flush);
+               swap(ci->i_prealloc_cap_flush, *pcf);
+
+               if (!ci->i_head_snapc) {
+                       WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem));
                        ci->i_head_snapc = ceph_get_snap_context(
                                ci->i_snap_realm->cached_context);
+               }
                dout(" inode %p now dirty snapc %p auth cap %p\n",
                     &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
                BUG_ON(!list_empty(&ci->i_dirty_item));
@@ -1429,6 +1397,8 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
                        ihold(inode);
                        dirty |= I_DIRTY_SYNC;
                }
+       } else {
+               WARN_ON_ONCE(!ci->i_prealloc_cap_flush);
        }
        BUG_ON(list_empty(&ci->i_dirty_item));
        if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
@@ -1438,6 +1408,74 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
        return dirty;
 }
 
+static void __add_cap_flushing_to_inode(struct ceph_inode_info *ci,
+                                       struct ceph_cap_flush *cf)
+{
+       struct rb_node **p = &ci->i_cap_flush_tree.rb_node;
+       struct rb_node *parent = NULL;
+       struct ceph_cap_flush *other = NULL;
+
+       while (*p) {
+               parent = *p;
+               other = rb_entry(parent, struct ceph_cap_flush, i_node);
+
+               if (cf->tid < other->tid)
+                       p = &(*p)->rb_left;
+               else if (cf->tid > other->tid)
+                       p = &(*p)->rb_right;
+               else
+                       BUG();
+       }
+
+       rb_link_node(&cf->i_node, parent, p);
+       rb_insert_color(&cf->i_node, &ci->i_cap_flush_tree);
+}
+
+static void __add_cap_flushing_to_mdsc(struct ceph_mds_client *mdsc,
+                                      struct ceph_cap_flush *cf)
+{
+       struct rb_node **p = &mdsc->cap_flush_tree.rb_node;
+       struct rb_node *parent = NULL;
+       struct ceph_cap_flush *other = NULL;
+
+       while (*p) {
+               parent = *p;
+               other = rb_entry(parent, struct ceph_cap_flush, g_node);
+
+               if (cf->tid < other->tid)
+                       p = &(*p)->rb_left;
+               else if (cf->tid > other->tid)
+                       p = &(*p)->rb_right;
+               else
+                       BUG();
+       }
+
+       rb_link_node(&cf->g_node, parent, p);
+       rb_insert_color(&cf->g_node, &mdsc->cap_flush_tree);
+}
+
+struct ceph_cap_flush *ceph_alloc_cap_flush(void)
+{
+       return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
+}
+
+void ceph_free_cap_flush(struct ceph_cap_flush *cf)
+{
+       if (cf)
+               kmem_cache_free(ceph_cap_flush_cachep, cf);
+}
+
+static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc)
+{
+       struct rb_node *n = rb_first(&mdsc->cap_flush_tree);
+       if (n) {
+               struct ceph_cap_flush *cf =
+                       rb_entry(n, struct ceph_cap_flush, g_node);
+               return cf->tid;
+       }
+       return 0;
+}
+
 /*
  * Add dirty inode to the flushing list.  Assigned a seq number so we
  * can wait for caps to flush without starving.
@@ -1445,14 +1483,17 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
  * Called under i_ceph_lock.
  */
 static int __mark_caps_flushing(struct inode *inode,
-                                struct ceph_mds_session *session)
+                               struct ceph_mds_session *session,
+                               u64 *flush_tid, u64 *oldest_flush_tid)
 {
        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_cap_flush *cf = NULL;
        int flushing;
 
        BUG_ON(ci->i_dirty_caps == 0);
        BUG_ON(list_empty(&ci->i_dirty_item));
+       BUG_ON(!ci->i_prealloc_cap_flush);
 
        flushing = ci->i_dirty_caps;
        dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n",
@@ -1463,22 +1504,31 @@ static int __mark_caps_flushing(struct inode *inode,
        ci->i_dirty_caps = 0;
        dout(" inode %p now !dirty\n", inode);
 
+       swap(cf, ci->i_prealloc_cap_flush);
+       cf->caps = flushing;
+       cf->kick = false;
+
        spin_lock(&mdsc->cap_dirty_lock);
        list_del_init(&ci->i_dirty_item);
 
+       cf->tid = ++mdsc->last_cap_flush_tid;
+       __add_cap_flushing_to_mdsc(mdsc, cf);
+       *oldest_flush_tid = __get_oldest_flush_tid(mdsc);
+
        if (list_empty(&ci->i_flushing_item)) {
-               ci->i_cap_flush_seq = ++mdsc->cap_flush_seq;
                list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing);
                mdsc->num_cap_flushing++;
-               dout(" inode %p now flushing seq %lld\n", inode,
-                    ci->i_cap_flush_seq);
+               dout(" inode %p now flushing tid %llu\n", inode, cf->tid);
        } else {
                list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
-               dout(" inode %p now flushing (more) seq %lld\n", inode,
-                    ci->i_cap_flush_seq);
+               dout(" inode %p now flushing (more) tid %llu\n",
+                    inode, cf->tid);
        }
        spin_unlock(&mdsc->cap_dirty_lock);
 
+       __add_cap_flushing_to_inode(ci, cf);
+
+       *flush_tid = cf->tid;
        return flushing;
 }
 
@@ -1524,6 +1574,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
        struct ceph_mds_client *mdsc = fsc->mdsc;
        struct inode *inode = &ci->vfs_inode;
        struct ceph_cap *cap;
+       u64 flush_tid, oldest_flush_tid;
        int file_wanted, used, cap_used;
        int took_snap_rwsem = 0;             /* true if mdsc->snap_rwsem held */
        int issued, implemented, want, retain, revoking, flushing = 0;
@@ -1553,13 +1604,13 @@ retry:
 retry_locked:
        file_wanted = __ceph_caps_file_wanted(ci);
        used = __ceph_caps_used(ci);
-       want = file_wanted | used;
        issued = __ceph_caps_issued(ci, &implemented);
        revoking = implemented & ~issued;
 
-       retain = want | CEPH_CAP_PIN;
+       want = file_wanted;
+       retain = file_wanted | used | CEPH_CAP_PIN;
        if (!mdsc->stopping && inode->i_nlink > 0) {
-               if (want) {
+               if (file_wanted) {
                        retain |= CEPH_CAP_ANY;       /* be greedy */
                } else if (S_ISDIR(inode->i_mode) &&
                           (issued & CEPH_CAP_FILE_SHARED) &&
@@ -1602,9 +1653,10 @@ retry_locked:
         * If we fail, it's because pages are locked.... try again later.
         */
        if ((!is_delayed || mdsc->stopping) &&
-           ci->i_wrbuffer_ref == 0 &&               /* no dirty pages... */
-           inode->i_data.nrpages &&                 /* have cached pages */
-           (file_wanted == 0 ||                     /* no open files */
+           !S_ISDIR(inode->i_mode) &&          /* ignore readdir cache */
+           ci->i_wrbuffer_ref == 0 &&          /* no dirty pages... */
+           inode->i_data.nrpages &&            /* have cached pages */
+           (file_wanted == 0 ||                /* no open files */
             (revoking & (CEPH_CAP_FILE_CACHE|
                          CEPH_CAP_FILE_LAZYIO))) && /*  or revoking cache */
            !tried_invalidate) {
@@ -1742,17 +1794,25 @@ ack:
                        took_snap_rwsem = 1;
                }
 
-               if (cap == ci->i_auth_cap && ci->i_dirty_caps)
-                       flushing = __mark_caps_flushing(inode, session);
-               else
+               if (cap == ci->i_auth_cap && ci->i_dirty_caps) {
+                       flushing = __mark_caps_flushing(inode, session,
+                                                       &flush_tid,
+                                                       &oldest_flush_tid);
+               } else {
                        flushing = 0;
+                       flush_tid = 0;
+                       spin_lock(&mdsc->cap_dirty_lock);
+                       oldest_flush_tid = __get_oldest_flush_tid(mdsc);
+                       spin_unlock(&mdsc->cap_dirty_lock);
+               }
 
                mds = cap->mds;  /* remember mds, so we don't repeat */
                sent++;
 
                /* __send_cap drops i_ceph_lock */
                delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used,
-                                     want, retain, flushing, NULL);
+                                     want, retain, flushing,
+                                     flush_tid, oldest_flush_tid);
                goto retry; /* retake i_ceph_lock and restart our cap scan. */
        }
 
@@ -1781,12 +1841,13 @@ ack:
 /*
  * Try to flush dirty caps back to the auth mds.
  */
-static int try_flush_caps(struct inode *inode, unsigned *flush_tid)
+static int try_flush_caps(struct inode *inode, u64 *ptid)
 {
        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       int flushing = 0;
        struct ceph_mds_session *session = NULL;
+       int flushing = 0;
+       u64 flush_tid = 0, oldest_flush_tid = 0;
 
 retry:
        spin_lock(&ci->i_ceph_lock);
@@ -1811,42 +1872,54 @@ retry:
                if (cap->session->s_state < CEPH_MDS_SESSION_OPEN)
                        goto out;
 
-               flushing = __mark_caps_flushing(inode, session);
+               flushing = __mark_caps_flushing(inode, session, &flush_tid,
+                                               &oldest_flush_tid);
 
                /* __send_cap drops i_ceph_lock */
                delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
-                                    cap->issued | cap->implemented, flushing,
-                                    flush_tid);
-               if (!delayed)
-                       goto out_unlocked;
+                                    (cap->issued | cap->implemented),
+                                    flushing, flush_tid, oldest_flush_tid);
 
-               spin_lock(&ci->i_ceph_lock);
-               __cap_delay_requeue(mdsc, ci);
+               if (delayed) {
+                       spin_lock(&ci->i_ceph_lock);
+                       __cap_delay_requeue(mdsc, ci);
+                       spin_unlock(&ci->i_ceph_lock);
+               }
+       } else {
+               struct rb_node *n = rb_last(&ci->i_cap_flush_tree);
+               if (n) {
+                       struct ceph_cap_flush *cf =
+                               rb_entry(n, struct ceph_cap_flush, i_node);
+                       flush_tid = cf->tid;
+               }
+               flushing = ci->i_flushing_caps;
+               spin_unlock(&ci->i_ceph_lock);
        }
 out:
-       spin_unlock(&ci->i_ceph_lock);
-out_unlocked:
        if (session)
                mutex_unlock(&session->s_mutex);
+
+       *ptid = flush_tid;
        return flushing;
 }
 
 /*
  * Return true if we've flushed caps through the given flush_tid.
  */
-static int caps_are_flushed(struct inode *inode, unsigned tid)
+static int caps_are_flushed(struct inode *inode, u64 flush_tid)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-       int i, ret = 1;
+       struct ceph_cap_flush *cf;
+       struct rb_node *n;
+       int ret = 1;
 
        spin_lock(&ci->i_ceph_lock);
-       for (i = 0; i < CEPH_CAP_BITS; i++)
-               if ((ci->i_flushing_caps & (1 << i)) &&
-                   ci->i_cap_flush_tid[i] <= tid) {
-                       /* still flushing this bit */
+       n = rb_first(&ci->i_cap_flush_tree);
+       if (n) {
+               cf = rb_entry(n, struct ceph_cap_flush, i_node);
+               if (cf->tid <= flush_tid)
                        ret = 0;
-                       break;
-               }
+       }
        spin_unlock(&ci->i_ceph_lock);
        return ret;
 }
@@ -1864,13 +1937,16 @@ static void sync_write_wait(struct inode *inode)
        struct ceph_osd_request *req;
        u64 last_tid;
 
+       if (!S_ISREG(inode->i_mode))
+               return;
+
        spin_lock(&ci->i_unsafe_lock);
        if (list_empty(head))
                goto out;
 
        /* set upper bound as _last_ entry in chain */
-       req = list_entry(head->prev, struct ceph_osd_request,
-                        r_unsafe_item);
+       req = list_last_entry(head, struct ceph_osd_request,
+                             r_unsafe_item);
        last_tid = req->r_tid;
 
        do {
@@ -1888,18 +1964,64 @@ static void sync_write_wait(struct inode *inode)
                 */
                if (list_empty(head))
                        break;
-               req = list_entry(head->next, struct ceph_osd_request,
-                                r_unsafe_item);
+               req = list_first_entry(head, struct ceph_osd_request,
+                                      r_unsafe_item);
        } while (req->r_tid < last_tid);
 out:
        spin_unlock(&ci->i_unsafe_lock);
 }
 
+/*
+ * wait for any uncommitted directory operations to commit.
+ */
+static int unsafe_dirop_wait(struct inode *inode)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct list_head *head = &ci->i_unsafe_dirops;
+       struct ceph_mds_request *req;
+       u64 last_tid;
+       int ret = 0;
+
+       if (!S_ISDIR(inode->i_mode))
+               return 0;
+
+       spin_lock(&ci->i_unsafe_lock);
+       if (list_empty(head))
+               goto out;
+
+       req = list_last_entry(head, struct ceph_mds_request,
+                             r_unsafe_dir_item);
+       last_tid = req->r_tid;
+
+       do {
+               ceph_mdsc_get_request(req);
+               spin_unlock(&ci->i_unsafe_lock);
+
+               dout("unsafe_dirop_wait %p wait on tid %llu (until %llu)\n",
+                    inode, req->r_tid, last_tid);
+               ret = !wait_for_completion_timeout(&req->r_safe_completion,
+                                       ceph_timeout_jiffies(req->r_timeout));
+               if (ret)
+                       ret = -EIO;  /* timed out */
+
+               ceph_mdsc_put_request(req);
+
+               spin_lock(&ci->i_unsafe_lock);
+               if (ret || list_empty(head))
+                       break;
+               req = list_first_entry(head, struct ceph_mds_request,
+                                      r_unsafe_dir_item);
+       } while (req->r_tid < last_tid);
+out:
+       spin_unlock(&ci->i_unsafe_lock);
+       return ret;
+}
+
 int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
        struct inode *inode = file->f_mapping->host;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       unsigned flush_tid;
+       u64 flush_tid;
        int ret;
        int dirty;
 
@@ -1908,25 +2030,30 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
        ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (ret < 0)
-               return ret;
+               goto out;
+
+       if (datasync)
+               goto out;
+
        mutex_lock(&inode->i_mutex);
 
        dirty = try_flush_caps(inode, &flush_tid);
        dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
 
+       ret = unsafe_dirop_wait(inode);
+
        /*
         * only wait on non-file metadata writeback (the mds
         * can recover size and mtime, so we don't need to
         * wait for that)
         */
-       if (!datasync && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
-               dout("fsync waiting for flush_tid %u\n", flush_tid);
+       if (!ret && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
                ret = wait_event_interruptible(ci->i_cap_wq,
-                                      caps_are_flushed(inode, flush_tid));
+                                       caps_are_flushed(inode, flush_tid));
        }
-
-       dout("fsync %p%s done\n", inode, datasync ? " datasync" : "");
        mutex_unlock(&inode->i_mutex);
+out:
+       dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
        return ret;
 }
 
@@ -1939,7 +2066,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-       unsigned flush_tid;
+       u64 flush_tid;
        int err = 0;
        int dirty;
        int wait = wbc->sync_mode == WB_SYNC_ALL;
@@ -1994,6 +2121,104 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
        }
 }
 
+static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
+                               struct ceph_mds_session *session,
+                               struct ceph_inode_info *ci,
+                               bool kick_all)
+{
+       struct inode *inode = &ci->vfs_inode;
+       struct ceph_cap *cap;
+       struct ceph_cap_flush *cf;
+       struct rb_node *n;
+       int delayed = 0;
+       u64 first_tid = 0;
+       u64 oldest_flush_tid;
+
+       spin_lock(&mdsc->cap_dirty_lock);
+       oldest_flush_tid = __get_oldest_flush_tid(mdsc);
+       spin_unlock(&mdsc->cap_dirty_lock);
+
+       while (true) {
+               spin_lock(&ci->i_ceph_lock);
+               cap = ci->i_auth_cap;
+               if (!(cap && cap->session == session)) {
+                       pr_err("%p auth cap %p not mds%d ???\n", inode,
+                                       cap, session->s_mds);
+                       spin_unlock(&ci->i_ceph_lock);
+                       break;
+               }
+
+               for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) {
+                       cf = rb_entry(n, struct ceph_cap_flush, i_node);
+                       if (cf->tid < first_tid)
+                               continue;
+                       if (kick_all || cf->kick)
+                               break;
+               }
+               if (!n) {
+                       spin_unlock(&ci->i_ceph_lock);
+                       break;
+               }
+
+               cf = rb_entry(n, struct ceph_cap_flush, i_node);
+               cf->kick = false;
+
+               first_tid = cf->tid + 1;
+
+               dout("kick_flushing_caps %p cap %p tid %llu %s\n", inode,
+                    cap, cf->tid, ceph_cap_string(cf->caps));
+               delayed |= __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
+                                     __ceph_caps_used(ci),
+                                     __ceph_caps_wanted(ci),
+                                     cap->issued | cap->implemented,
+                                     cf->caps, cf->tid, oldest_flush_tid);
+       }
+       return delayed;
+}
+
+void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
+                                  struct ceph_mds_session *session)
+{
+       struct ceph_inode_info *ci;
+       struct ceph_cap *cap;
+       struct ceph_cap_flush *cf;
+       struct rb_node *n;
+
+       dout("early_kick_flushing_caps mds%d\n", session->s_mds);
+       list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
+               spin_lock(&ci->i_ceph_lock);
+               cap = ci->i_auth_cap;
+               if (!(cap && cap->session == session)) {
+                       pr_err("%p auth cap %p not mds%d ???\n",
+                               &ci->vfs_inode, cap, session->s_mds);
+                       spin_unlock(&ci->i_ceph_lock);
+                       continue;
+               }
+
+
+               /*
+                * if flushing caps were revoked, we re-send the cap flush
+                * in client reconnect stage. This guarantees the MDS processes
+                * the cap flush message before issuing the flushing caps to
+                * other clients.
+                */
+               if ((cap->issued & ci->i_flushing_caps) !=
+                   ci->i_flushing_caps) {
+                       spin_unlock(&ci->i_ceph_lock);
+                       if (!__kick_flushing_caps(mdsc, session, ci, true))
+                               continue;
+                       spin_lock(&ci->i_ceph_lock);
+               }
+
+               for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) {
+                       cf = rb_entry(n, struct ceph_cap_flush, i_node);
+                       cf->kick = true;
+               }
+
+               spin_unlock(&ci->i_ceph_lock);
+       }
+}
+
 void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
                             struct ceph_mds_session *session)
 {
@@ -2003,28 +2228,10 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
 
        dout("kick_flushing_caps mds%d\n", session->s_mds);
        list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
-               struct inode *inode = &ci->vfs_inode;
-               struct ceph_cap *cap;
-               int delayed = 0;
-
-               spin_lock(&ci->i_ceph_lock);
-               cap = ci->i_auth_cap;
-               if (cap && cap->session == session) {
-                       dout("kick_flushing_caps %p cap %p %s\n", inode,
-                            cap, ceph_cap_string(ci->i_flushing_caps));
-                       delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
-                                            __ceph_caps_used(ci),
-                                            __ceph_caps_wanted(ci),
-                                            cap->issued | cap->implemented,
-                                            ci->i_flushing_caps, NULL);
-                       if (delayed) {
-                               spin_lock(&ci->i_ceph_lock);
-                               __cap_delay_requeue(mdsc, ci);
-                               spin_unlock(&ci->i_ceph_lock);
-                       }
-               } else {
-                       pr_err("%p auth cap %p not mds%d ???\n", inode,
-                              cap, session->s_mds);
+               int delayed = __kick_flushing_caps(mdsc, session, ci, false);
+               if (delayed) {
+                       spin_lock(&ci->i_ceph_lock);
+                       __cap_delay_requeue(mdsc, ci);
                        spin_unlock(&ci->i_ceph_lock);
                }
        }
@@ -2036,26 +2243,25 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_cap *cap;
-       int delayed = 0;
 
        spin_lock(&ci->i_ceph_lock);
        cap = ci->i_auth_cap;
-       dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
-            ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
+       dout("kick_flushing_inode_caps %p flushing %s\n", inode,
+            ceph_cap_string(ci->i_flushing_caps));
 
        __ceph_flush_snaps(ci, &session, 1);
 
        if (ci->i_flushing_caps) {
+               int delayed;
+
                spin_lock(&mdsc->cap_dirty_lock);
                list_move_tail(&ci->i_flushing_item,
                               &cap->session->s_cap_flushing);
                spin_unlock(&mdsc->cap_dirty_lock);
 
-               delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
-                                    __ceph_caps_used(ci),
-                                    __ceph_caps_wanted(ci),
-                                    cap->issued | cap->implemented,
-                                    ci->i_flushing_caps, NULL);
+               spin_unlock(&ci->i_ceph_lock);
+
+               delayed = __kick_flushing_caps(mdsc, session, ci, true);
                if (delayed) {
                        spin_lock(&ci->i_ceph_lock);
                        __cap_delay_requeue(mdsc, ci);
@@ -2073,7 +2279,8 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
  *
  * Protected by i_ceph_lock.
  */
-static void __take_cap_refs(struct ceph_inode_info *ci, int got)
+static void __take_cap_refs(struct ceph_inode_info *ci, int got,
+                           bool snap_rwsem_locked)
 {
        if (got & CEPH_CAP_PIN)
                ci->i_pin_ref++;
@@ -2081,8 +2288,14 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
                ci->i_rd_ref++;
        if (got & CEPH_CAP_FILE_CACHE)
                ci->i_rdcache_ref++;
-       if (got & CEPH_CAP_FILE_WR)
+       if (got & CEPH_CAP_FILE_WR) {
+               if (ci->i_wr_ref == 0 && !ci->i_head_snapc) {
+                       BUG_ON(!snap_rwsem_locked);
+                       ci->i_head_snapc = ceph_get_snap_context(
+                                       ci->i_snap_realm->cached_context);
+               }
                ci->i_wr_ref++;
+       }
        if (got & CEPH_CAP_FILE_BUFFER) {
                if (ci->i_wb_ref == 0)
                        ihold(&ci->vfs_inode);
@@ -2100,16 +2313,19 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
  * requested from the MDS.
  */
 static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
-                           loff_t endoff, int *got, int *check_max, int *err)
+                           loff_t endoff, bool nonblock, int *got, int *err)
 {
        struct inode *inode = &ci->vfs_inode;
+       struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
        int ret = 0;
        int have, implemented;
        int file_wanted;
+       bool snap_rwsem_locked = false;
 
        dout("get_cap_refs %p need %s want %s\n", inode,
             ceph_cap_string(need), ceph_cap_string(want));
 
+again:
        spin_lock(&ci->i_ceph_lock);
 
        /* make sure file is actually open */
@@ -2125,6 +2341,10 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
        /* finish pending truncate */
        while (ci->i_truncate_pending) {
                spin_unlock(&ci->i_ceph_lock);
+               if (snap_rwsem_locked) {
+                       up_read(&mdsc->snap_rwsem);
+                       snap_rwsem_locked = false;
+               }
                __ceph_do_pending_vmtruncate(inode);
                spin_lock(&ci->i_ceph_lock);
        }
@@ -2136,7 +2356,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
                        dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
                             inode, endoff, ci->i_max_size);
                        if (endoff > ci->i_requested_max_size) {
-                               *check_max = 1;
+                               *err = -EAGAIN;
                                ret = 1;
                        }
                        goto out_unlock;
@@ -2164,8 +2384,29 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
                     inode, ceph_cap_string(have), ceph_cap_string(not),
                     ceph_cap_string(revoking));
                if ((revoking & not) == 0) {
+                       if (!snap_rwsem_locked &&
+                           !ci->i_head_snapc &&
+                           (need & CEPH_CAP_FILE_WR)) {
+                               if (!down_read_trylock(&mdsc->snap_rwsem)) {
+                                       /*
+                                        * we can not call down_read() when
+                                        * task isn't in TASK_RUNNING state
+                                        */
+                                       if (nonblock) {
+                                               *err = -EAGAIN;
+                                               ret = 1;
+                                               goto out_unlock;
+                                       }
+
+                                       spin_unlock(&ci->i_ceph_lock);
+                                       down_read(&mdsc->snap_rwsem);
+                                       snap_rwsem_locked = true;
+                                       goto again;
+                               }
+                               snap_rwsem_locked = true;
+                       }
                        *got = need | (have & want);
-                       __take_cap_refs(ci, *got);
+                       __take_cap_refs(ci, *got, true);
                        ret = 1;
                }
        } else {
@@ -2189,6 +2430,8 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
        }
 out_unlock:
        spin_unlock(&ci->i_ceph_lock);
+       if (snap_rwsem_locked)
+               up_read(&mdsc->snap_rwsem);
 
        dout("get_cap_refs %p ret %d got %s\n", inode,
             ret, ceph_cap_string(*got));
@@ -2231,50 +2474,70 @@ static void check_max_size(struct inode *inode, loff_t endoff)
 int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
                  loff_t endoff, int *got, struct page **pinned_page)
 {
-       int _got, check_max, ret, err = 0;
+       int _got, ret, err = 0;
 
-retry:
-       if (endoff > 0)
-               check_max_size(&ci->vfs_inode, endoff);
-       _got = 0;
-       check_max = 0;
-       ret = wait_event_interruptible(ci->i_cap_wq,
-                               try_get_cap_refs(ci, need, want, endoff,
-                                                &_got, &check_max, &err));
-       if (err)
-               ret = err;
+       ret = ceph_pool_perm_check(ci, need);
        if (ret < 0)
                return ret;
 
-       if (check_max)
-               goto retry;
+       while (true) {
+               if (endoff > 0)
+                       check_max_size(&ci->vfs_inode, endoff);
 
-       if (ci->i_inline_version != CEPH_INLINE_NONE &&
-           (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
-           i_size_read(&ci->vfs_inode) > 0) {
-               struct page *page = find_get_page(ci->vfs_inode.i_mapping, 0);
-               if (page) {
-                       if (PageUptodate(page)) {
-                               *pinned_page = page;
-                               goto out;
-                       }
-                       page_cache_release(page);
-               }
-               /*
-                * drop cap refs first because getattr while holding
-                * caps refs can cause deadlock.
-                */
-               ceph_put_cap_refs(ci, _got);
+               err = 0;
                _got = 0;
+               ret = try_get_cap_refs(ci, need, want, endoff,
+                                      false, &_got, &err);
+               if (ret) {
+                       if (err == -EAGAIN)
+                               continue;
+                       if (err < 0)
+                               return err;
+               } else {
+                       ret = wait_event_interruptible(ci->i_cap_wq,
+                                       try_get_cap_refs(ci, need, want, endoff,
+                                                        true, &_got, &err));
+                       if (err == -EAGAIN)
+                               continue;
+                       if (err < 0)
+                               ret = err;
+                       if (ret < 0)
+                               return ret;
+               }
 
-               /* getattr request will bring inline data into page cache */
-               ret = __ceph_do_getattr(&ci->vfs_inode, NULL,
-                                       CEPH_STAT_CAP_INLINE_DATA, true);
-               if (ret < 0)
-                       return ret;
-               goto retry;
+               if (ci->i_inline_version != CEPH_INLINE_NONE &&
+                   (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
+                   i_size_read(&ci->vfs_inode) > 0) {
+                       struct page *page =
+                               find_get_page(ci->vfs_inode.i_mapping, 0);
+                       if (page) {
+                               if (PageUptodate(page)) {
+                                       *pinned_page = page;
+                                       break;
+                               }
+                               page_cache_release(page);
+                       }
+                       /*
+                        * drop cap refs first because getattr while
+                        * holding caps refs can cause deadlock.
+                        */
+                       ceph_put_cap_refs(ci, _got);
+                       _got = 0;
+
+                       /*
+                        * getattr request will bring inline data into
+                        * page cache
+                        */
+                       ret = __ceph_do_getattr(&ci->vfs_inode, NULL,
+                                               CEPH_STAT_CAP_INLINE_DATA,
+                                               true);
+                       if (ret < 0)
+                               return ret;
+                       continue;
+               }
+               break;
        }
-out:
+
        *got = _got;
        return 0;
 }
@@ -2286,10 +2549,31 @@ out:
 void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
 {
        spin_lock(&ci->i_ceph_lock);
-       __take_cap_refs(ci, caps);
+       __take_cap_refs(ci, caps, false);
        spin_unlock(&ci->i_ceph_lock);
 }
 
+
+/*
+ * drop cap_snap that is not associated with any snapshot.
+ * we don't need to send FLUSHSNAP message for it.
+ */
+static int ceph_try_drop_cap_snap(struct ceph_cap_snap *capsnap)
+{
+       if (!capsnap->need_flush &&
+           !capsnap->writing && !capsnap->dirty_pages) {
+
+               dout("dropping cap_snap %p follows %llu\n",
+                    capsnap, capsnap->follows);
+               ceph_put_snap_context(capsnap->context);
+               list_del(&capsnap->ci_item);
+               list_del(&capsnap->flushing_item);
+               ceph_put_cap_snap(capsnap);
+               return 1;
+       }
+       return 0;
+}
+
 /*
  * Release cap refs.
  *
@@ -2303,7 +2587,6 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 {
        struct inode *inode = &ci->vfs_inode;
        int last = 0, put = 0, flushsnaps = 0, wake = 0;
-       struct ceph_cap_snap *capsnap;
 
        spin_lock(&ci->i_ceph_lock);
        if (had & CEPH_CAP_PIN)
@@ -2325,17 +2608,24 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
        if (had & CEPH_CAP_FILE_WR)
                if (--ci->i_wr_ref == 0) {
                        last++;
-                       if (!list_empty(&ci->i_cap_snaps)) {
-                               capsnap = list_first_entry(&ci->i_cap_snaps,
-                                                    struct ceph_cap_snap,
-                                                    ci_item);
-                               if (capsnap->writing) {
-                                       capsnap->writing = 0;
-                                       flushsnaps =
-                                               __ceph_finish_cap_snap(ci,
-                                                                      capsnap);
-                                       wake = 1;
-                               }
+                       if (__ceph_have_pending_cap_snap(ci)) {
+                               struct ceph_cap_snap *capsnap =
+                                       list_last_entry(&ci->i_cap_snaps,
+                                                       struct ceph_cap_snap,
+                                                       ci_item);
+                               capsnap->writing = 0;
+                               if (ceph_try_drop_cap_snap(capsnap))
+                                       put++;
+                               else if (__ceph_finish_cap_snap(ci, capsnap))
+                                       flushsnaps = 1;
+                               wake = 1;
+                       }
+                       if (ci->i_wrbuffer_ref_head == 0 &&
+                           ci->i_dirty_caps == 0 &&
+                           ci->i_flushing_caps == 0) {
+                               BUG_ON(!ci->i_head_snapc);
+                               ceph_put_snap_context(ci->i_head_snapc);
+                               ci->i_head_snapc = NULL;
                        }
                        /* see comment in __ceph_remove_cap() */
                        if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
@@ -2352,7 +2642,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
                ceph_flush_snaps(ci);
        if (wake)
                wake_up_all(&ci->i_cap_wq);
-       if (put)
+       while (put-- > 0)
                iput(inode);
 }
 
@@ -2380,7 +2670,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
        if (ci->i_head_snapc == snapc) {
                ci->i_wrbuffer_ref_head -= nr;
                if (ci->i_wrbuffer_ref_head == 0 &&
-                   ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) {
+                   ci->i_wr_ref == 0 &&
+                   ci->i_dirty_caps == 0 &&
+                   ci->i_flushing_caps == 0) {
                        BUG_ON(!ci->i_head_snapc);
                        ceph_put_snap_context(ci->i_head_snapc);
                        ci->i_head_snapc = NULL;
@@ -2401,25 +2693,15 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
                capsnap->dirty_pages -= nr;
                if (capsnap->dirty_pages == 0) {
                        complete_capsnap = 1;
-                       if (capsnap->dirty == 0)
-                               /* cap writeback completed before we created
-                                * the cap_snap; no FLUSHSNAP is needed */
-                               drop_capsnap = 1;
+                       drop_capsnap = ceph_try_drop_cap_snap(capsnap);
                }
                dout("put_wrbuffer_cap_refs on %p cap_snap %p "
-                    " snap %lld %d/%d -> %d/%d %s%s%s\n",
+                    " snap %lld %d/%d -> %d/%d %s%s\n",
                     inode, capsnap, capsnap->context->seq,
                     ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
                     ci->i_wrbuffer_ref, capsnap->dirty_pages,
                     last ? " (wrbuffer last)" : "",
-                    complete_capsnap ? " (complete capsnap)" : "",
-                    drop_capsnap ? " (drop capsnap)" : "");
-               if (drop_capsnap) {
-                       ceph_put_snap_context(capsnap->context);
-                       list_del(&capsnap->ci_item);
-                       list_del(&capsnap->flushing_item);
-                       ceph_put_cap_snap(capsnap);
-               }
+                    complete_capsnap ? " (complete capsnap)" : "");
        }
 
        spin_unlock(&ci->i_ceph_lock);
@@ -2526,7 +2808,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
         * try to invalidate (once).  (If there are dirty buffers, we
         * will invalidate _after_ writeback.)
         */
-       if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
+       if (!S_ISDIR(inode->i_mode) && /* don't invalidate readdir cache */
+           ((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
            (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
            !ci->i_wrbuffer_ref) {
                if (try_nonblocking_invalidate(inode)) {
@@ -2732,16 +3015,29 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+       struct ceph_cap_flush *cf;
+       struct rb_node *n;
+       LIST_HEAD(to_remove);
        unsigned seq = le32_to_cpu(m->seq);
        int dirty = le32_to_cpu(m->dirty);
        int cleaned = 0;
        int drop = 0;
-       int i;
 
-       for (i = 0; i < CEPH_CAP_BITS; i++)
-               if ((dirty & (1 << i)) &&
-                   (u16)flush_tid == ci->i_cap_flush_tid[i])
-                       cleaned |= 1 << i;
+       n = rb_first(&ci->i_cap_flush_tree);
+       while (n) {
+               cf = rb_entry(n, struct ceph_cap_flush, i_node);
+               n = rb_next(&cf->i_node);
+               if (cf->tid == flush_tid)
+                       cleaned = cf->caps;
+               if (cf->tid <= flush_tid) {
+                       rb_erase(&cf->i_node, &ci->i_cap_flush_tree);
+                       list_add_tail(&cf->list, &to_remove);
+               } else {
+                       cleaned &= ~cf->caps;
+                       if (!cleaned)
+                               break;
+               }
+       }
 
        dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s,"
             " flushing %s -> %s\n",
@@ -2749,12 +3045,23 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
             ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps),
             ceph_cap_string(ci->i_flushing_caps & ~cleaned));
 
-       if (ci->i_flushing_caps == (ci->i_flushing_caps & ~cleaned))
+       if (list_empty(&to_remove) && !cleaned)
                goto out;
 
        ci->i_flushing_caps &= ~cleaned;
 
        spin_lock(&mdsc->cap_dirty_lock);
+
+       if (!list_empty(&to_remove)) {
+               list_for_each_entry(cf, &to_remove, list)
+                       rb_erase(&cf->g_node, &mdsc->cap_flush_tree);
+
+               n = rb_first(&mdsc->cap_flush_tree);
+               cf = n ? rb_entry(n, struct ceph_cap_flush, g_node) : NULL;
+               if (!cf || cf->tid > flush_tid)
+                       wake_up_all(&mdsc->cap_flushing_wq);
+       }
+
        if (ci->i_flushing_caps == 0) {
                list_del_init(&ci->i_flushing_item);
                if (!list_empty(&session->s_cap_flushing))
@@ -2764,14 +3071,14 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
                                         struct ceph_inode_info,
                                         i_flushing_item)->vfs_inode);
                mdsc->num_cap_flushing--;
-               wake_up_all(&mdsc->cap_flushing_wq);
                dout(" inode %p now !flushing\n", inode);
 
                if (ci->i_dirty_caps == 0) {
                        dout(" inode %p now clean\n", inode);
                        BUG_ON(!list_empty(&ci->i_dirty_item));
                        drop = 1;
-                       if (ci->i_wrbuffer_ref_head == 0) {
+                       if (ci->i_wr_ref == 0 &&
+                           ci->i_wrbuffer_ref_head == 0) {
                                BUG_ON(!ci->i_head_snapc);
                                ceph_put_snap_context(ci->i_head_snapc);
                                ci->i_head_snapc = NULL;
@@ -2785,6 +3092,13 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
 
 out:
        spin_unlock(&ci->i_ceph_lock);
+
+       while (!list_empty(&to_remove)) {
+               cf = list_first_entry(&to_remove,
+                                     struct ceph_cap_flush, list);
+               list_del(&cf->list);
+               ceph_free_cap_flush(cf);
+       }
        if (drop)
                iput(inode);
 }
@@ -2800,6 +3114,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
                                     struct ceph_mds_session *session)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        u64 follows = le64_to_cpu(m->snap_follows);
        struct ceph_cap_snap *capsnap;
        int drop = 0;
@@ -2823,6 +3138,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
                        list_del(&capsnap->ci_item);
                        list_del(&capsnap->flushing_item);
                        ceph_put_cap_snap(capsnap);
+                       wake_up_all(&mdsc->cap_flushing_wq);
                        drop = 1;
                        break;
                } else {
@@ -2971,7 +3287,6 @@ retry:
                        mutex_lock_nested(&session->s_mutex,
                                          SINGLE_DEPTH_NESTING);
                }
-               ceph_add_cap_releases(mdsc, tsession);
                new_cap = ceph_get_cap(mdsc, NULL);
        } else {
                WARN_ON(1);
@@ -3167,16 +3482,20 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
             (unsigned)seq);
 
-       if (op == CEPH_CAP_OP_IMPORT)
-               ceph_add_cap_releases(mdsc, session);
-
        if (!inode) {
                dout(" i don't have ino %llx\n", vino.ino);
 
                if (op == CEPH_CAP_OP_IMPORT) {
+                       cap = ceph_get_cap(mdsc, NULL);
+                       cap->cap_ino = vino.ino;
+                       cap->queue_release = 1;
+                       cap->cap_id = cap_id;
+                       cap->mseq = mseq;
+                       cap->seq = seq;
                        spin_lock(&session->s_cap_lock);
-                       __queue_cap_release(session, vino.ino, cap_id,
-                                           mseq, seq);
+                       list_add_tail(&cap->session_caps,
+                                       &session->s_cap_releases);
+                       session->s_num_cap_releases++;
                        spin_unlock(&session->s_cap_lock);
                }
                goto flush_cap_releases;
@@ -3252,11 +3571,10 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 
 flush_cap_releases:
        /*
-        * send any full release message to try to move things
+        * send any cap release message to try to move things
         * along for the mds (who clearly thinks we still have this
         * cap).
         */
-       ceph_add_cap_releases(mdsc, session);
        ceph_send_cap_releases(mdsc, session);
 
 done:
index 4248307fea909c6f1555758e536a3f733ad5f95d..9314b4ea2375145647aa16446a1f33ae2c8106b2 100644 (file)
@@ -38,7 +38,7 @@ int ceph_init_dentry(struct dentry *dentry)
        if (dentry->d_fsdata)
                return 0;
 
-       di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
+       di = kmem_cache_alloc(ceph_dentry_cachep, GFP_KERNEL | __GFP_ZERO);
        if (!di)
                return -ENOMEM;          /* oh well */
 
@@ -106,6 +106,27 @@ static int fpos_cmp(loff_t l, loff_t r)
        return (int)(fpos_off(l) - fpos_off(r));
 }
 
+/*
+ * make note of the last dentry we read, so we can
+ * continue at the same lexicographical point,
+ * regardless of what dir changes take place on the
+ * server.
+ */
+static int note_last_dentry(struct ceph_file_info *fi, const char *name,
+                           int len, unsigned next_offset)
+{
+       char *buf = kmalloc(len+1, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+       kfree(fi->last_name);
+       fi->last_name = buf;
+       memcpy(fi->last_name, name, len);
+       fi->last_name[len] = 0;
+       fi->next_offset = next_offset;
+       dout("note_last_dentry '%s'\n", fi->last_name);
+       return 0;
+}
+
 /*
  * When possible, we try to satisfy a readdir by peeking at the
  * dcache.  We make this work by carefully ordering dentries on
@@ -123,123 +144,113 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
        struct ceph_file_info *fi = file->private_data;
        struct dentry *parent = file->f_path.dentry;
        struct inode *dir = d_inode(parent);
-       struct list_head *p;
-       struct dentry *dentry, *last;
+       struct dentry *dentry, *last = NULL;
        struct ceph_dentry_info *di;
+       unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry *);
        int err = 0;
+       loff_t ptr_pos = 0;
+       struct ceph_readdir_cache_control cache_ctl = {};
 
-       /* claim ref on last dentry we returned */
-       last = fi->dentry;
-       fi->dentry = NULL;
-
-       dout("__dcache_readdir %p v%u at %llu (last %p)\n",
-            dir, shared_gen, ctx->pos, last);
+       dout("__dcache_readdir %p v%u at %llu\n", dir, shared_gen, ctx->pos);
 
-       spin_lock(&parent->d_lock);
-
-       /* start at beginning? */
-       if (ctx->pos == 2 || last == NULL ||
-           fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) {
-               if (list_empty(&parent->d_subdirs))
-                       goto out_unlock;
-               p = parent->d_subdirs.prev;
-               dout(" initial p %p/%p\n", p->prev, p->next);
-       } else {
-               p = last->d_child.prev;
+       /* we can calculate cache index for the first dirfrag */
+       if (ceph_frag_is_leftmost(fpos_frag(ctx->pos))) {
+               cache_ctl.index = fpos_off(ctx->pos) - 2;
+               BUG_ON(cache_ctl.index < 0);
+               ptr_pos = cache_ctl.index * sizeof(struct dentry *);
        }
 
-more:
-       dentry = list_entry(p, struct dentry, d_child);
-       di = ceph_dentry(dentry);
-       while (1) {
-               dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
-                    d_unhashed(dentry) ? "!hashed" : "hashed",
-                    parent->d_subdirs.prev, parent->d_subdirs.next);
-               if (p == &parent->d_subdirs) {
+       while (true) {
+               pgoff_t pgoff;
+               bool emit_dentry;
+
+               if (ptr_pos >= i_size_read(dir)) {
                        fi->flags |= CEPH_F_ATEND;
-                       goto out_unlock;
+                       err = 0;
+                       break;
+               }
+
+               err = -EAGAIN;
+               pgoff = ptr_pos >> PAGE_CACHE_SHIFT;
+               if (!cache_ctl.page || pgoff != page_index(cache_ctl.page)) {
+                       ceph_readdir_cache_release(&cache_ctl);
+                       cache_ctl.page = find_lock_page(&dir->i_data, pgoff);
+                       if (!cache_ctl.page) {
+                               dout(" page %lu not found\n", pgoff);
+                               break;
+                       }
+                       /* reading/filling the cache are serialized by
+                        * i_mutex, no need to use page lock */
+                       unlock_page(cache_ctl.page);
+                       cache_ctl.dentries = kmap(cache_ctl.page);
                }
-               spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+
+               rcu_read_lock();
+               spin_lock(&parent->d_lock);
+               /* check i_size again here, because empty directory can be
+                * marked as complete while not holding the i_mutex. */
+               if (ceph_dir_is_complete_ordered(dir) &&
+                   ptr_pos < i_size_read(dir))
+                       dentry = cache_ctl.dentries[cache_ctl.index % nsize];
+               else
+                       dentry = NULL;
+               spin_unlock(&parent->d_lock);
+               if (dentry && !lockref_get_not_dead(&dentry->d_lockref))
+                       dentry = NULL;
+               rcu_read_unlock();
+               if (!dentry)
+                       break;
+
+               emit_dentry = false;
+               di = ceph_dentry(dentry);
+               spin_lock(&dentry->d_lock);
                if (di->lease_shared_gen == shared_gen &&
-                   !d_unhashed(dentry) && d_really_is_positive(dentry) &&
+                   d_really_is_positive(dentry) &&
                    ceph_snap(d_inode(dentry)) != CEPH_SNAPDIR &&
                    ceph_ino(d_inode(dentry)) != CEPH_INO_CEPH &&
-                   fpos_cmp(ctx->pos, di->offset) <= 0)
-                       break;
-               dout(" skipping %p %pd at %llu (%llu)%s%s\n", dentry,
-                    dentry, di->offset,
-                    ctx->pos, d_unhashed(dentry) ? " unhashed" : "",
-                    !d_inode(dentry) ? " null" : "");
+                   fpos_cmp(ctx->pos, di->offset) <= 0) {
+                       emit_dentry = true;
+               }
                spin_unlock(&dentry->d_lock);
-               p = p->prev;
-               dentry = list_entry(p, struct dentry, d_child);
-               di = ceph_dentry(dentry);
-       }
-
-       dget_dlock(dentry);
-       spin_unlock(&dentry->d_lock);
-       spin_unlock(&parent->d_lock);
 
-       /* make sure a dentry wasn't dropped while we didn't have parent lock */
-       if (!ceph_dir_is_complete_ordered(dir)) {
-               dout(" lost dir complete on %p; falling back to mds\n", dir);
-               dput(dentry);
-               err = -EAGAIN;
-               goto out;
-       }
+               if (emit_dentry) {
+                       dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos,
+                            dentry, dentry, d_inode(dentry));
+                       ctx->pos = di->offset;
+                       if (!dir_emit(ctx, dentry->d_name.name,
+                                     dentry->d_name.len,
+                                     ceph_translate_ino(dentry->d_sb,
+                                                        d_inode(dentry)->i_ino),
+                                     d_inode(dentry)->i_mode >> 12)) {
+                               dput(dentry);
+                               err = 0;
+                               break;
+                       }
+                       ctx->pos++;
 
-       dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos,
-            dentry, dentry, d_inode(dentry));
-       if (!dir_emit(ctx, dentry->d_name.name,
-                     dentry->d_name.len,
-                     ceph_translate_ino(dentry->d_sb, d_inode(dentry)->i_ino),
-                     d_inode(dentry)->i_mode >> 12)) {
-               if (last) {
-                       /* remember our position */
-                       fi->dentry = last;
-                       fi->next_offset = fpos_off(di->offset);
+                       if (last)
+                               dput(last);
+                       last = dentry;
+               } else {
+                       dput(dentry);
                }
-               dput(dentry);
-               return 0;
-       }
-
-       ctx->pos = di->offset + 1;
 
-       if (last)
-               dput(last);
-       last = dentry;
-
-       spin_lock(&parent->d_lock);
-       p = p->prev;    /* advance to next dentry */
-       goto more;
-
-out_unlock:
-       spin_unlock(&parent->d_lock);
-out:
-       if (last)
+               cache_ctl.index++;
+               ptr_pos += sizeof(struct dentry *);
+       }
+       ceph_readdir_cache_release(&cache_ctl);
+       if (last) {
+               int ret;
+               di = ceph_dentry(last);
+               ret = note_last_dentry(fi, last->d_name.name, last->d_name.len,
+                                      fpos_off(di->offset) + 1);
+               if (ret < 0)
+                       err = ret;
                dput(last);
+       }
        return err;
 }
 
-/*
- * make note of the last dentry we read, so we can
- * continue at the same lexicographical point,
- * regardless of what dir changes take place on the
- * server.
- */
-static int note_last_dentry(struct ceph_file_info *fi, const char *name,
-                           int len)
-{
-       kfree(fi->last_name);
-       fi->last_name = kmalloc(len+1, GFP_NOFS);
-       if (!fi->last_name)
-               return -ENOMEM;
-       memcpy(fi->last_name, name, len);
-       fi->last_name[len] = 0;
-       dout("note_last_dentry '%s'\n", fi->last_name);
-       return 0;
-}
-
 static int ceph_readdir(struct file *file, struct dir_context *ctx)
 {
        struct ceph_file_info *fi = file->private_data;
@@ -280,8 +291,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 
        /* can we use the dcache? */
        spin_lock(&ci->i_ceph_lock);
-       if ((ctx->pos == 2 || fi->dentry) &&
-           ceph_test_mount_opt(fsc, DCACHE) &&
+       if (ceph_test_mount_opt(fsc, DCACHE) &&
            !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
            ceph_snap(inode) != CEPH_SNAPDIR &&
            __ceph_dir_is_complete_ordered(ci) &&
@@ -296,24 +306,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
        } else {
                spin_unlock(&ci->i_ceph_lock);
        }
-       if (fi->dentry) {
-               err = note_last_dentry(fi, fi->dentry->d_name.name,
-                                      fi->dentry->d_name.len);
-               if (err)
-                       return err;
-               dput(fi->dentry);
-               fi->dentry = NULL;
-       }
 
        /* proceed with a normal readdir */
-
-       if (ctx->pos == 2) {
-               /* note dir version at start of readdir so we can tell
-                * if any dentries get dropped */
-               fi->dir_release_count = atomic_read(&ci->i_release_count);
-               fi->dir_ordered_count = ci->i_ordered_count;
-       }
-
 more:
        /* do we have the correct frag content buffered? */
        if (fi->frag != frag || fi->last_readdir == NULL) {
@@ -342,12 +336,15 @@ more:
                req->r_direct_hash = ceph_frag_value(frag);
                req->r_direct_is_hash = true;
                if (fi->last_name) {
-                       req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
+                       req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
                        if (!req->r_path2) {
                                ceph_mdsc_put_request(req);
                                return -ENOMEM;
                        }
                }
+               req->r_dir_release_cnt = fi->dir_release_count;
+               req->r_dir_ordered_cnt = fi->dir_ordered_count;
+               req->r_readdir_cache_idx = fi->readdir_cache_idx;
                req->r_readdir_offset = fi->next_offset;
                req->r_args.readdir.frag = cpu_to_le32(frag);
 
@@ -364,26 +361,38 @@ more:
                     (int)req->r_reply_info.dir_end,
                     (int)req->r_reply_info.dir_complete);
 
-               if (!req->r_did_prepopulate) {
-                       dout("readdir !did_prepopulate");
-                       /* preclude from marking dir complete */
-                       fi->dir_release_count--;
-               }
 
                /* note next offset and last dentry name */
                rinfo = &req->r_reply_info;
                if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
                        frag = le32_to_cpu(rinfo->dir_dir->frag);
-                       if (ceph_frag_is_leftmost(frag))
-                               fi->next_offset = 2;
-                       else
-                               fi->next_offset = 0;
-                       off = fi->next_offset;
+                       off = req->r_readdir_offset;
+                       fi->next_offset = off;
                }
+
                fi->frag = frag;
                fi->offset = fi->next_offset;
                fi->last_readdir = req;
 
+               if (req->r_did_prepopulate) {
+                       fi->readdir_cache_idx = req->r_readdir_cache_idx;
+                       if (fi->readdir_cache_idx < 0) {
+                               /* preclude from marking dir ordered */
+                               fi->dir_ordered_count = 0;
+                       } else if (ceph_frag_is_leftmost(frag) && off == 2) {
+                               /* note dir version at start of readdir so
+                                * we can tell if any dentries get dropped */
+                               fi->dir_release_count = req->r_dir_release_cnt;
+                               fi->dir_ordered_count = req->r_dir_ordered_cnt;
+                       }
+               } else {
+                       dout("readdir !did_prepopulate");
+                       /* disable readdir cache */
+                       fi->readdir_cache_idx = -1;
+                       /* preclude from marking dir complete */
+                       fi->dir_release_count = 0;
+               }
+
                if (req->r_reply_info.dir_end) {
                        kfree(fi->last_name);
                        fi->last_name = NULL;
@@ -394,10 +403,10 @@ more:
                } else {
                        err = note_last_dentry(fi,
                                       rinfo->dir_dname[rinfo->dir_nr-1],
-                                      rinfo->dir_dname_len[rinfo->dir_nr-1]);
+                                      rinfo->dir_dname_len[rinfo->dir_nr-1],
+                                      fi->next_offset + rinfo->dir_nr);
                        if (err)
                                return err;
-                       fi->next_offset += rinfo->dir_nr;
                }
        }
 
@@ -453,16 +462,22 @@ more:
         * were released during the whole readdir, and we should have
         * the complete dir contents in our cache.
         */
-       spin_lock(&ci->i_ceph_lock);
-       if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
-               if (ci->i_ordered_count == fi->dir_ordered_count)
+       if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) {
+               spin_lock(&ci->i_ceph_lock);
+               if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) {
                        dout(" marking %p complete and ordered\n", inode);
-               else
+                       /* use i_size to track number of entries in
+                        * readdir cache */
+                       BUG_ON(fi->readdir_cache_idx < 0);
+                       i_size_write(inode, fi->readdir_cache_idx *
+                                    sizeof(struct dentry*));
+               } else {
                        dout(" marking %p complete\n", inode);
+               }
                __ceph_dir_set_complete(ci, fi->dir_release_count,
                                        fi->dir_ordered_count);
+               spin_unlock(&ci->i_ceph_lock);
        }
-       spin_unlock(&ci->i_ceph_lock);
 
        dout("readdir %p file %p done.\n", inode, file);
        return 0;
@@ -476,14 +491,12 @@ static void reset_readdir(struct ceph_file_info *fi, unsigned frag)
        }
        kfree(fi->last_name);
        fi->last_name = NULL;
+       fi->dir_release_count = 0;
+       fi->readdir_cache_idx = -1;
        if (ceph_frag_is_leftmost(frag))
                fi->next_offset = 2;  /* compensate for . and .. */
        else
                fi->next_offset = 0;
-       if (fi->dentry) {
-               dput(fi->dentry);
-               fi->dentry = NULL;
-       }
        fi->flags &= ~CEPH_F_ATEND;
 }
 
@@ -497,13 +510,12 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
        mutex_lock(&inode->i_mutex);
        retval = -EINVAL;
        switch (whence) {
-       case SEEK_END:
-               offset += inode->i_size + 2;   /* FIXME */
-               break;
        case SEEK_CUR:
                offset += file->f_pos;
        case SEEK_SET:
                break;
+       case SEEK_END:
+               retval = -EOPNOTSUPP;
        default:
                goto out;
        }
@@ -516,20 +528,18 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
                }
                retval = offset;
 
-               /*
-                * discard buffered readdir content on seekdir(0), or
-                * seek to new frag, or seek prior to current chunk.
-                */
                if (offset == 0 ||
                    fpos_frag(offset) != fi->frag ||
                    fpos_off(offset) < fi->offset) {
+                       /* discard buffered readdir content on seekdir(0), or
+                        * seek to new frag, or seek prior to current chunk */
                        dout("dir_llseek dropping %p content\n", file);
                        reset_readdir(fi, fpos_frag(offset));
+               } else if (fpos_cmp(offset, old_offset) > 0) {
+                       /* reset dir_release_count if we did a forward seek */
+                       fi->dir_release_count = 0;
+                       fi->readdir_cache_idx = -1;
                }
-
-               /* bump dir_release_count if we did a forward seek */
-               if (fpos_cmp(offset, old_offset) > 0)
-                       fi->dir_release_count--;
        }
 out:
        mutex_unlock(&inode->i_mutex);
@@ -764,7 +774,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
                err = PTR_ERR(req);
                goto out;
        }
-       req->r_path2 = kstrdup(dest, GFP_NOFS);
+       req->r_path2 = kstrdup(dest, GFP_KERNEL);
        if (!req->r_path2) {
                err = -ENOMEM;
                ceph_mdsc_put_request(req);
@@ -985,16 +995,15 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
                 * to do it here.
                 */
 
+               /* d_move screws up sibling dentries' offsets */
+               ceph_dir_clear_complete(old_dir);
+               ceph_dir_clear_complete(new_dir);
+
                d_move(old_dentry, new_dentry);
 
                /* ensure target dentry is invalidated, despite
                   rehashing bug in vfs_rename_dir */
                ceph_invalidate_dentry_lease(new_dentry);
-
-               /* d_move screws up sibling dentries' offsets */
-               ceph_dir_clear_complete(old_dir);
-               ceph_dir_clear_complete(new_dir);
-
        }
        ceph_mdsc_put_request(req);
        return err;
@@ -1189,7 +1198,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
                return -EISDIR;
 
        if (!cf->dir_info) {
-               cf->dir_info = kmalloc(bufsize, GFP_NOFS);
+               cf->dir_info = kmalloc(bufsize, GFP_KERNEL);
                if (!cf->dir_info)
                        return -ENOMEM;
                cf->dir_info_len =
@@ -1223,66 +1232,6 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
        return size - left;
 }
 
-/*
- * an fsync() on a dir will wait for any uncommitted directory
- * operations to commit.
- */
-static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
-                         int datasync)
-{
-       struct inode *inode = file_inode(file);
-       struct ceph_inode_info *ci = ceph_inode(inode);
-       struct list_head *head = &ci->i_unsafe_dirops;
-       struct ceph_mds_request *req;
-       u64 last_tid;
-       int ret = 0;
-
-       dout("dir_fsync %p\n", inode);
-       ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
-       if (ret)
-               return ret;
-       mutex_lock(&inode->i_mutex);
-
-       spin_lock(&ci->i_unsafe_lock);
-       if (list_empty(head))
-               goto out;
-
-       req = list_entry(head->prev,
-                        struct ceph_mds_request, r_unsafe_dir_item);
-       last_tid = req->r_tid;
-
-       do {
-               ceph_mdsc_get_request(req);
-               spin_unlock(&ci->i_unsafe_lock);
-
-               dout("dir_fsync %p wait on tid %llu (until %llu)\n",
-                    inode, req->r_tid, last_tid);
-               if (req->r_timeout) {
-                       unsigned long time_left = wait_for_completion_timeout(
-                                                       &req->r_safe_completion,
-                                                       req->r_timeout);
-                       if (time_left > 0)
-                               ret = 0;
-                       else
-                               ret = -EIO;  /* timed out */
-               } else {
-                       wait_for_completion(&req->r_safe_completion);
-               }
-               ceph_mdsc_put_request(req);
-
-               spin_lock(&ci->i_unsafe_lock);
-               if (ret || list_empty(head))
-                       break;
-               req = list_entry(head->next,
-                                struct ceph_mds_request, r_unsafe_dir_item);
-       } while (req->r_tid < last_tid);
-out:
-       spin_unlock(&ci->i_unsafe_lock);
-       mutex_unlock(&inode->i_mutex);
-
-       return ret;
-}
-
 /*
  * We maintain a private dentry LRU.
  *
@@ -1353,7 +1302,7 @@ const struct file_operations ceph_dir_fops = {
        .open = ceph_open,
        .release = ceph_release,
        .unlocked_ioctl = ceph_ioctl,
-       .fsync = ceph_dir_fsync,
+       .fsync = ceph_fsync,
 };
 
 const struct file_operations ceph_snapdir_fops = {
index 3b6b522b4b31ed9e2f7193c661894787bfe63627..faf92095e105650617d8e465e898f8d22a803d60 100644 (file)
@@ -89,13 +89,14 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
        case S_IFDIR:
                dout("init_file %p %p 0%o (regular)\n", inode, file,
                     inode->i_mode);
-               cf = kmem_cache_alloc(ceph_file_cachep, GFP_NOFS | __GFP_ZERO);
+               cf = kmem_cache_alloc(ceph_file_cachep, GFP_KERNEL | __GFP_ZERO);
                if (cf == NULL) {
                        ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
                        return -ENOMEM;
                }
                cf->fmode = fmode;
                cf->next_offset = 2;
+               cf->readdir_cache_idx = -1;
                file->private_data = cf;
                BUG_ON(inode->i_fop->release != ceph_release);
                break;
@@ -324,7 +325,6 @@ int ceph_release(struct inode *inode, struct file *file)
                ceph_mdsc_put_request(cf->last_readdir);
        kfree(cf->last_name);
        kfree(cf->dir_info);
-       dput(cf->dentry);
        kmem_cache_free(ceph_file_cachep, cf);
 
        /* wake up anyone waiting for caps on this inode */
@@ -483,7 +483,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
                }
        } else {
                num_pages = calc_pages_for(off, len);
-               pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
+               pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
                if (IS_ERR(pages))
                        return PTR_ERR(pages);
                ret = striped_read(inode, off, len, pages,
@@ -557,13 +557,13 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
  * objects, rollback on failure, etc.)
  */
 static ssize_t
-ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
+ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
+                      struct ceph_snap_context *snapc)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-       struct ceph_snap_context *snapc;
        struct ceph_vino vino;
        struct ceph_osd_request *req;
        struct page **pages;
@@ -600,7 +600,6 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
                size_t start;
                ssize_t n;
 
-               snapc = ci->i_snap_realm->cached_context;
                vino = ceph_vino(inode);
                req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
                                            vino, pos, &len, 0,
@@ -614,7 +613,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
                        break;
                }
 
-               osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
+               osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
 
                n = iov_iter_get_pages_alloc(from, &pages, len, &start);
                if (unlikely(n < 0)) {
@@ -674,13 +673,13 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
  * objects, rollback on failure, etc.)
  */
 static ssize_t
-ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
+ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
+               struct ceph_snap_context *snapc)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-       struct ceph_snap_context *snapc;
        struct ceph_vino vino;
        struct ceph_osd_request *req;
        struct page **pages;
@@ -717,7 +716,6 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
                size_t left;
                int n;
 
-               snapc = ci->i_snap_realm->cached_context;
                vino = ceph_vino(inode);
                req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
                                            vino, pos, &len, 0, 1,
@@ -736,7 +734,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
                 */
                num_pages = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
-               pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
+               pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
                if (IS_ERR(pages)) {
                        ret = PTR_ERR(pages);
                        goto out;
@@ -860,7 +858,7 @@ again:
                struct page *page = NULL;
                loff_t i_size;
                if (retry_op == READ_INLINE) {
-                       page = __page_cache_alloc(GFP_NOFS);
+                       page = __page_cache_alloc(GFP_KERNEL);
                        if (!page)
                                return -ENOMEM;
                }
@@ -941,6 +939,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_osd_client *osdc =
                &ceph_sb_to_client(inode->i_sb)->client->osdc;
+       struct ceph_cap_flush *prealloc_cf;
        ssize_t count, written = 0;
        int err, want, got;
        loff_t pos;
@@ -948,6 +947,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
 
+       prealloc_cf = ceph_alloc_cap_flush();
+       if (!prealloc_cf)
+               return -ENOMEM;
+
        mutex_lock(&inode->i_mutex);
 
        /* We can write back this queue in page reclaim */
@@ -996,14 +999,30 @@ retry_snap:
 
        if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
            (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
+               struct ceph_snap_context *snapc;
                struct iov_iter data;
                mutex_unlock(&inode->i_mutex);
+
+               spin_lock(&ci->i_ceph_lock);
+               if (__ceph_have_pending_cap_snap(ci)) {
+                       struct ceph_cap_snap *capsnap =
+                                       list_last_entry(&ci->i_cap_snaps,
+                                                       struct ceph_cap_snap,
+                                                       ci_item);
+                       snapc = ceph_get_snap_context(capsnap->context);
+               } else {
+                       BUG_ON(!ci->i_head_snapc);
+                       snapc = ceph_get_snap_context(ci->i_head_snapc);
+               }
+               spin_unlock(&ci->i_ceph_lock);
+
                /* we might need to revert back to that point */
                data = *from;
                if (iocb->ki_flags & IOCB_DIRECT)
-                       written = ceph_sync_direct_write(iocb, &data, pos);
+                       written = ceph_sync_direct_write(iocb, &data, pos,
+                                                        snapc);
                else
-                       written = ceph_sync_write(iocb, &data, pos);
+                       written = ceph_sync_write(iocb, &data, pos, snapc);
                if (written == -EOLDSNAPC) {
                        dout("aio_write %p %llx.%llx %llu~%u"
                                "got EOLDSNAPC, retrying\n",
@@ -1014,6 +1033,7 @@ retry_snap:
                }
                if (written > 0)
                        iov_iter_advance(from, written);
+               ceph_put_snap_context(snapc);
        } else {
                loff_t old_size = inode->i_size;
                /*
@@ -1035,7 +1055,8 @@ retry_snap:
                int dirty;
                spin_lock(&ci->i_ceph_lock);
                ci->i_inline_version = CEPH_INLINE_NONE;
-               dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+               dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
+                                              &prealloc_cf);
                spin_unlock(&ci->i_ceph_lock);
                if (dirty)
                        __mark_inode_dirty(inode, dirty);
@@ -1059,6 +1080,7 @@ retry_snap:
 out:
        mutex_unlock(&inode->i_mutex);
 out_unlocked:
+       ceph_free_cap_flush(prealloc_cf);
        current->backing_dev_info = NULL;
        return written ? written : err;
 }
@@ -1255,6 +1277,7 @@ static long ceph_fallocate(struct file *file, int mode,
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_osd_client *osdc =
                &ceph_inode_to_client(inode)->client->osdc;
+       struct ceph_cap_flush *prealloc_cf;
        int want, got = 0;
        int dirty;
        int ret = 0;
@@ -1267,6 +1290,10 @@ static long ceph_fallocate(struct file *file, int mode,
        if (!S_ISREG(inode->i_mode))
                return -EOPNOTSUPP;
 
+       prealloc_cf = ceph_alloc_cap_flush();
+       if (!prealloc_cf)
+               return -ENOMEM;
+
        mutex_lock(&inode->i_mutex);
 
        if (ceph_snap(inode) != CEPH_NOSNAP) {
@@ -1313,7 +1340,8 @@ static long ceph_fallocate(struct file *file, int mode,
        if (!ret) {
                spin_lock(&ci->i_ceph_lock);
                ci->i_inline_version = CEPH_INLINE_NONE;
-               dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+               dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
+                                              &prealloc_cf);
                spin_unlock(&ci->i_ceph_lock);
                if (dirty)
                        __mark_inode_dirty(inode, dirty);
@@ -1322,6 +1350,7 @@ static long ceph_fallocate(struct file *file, int mode,
        ceph_put_cap_refs(ci, got);
 unlock:
        mutex_unlock(&inode->i_mutex);
+       ceph_free_cap_flush(prealloc_cf);
        return ret;
 }
 
index 571acd88606cfcec3d01fc4a6ef453f0b49e9713..96d2bd8299022e554c8bfdc0b7f1c759be8fc8cd 100644 (file)
@@ -389,9 +389,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        ci->i_inline_version = 0;
        ci->i_time_warp_seq = 0;
        ci->i_ceph_flags = 0;
-       ci->i_ordered_count = 0;
-       atomic_set(&ci->i_release_count, 1);
-       atomic_set(&ci->i_complete_count, 0);
+       atomic64_set(&ci->i_ordered_count, 1);
+       atomic64_set(&ci->i_release_count, 1);
+       atomic64_set(&ci->i_complete_seq[0], 0);
+       atomic64_set(&ci->i_complete_seq[1], 0);
        ci->i_symlink = NULL;
 
        memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
@@ -415,9 +416,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        ci->i_flushing_caps = 0;
        INIT_LIST_HEAD(&ci->i_dirty_item);
        INIT_LIST_HEAD(&ci->i_flushing_item);
-       ci->i_cap_flush_seq = 0;
-       ci->i_cap_flush_last_tid = 0;
-       memset(&ci->i_cap_flush_tid, 0, sizeof(ci->i_cap_flush_tid));
+       ci->i_prealloc_cap_flush = NULL;
+       ci->i_cap_flush_tree = RB_ROOT;
        init_waitqueue_head(&ci->i_cap_wq);
        ci->i_hold_caps_min = 0;
        ci->i_hold_caps_max = 0;
@@ -752,7 +752,10 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
 
        if (new_version ||
            (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
+               if (ci->i_layout.fl_pg_pool != info->layout.fl_pg_pool)
+                       ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
                ci->i_layout = info->layout;
+
                queue_trunc = ceph_fill_file_size(inode, issued,
                                        le32_to_cpu(info->truncate_seq),
                                        le64_to_cpu(info->truncate_size),
@@ -858,9 +861,10 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
                            (issued & CEPH_CAP_FILE_EXCL) == 0 &&
                            !__ceph_dir_is_complete(ci)) {
                                dout(" marking %p complete (empty)\n", inode);
+                               i_size_write(inode, 0);
                                __ceph_dir_set_complete(ci,
-                                       atomic_read(&ci->i_release_count),
-                                       ci->i_ordered_count);
+                                       atomic64_read(&ci->i_release_count),
+                                       atomic64_read(&ci->i_ordered_count));
                        }
 
                        wake = true;
@@ -1212,6 +1216,10 @@ retry_lookup:
                        dout("fill_trace doing d_move %p -> %p\n",
                             req->r_old_dentry, dn);
 
+                       /* d_move screws up sibling dentries' offsets */
+                       ceph_dir_clear_ordered(dir);
+                       ceph_dir_clear_ordered(olddir);
+
                        d_move(req->r_old_dentry, dn);
                        dout(" src %p '%pd' dst %p '%pd'\n",
                             req->r_old_dentry,
@@ -1222,10 +1230,6 @@ retry_lookup:
                           rehashing bug in vfs_rename_dir */
                        ceph_invalidate_dentry_lease(dn);
 
-                       /* d_move screws up sibling dentries' offsets */
-                       ceph_dir_clear_ordered(dir);
-                       ceph_dir_clear_ordered(olddir);
-
                        dout("dn %p gets new offset %lld\n", req->r_old_dentry,
                             ceph_dentry(req->r_old_dentry)->offset);
 
@@ -1333,6 +1337,49 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
        return err;
 }
 
+void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl)
+{
+       if (ctl->page) {
+               kunmap(ctl->page);
+               page_cache_release(ctl->page);
+               ctl->page = NULL;
+       }
+}
+
+static int fill_readdir_cache(struct inode *dir, struct dentry *dn,
+                             struct ceph_readdir_cache_control *ctl,
+                             struct ceph_mds_request *req)
+{
+       struct ceph_inode_info *ci = ceph_inode(dir);
+       unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry*);
+       unsigned idx = ctl->index % nsize;
+       pgoff_t pgoff = ctl->index / nsize;
+
+       if (!ctl->page || pgoff != page_index(ctl->page)) {
+               ceph_readdir_cache_release(ctl);
+               ctl->page  = grab_cache_page(&dir->i_data, pgoff);
+               if (!ctl->page) {
+                       ctl->index = -1;
+                       return -ENOMEM;
+               }
+               /* reading/filling the cache are serialized by
+                * i_mutex, no need to use page lock */
+               unlock_page(ctl->page);
+               ctl->dentries = kmap(ctl->page);
+       }
+
+       if (req->r_dir_release_cnt == atomic64_read(&ci->i_release_count) &&
+           req->r_dir_ordered_cnt == atomic64_read(&ci->i_ordered_count)) {
+               dout("readdir cache dn %p idx %d\n", dn, ctl->index);
+               ctl->dentries[idx] = dn;
+               ctl->index++;
+       } else {
+               dout("disable readdir cache\n");
+               ctl->index = -1;
+       }
+       return 0;
+}
+
 int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                             struct ceph_mds_session *session)
 {
@@ -1345,8 +1392,11 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
        struct inode *snapdir = NULL;
        struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
        struct ceph_dentry_info *di;
-       u64 r_readdir_offset = req->r_readdir_offset;
        u32 frag = le32_to_cpu(rhead->args.readdir.frag);
+       struct ceph_readdir_cache_control cache_ctl = {};
+
+       if (req->r_aborted)
+               return readdir_prepopulate_inodes_only(req, session);
 
        if (rinfo->dir_dir &&
            le32_to_cpu(rinfo->dir_dir->frag) != frag) {
@@ -1354,14 +1404,11 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                     frag, le32_to_cpu(rinfo->dir_dir->frag));
                frag = le32_to_cpu(rinfo->dir_dir->frag);
                if (ceph_frag_is_leftmost(frag))
-                       r_readdir_offset = 2;
+                       req->r_readdir_offset = 2;
                else
-                       r_readdir_offset = 0;
+                       req->r_readdir_offset = 0;
        }
 
-       if (req->r_aborted)
-               return readdir_prepopulate_inodes_only(req, session);
-
        if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) {
                snapdir = ceph_get_snapdir(d_inode(parent));
                parent = d_find_alias(snapdir);
@@ -1374,6 +1421,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                        ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir);
        }
 
+       if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2) {
+               /* note dir version at start of readdir so we can tell
+                * if any dentries get dropped */
+               struct ceph_inode_info *ci = ceph_inode(d_inode(parent));
+               req->r_dir_release_cnt = atomic64_read(&ci->i_release_count);
+               req->r_dir_ordered_cnt = atomic64_read(&ci->i_ordered_count);
+               req->r_readdir_cache_idx = 0;
+       }
+
+       cache_ctl.index = req->r_readdir_cache_idx;
+
        /* FIXME: release caps/leases if error occurs */
        for (i = 0; i < rinfo->dir_nr; i++) {
                struct ceph_vino vino;
@@ -1413,13 +1471,6 @@ retry_lookup:
                        d_delete(dn);
                        dput(dn);
                        goto retry_lookup;
-               } else {
-                       /* reorder parent's d_subdirs */
-                       spin_lock(&parent->d_lock);
-                       spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
-                       list_move(&dn->d_child, &parent->d_subdirs);
-                       spin_unlock(&dn->d_lock);
-                       spin_unlock(&parent->d_lock);
                }
 
                /* inode */
@@ -1436,13 +1487,15 @@ retry_lookup:
                        }
                }
 
-               if (fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session,
-                              req->r_request_started, -1,
-                              &req->r_caps_reservation) < 0) {
+               ret = fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session,
+                                req->r_request_started, -1,
+                                &req->r_caps_reservation);
+               if (ret < 0) {
                        pr_err("fill_inode badness on %p\n", in);
                        if (d_really_is_negative(dn))
                                iput(in);
                        d_drop(dn);
+                       err = ret;
                        goto next_item;
                }
 
@@ -1458,19 +1511,28 @@ retry_lookup:
                }
 
                di = dn->d_fsdata;
-               di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
+               di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset);
 
                update_dentry_lease(dn, rinfo->dir_dlease[i],
                                    req->r_session,
                                    req->r_request_started);
+
+               if (err == 0 && cache_ctl.index >= 0) {
+                       ret = fill_readdir_cache(d_inode(parent), dn,
+                                                &cache_ctl, req);
+                       if (ret < 0)
+                               err = ret;
+               }
 next_item:
                if (dn)
                        dput(dn);
        }
-       if (err == 0)
-               req->r_did_prepopulate = true;
-
 out:
+       if (err == 0) {
+               req->r_did_prepopulate = true;
+               req->r_readdir_cache_idx = cache_ctl.index;
+       }
+       ceph_readdir_cache_release(&cache_ctl);
        if (snapdir) {
                iput(snapdir);
                dput(parent);
@@ -1712,11 +1774,13 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        const unsigned int ia_valid = attr->ia_valid;
        struct ceph_mds_request *req;
        struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
+       struct ceph_cap_flush *prealloc_cf;
        int issued;
        int release = 0, dirtied = 0;
        int mask = 0;
        int err = 0;
        int inode_dirty_flags = 0;
+       bool lock_snap_rwsem = false;
 
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
@@ -1725,13 +1789,31 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        if (err != 0)
                return err;
 
+       prealloc_cf = ceph_alloc_cap_flush();
+       if (!prealloc_cf)
+               return -ENOMEM;
+
        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR,
                                       USE_AUTH_MDS);
-       if (IS_ERR(req))
+       if (IS_ERR(req)) {
+               ceph_free_cap_flush(prealloc_cf);
                return PTR_ERR(req);
+       }
 
        spin_lock(&ci->i_ceph_lock);
        issued = __ceph_caps_issued(ci, NULL);
+
+       if (!ci->i_head_snapc &&
+           (issued & (CEPH_CAP_ANY_EXCL | CEPH_CAP_FILE_WR))) {
+               lock_snap_rwsem = true;
+               if (!down_read_trylock(&mdsc->snap_rwsem)) {
+                       spin_unlock(&ci->i_ceph_lock);
+                       down_read(&mdsc->snap_rwsem);
+                       spin_lock(&ci->i_ceph_lock);
+                       issued = __ceph_caps_issued(ci, NULL);
+               }
+       }
+
        dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
 
        if (ia_valid & ATTR_UID) {
@@ -1874,12 +1956,15 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
                dout("setattr %p ATTR_FILE ... hrm!\n", inode);
 
        if (dirtied) {
-               inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied);
+               inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
+                                                          &prealloc_cf);
                inode->i_ctime = CURRENT_TIME;
        }
 
        release &= issued;
        spin_unlock(&ci->i_ceph_lock);
+       if (lock_snap_rwsem)
+               up_read(&mdsc->snap_rwsem);
 
        if (inode_dirty_flags)
                __mark_inode_dirty(inode, inode_dirty_flags);
@@ -1904,9 +1989,11 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        ceph_mdsc_put_request(req);
        if (mask & CEPH_SETATTR_SIZE)
                __ceph_do_pending_vmtruncate(inode);
+       ceph_free_cap_flush(prealloc_cf);
        return err;
 out_put:
        ceph_mdsc_put_request(req);
+       ceph_free_cap_flush(prealloc_cf);
        return err;
 }
 
index 84f37f34f9aa663952a60e8a3440a81934c083b0..6aa07af67603ada211f49268d3845ea62b625720 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/utsname.h>
+#include <linux/ratelimit.h>
 
 #include "super.h"
 #include "mds_client.h"
@@ -458,7 +459,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
        s->s_cap_reconnect = 0;
        s->s_cap_iterator = NULL;
        INIT_LIST_HEAD(&s->s_cap_releases);
-       INIT_LIST_HEAD(&s->s_cap_releases_done);
        INIT_LIST_HEAD(&s->s_cap_flushing);
        INIT_LIST_HEAD(&s->s_cap_snaps_flushing);
 
@@ -629,6 +629,9 @@ static void __register_request(struct ceph_mds_client *mdsc,
        req->r_uid = current_fsuid();
        req->r_gid = current_fsgid();
 
+       if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK)
+               mdsc->oldest_tid = req->r_tid;
+
        if (dir) {
                struct ceph_inode_info *ci = ceph_inode(dir);
 
@@ -644,6 +647,21 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
                                 struct ceph_mds_request *req)
 {
        dout("__unregister_request %p tid %lld\n", req, req->r_tid);
+
+       if (req->r_tid == mdsc->oldest_tid) {
+               struct rb_node *p = rb_next(&req->r_node);
+               mdsc->oldest_tid = 0;
+               while (p) {
+                       struct ceph_mds_request *next_req =
+                               rb_entry(p, struct ceph_mds_request, r_node);
+                       if (next_req->r_op != CEPH_MDS_OP_SETFILELOCK) {
+                               mdsc->oldest_tid = next_req->r_tid;
+                               break;
+                       }
+                       p = rb_next(p);
+               }
+       }
+
        rb_erase(&req->r_node, &mdsc->request_tree);
        RB_CLEAR_NODE(&req->r_node);
 
@@ -998,27 +1016,25 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
  * session caps
  */
 
-/*
- * Free preallocated cap messages assigned to this session
- */
-static void cleanup_cap_releases(struct ceph_mds_session *session)
+/* caller holds s_cap_lock, we drop it */
+static void cleanup_cap_releases(struct ceph_mds_client *mdsc,
+                                struct ceph_mds_session *session)
+       __releases(session->s_cap_lock)
 {
-       struct ceph_msg *msg;
+       LIST_HEAD(tmp_list);
+       list_splice_init(&session->s_cap_releases, &tmp_list);
+       session->s_num_cap_releases = 0;
+       spin_unlock(&session->s_cap_lock);
 
-       spin_lock(&session->s_cap_lock);
-       while (!list_empty(&session->s_cap_releases)) {
-               msg = list_first_entry(&session->s_cap_releases,
-                                      struct ceph_msg, list_head);
-               list_del_init(&msg->list_head);
-               ceph_msg_put(msg);
-       }
-       while (!list_empty(&session->s_cap_releases_done)) {
-               msg = list_first_entry(&session->s_cap_releases_done,
-                                      struct ceph_msg, list_head);
-               list_del_init(&msg->list_head);
-               ceph_msg_put(msg);
+       dout("cleanup_cap_releases mds%d\n", session->s_mds);
+       while (!list_empty(&tmp_list)) {
+               struct ceph_cap *cap;
+               /* zero out the in-progress message */
+               cap = list_first_entry(&tmp_list,
+                                       struct ceph_cap, session_caps);
+               list_del(&cap->session_caps);
+               ceph_put_cap(mdsc, cap);
        }
-       spin_unlock(&session->s_cap_lock);
 }
 
 static void cleanup_session_requests(struct ceph_mds_client *mdsc,
@@ -1033,7 +1049,8 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
                req = list_first_entry(&session->s_unsafe,
                                       struct ceph_mds_request, r_unsafe_item);
                list_del_init(&req->r_unsafe_item);
-               pr_info(" dropping unsafe request %llu\n", req->r_tid);
+               pr_warn_ratelimited(" dropping unsafe request %llu\n",
+                                   req->r_tid);
                __unregister_request(mdsc, req);
        }
        /* zero r_attempts, so kick_requests() will re-send requests */
@@ -1095,10 +1112,16 @@ static int iterate_session_caps(struct ceph_mds_session *session,
                        dout("iterate_session_caps  finishing cap %p removal\n",
                             cap);
                        BUG_ON(cap->session != session);
+                       cap->session = NULL;
                        list_del_init(&cap->session_caps);
                        session->s_nr_caps--;
-                       cap->session = NULL;
-                       old_cap = cap;  /* put_cap it w/o locks held */
+                       if (cap->queue_release) {
+                               list_add_tail(&cap->session_caps,
+                                             &session->s_cap_releases);
+                               session->s_num_cap_releases++;
+                       } else {
+                               old_cap = cap;  /* put_cap it w/o locks held */
+                       }
                }
                if (ret < 0)
                        goto out;
@@ -1119,6 +1142,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                                  void *arg)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
+       LIST_HEAD(to_remove);
        int drop = 0;
 
        dout("removing cap %p, ci is %p, inode is %p\n",
@@ -1126,12 +1150,27 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
        spin_lock(&ci->i_ceph_lock);
        __ceph_remove_cap(cap, false);
        if (!ci->i_auth_cap) {
+               struct ceph_cap_flush *cf;
                struct ceph_mds_client *mdsc =
                        ceph_sb_to_client(inode->i_sb)->mdsc;
 
+               while (true) {
+                       struct rb_node *n = rb_first(&ci->i_cap_flush_tree);
+                       if (!n)
+                               break;
+                       cf = rb_entry(n, struct ceph_cap_flush, i_node);
+                       rb_erase(&cf->i_node, &ci->i_cap_flush_tree);
+                       list_add(&cf->list, &to_remove);
+               }
+
                spin_lock(&mdsc->cap_dirty_lock);
+
+               list_for_each_entry(cf, &to_remove, list)
+                       rb_erase(&cf->g_node, &mdsc->cap_flush_tree);
+
                if (!list_empty(&ci->i_dirty_item)) {
-                       pr_info(" dropping dirty %s state for %p %lld\n",
+                       pr_warn_ratelimited(
+                               " dropping dirty %s state for %p %lld\n",
                                ceph_cap_string(ci->i_dirty_caps),
                                inode, ceph_ino(inode));
                        ci->i_dirty_caps = 0;
@@ -1139,7 +1178,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                        drop = 1;
                }
                if (!list_empty(&ci->i_flushing_item)) {
-                       pr_info(" dropping dirty+flushing %s state for %p %lld\n",
+                       pr_warn_ratelimited(
+                               " dropping dirty+flushing %s state for %p %lld\n",
                                ceph_cap_string(ci->i_flushing_caps),
                                inode, ceph_ino(inode));
                        ci->i_flushing_caps = 0;
@@ -1148,8 +1188,20 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                        drop = 1;
                }
                spin_unlock(&mdsc->cap_dirty_lock);
+
+               if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
+                       list_add(&ci->i_prealloc_cap_flush->list, &to_remove);
+                       ci->i_prealloc_cap_flush = NULL;
+               }
        }
        spin_unlock(&ci->i_ceph_lock);
+       while (!list_empty(&to_remove)) {
+               struct ceph_cap_flush *cf;
+               cf = list_first_entry(&to_remove,
+                                     struct ceph_cap_flush, list);
+               list_del(&cf->list);
+               ceph_free_cap_flush(cf);
+       }
        while (drop--)
                iput(inode);
        return 0;
@@ -1191,11 +1243,12 @@ static void remove_session_caps(struct ceph_mds_session *session)
                        spin_lock(&session->s_cap_lock);
                }
        }
-       spin_unlock(&session->s_cap_lock);
+
+       // drop cap expires and unlock s_cap_lock
+       cleanup_cap_releases(session->s_mdsc, session);
 
        BUG_ON(session->s_nr_caps > 0);
        BUG_ON(!list_empty(&session->s_cap_flushing));
-       cleanup_cap_releases(session);
 }
 
 /*
@@ -1371,7 +1424,8 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
             inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued),
             ceph_cap_string(used), ceph_cap_string(wanted));
        if (cap == ci->i_auth_cap) {
-               if (ci->i_dirty_caps | ci->i_flushing_caps)
+               if (ci->i_dirty_caps || ci->i_flushing_caps ||
+                   !list_empty(&ci->i_cap_snaps))
                        goto out;
                if ((used | wanted) & CEPH_CAP_ANY_WR)
                        goto out;
@@ -1417,121 +1471,80 @@ static int trim_caps(struct ceph_mds_client *mdsc,
                session->s_trim_caps = 0;
        }
 
-       ceph_add_cap_releases(mdsc, session);
        ceph_send_cap_releases(mdsc, session);
        return 0;
 }
 
-/*
- * Allocate cap_release messages.  If there is a partially full message
- * in the queue, try to allocate enough to cover it's remainder, so that
- * we can send it immediately.
- *
- * Called under s_mutex.
- */
-int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
-                         struct ceph_mds_session *session)
+static int check_capsnap_flush(struct ceph_inode_info *ci,
+                              u64 want_snap_seq)
 {
-       struct ceph_msg *msg, *partial = NULL;
-       struct ceph_mds_cap_release *head;
-       int err = -ENOMEM;
-       int extra = mdsc->fsc->mount_options->cap_release_safety;
-       int num;
-
-       dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds,
-            extra);
-
-       spin_lock(&session->s_cap_lock);
-
-       if (!list_empty(&session->s_cap_releases)) {
-               msg = list_first_entry(&session->s_cap_releases,
-                                      struct ceph_msg,
-                                list_head);
-               head = msg->front.iov_base;
-               num = le32_to_cpu(head->num);
-               if (num) {
-                       dout(" partial %p with (%d/%d)\n", msg, num,
-                            (int)CEPH_CAPS_PER_RELEASE);
-                       extra += CEPH_CAPS_PER_RELEASE - num;
-                       partial = msg;
-               }
-       }
-       while (session->s_num_cap_releases < session->s_nr_caps + extra) {
-               spin_unlock(&session->s_cap_lock);
-               msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE,
-                                  GFP_NOFS, false);
-               if (!msg)
-                       goto out_unlocked;
-               dout("add_cap_releases %p msg %p now %d\n", session, msg,
-                    (int)msg->front.iov_len);
-               head = msg->front.iov_base;
-               head->num = cpu_to_le32(0);
-               msg->front.iov_len = sizeof(*head);
-               spin_lock(&session->s_cap_lock);
-               list_add(&msg->list_head, &session->s_cap_releases);
-               session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE;
-       }
-
-       if (partial) {
-               head = partial->front.iov_base;
-               num = le32_to_cpu(head->num);
-               dout(" queueing partial %p with %d/%d\n", partial, num,
-                    (int)CEPH_CAPS_PER_RELEASE);
-               list_move_tail(&partial->list_head,
-                              &session->s_cap_releases_done);
-               session->s_num_cap_releases -= CEPH_CAPS_PER_RELEASE - num;
+       int ret = 1;
+       spin_lock(&ci->i_ceph_lock);
+       if (want_snap_seq > 0 && !list_empty(&ci->i_cap_snaps)) {
+               struct ceph_cap_snap *capsnap =
+                       list_first_entry(&ci->i_cap_snaps,
+                                        struct ceph_cap_snap, ci_item);
+               ret = capsnap->follows >= want_snap_seq;
        }
-       err = 0;
-       spin_unlock(&session->s_cap_lock);
-out_unlocked:
-       return err;
+       spin_unlock(&ci->i_ceph_lock);
+       return ret;
 }
 
-static int check_cap_flush(struct inode *inode, u64 want_flush_seq)
+static int check_caps_flush(struct ceph_mds_client *mdsc,
+                           u64 want_flush_tid)
 {
-       struct ceph_inode_info *ci = ceph_inode(inode);
-       int ret;
-       spin_lock(&ci->i_ceph_lock);
-       if (ci->i_flushing_caps)
-               ret = ci->i_cap_flush_seq >= want_flush_seq;
-       else
-               ret = 1;
-       spin_unlock(&ci->i_ceph_lock);
+       struct rb_node *n;
+       struct ceph_cap_flush *cf;
+       int ret = 1;
+
+       spin_lock(&mdsc->cap_dirty_lock);
+       n = rb_first(&mdsc->cap_flush_tree);
+       cf = n ? rb_entry(n, struct ceph_cap_flush, g_node) : NULL;
+       if (cf && cf->tid <= want_flush_tid) {
+               dout("check_caps_flush still flushing tid %llu <= %llu\n",
+                    cf->tid, want_flush_tid);
+               ret = 0;
+       }
+       spin_unlock(&mdsc->cap_dirty_lock);
        return ret;
 }
 
 /*
  * flush all dirty inode data to disk.
  *
- * returns true if we've flushed through want_flush_seq
+ * waits until we've flushed through want_flush_tid and want_snap_seq
  */
-static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
+static void wait_caps_flush(struct ceph_mds_client *mdsc,
+                           u64 want_flush_tid, u64 want_snap_seq)
 {
        int mds;
 
-       dout("check_cap_flush want %lld\n", want_flush_seq);
+       dout("check_caps_flush want %llu snap want %llu\n",
+            want_flush_tid, want_snap_seq);
        mutex_lock(&mdsc->mutex);
-       for (mds = 0; mds < mdsc->max_sessions; mds++) {
+       for (mds = 0; mds < mdsc->max_sessions; ) {
                struct ceph_mds_session *session = mdsc->sessions[mds];
                struct inode *inode = NULL;
 
-               if (!session)
+               if (!session) {
+                       mds++;
                        continue;
+               }
                get_session(session);
                mutex_unlock(&mdsc->mutex);
 
                mutex_lock(&session->s_mutex);
-               if (!list_empty(&session->s_cap_flushing)) {
-                       struct ceph_inode_info *ci =
-                               list_entry(session->s_cap_flushing.next,
-                                          struct ceph_inode_info,
-                                          i_flushing_item);
-
-                       if (!check_cap_flush(&ci->vfs_inode, want_flush_seq)) {
-                               dout("check_cap_flush still flushing %p "
-                                    "seq %lld <= %lld to mds%d\n",
-                                    &ci->vfs_inode, ci->i_cap_flush_seq,
-                                    want_flush_seq, session->s_mds);
+               if (!list_empty(&session->s_cap_snaps_flushing)) {
+                       struct ceph_cap_snap *capsnap =
+                               list_first_entry(&session->s_cap_snaps_flushing,
+                                                struct ceph_cap_snap,
+                                                flushing_item);
+                       struct ceph_inode_info *ci = capsnap->ci;
+                       if (!check_capsnap_flush(ci, want_snap_seq)) {
+                               dout("check_cap_flush still flushing snap %p "
+                                    "follows %lld <= %lld to mds%d\n",
+                                    &ci->vfs_inode, capsnap->follows,
+                                    want_snap_seq, mds);
                                inode = igrab(&ci->vfs_inode);
                        }
                }
@@ -1540,15 +1553,21 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
 
                if (inode) {
                        wait_event(mdsc->cap_flushing_wq,
-                                  check_cap_flush(inode, want_flush_seq));
+                                  check_capsnap_flush(ceph_inode(inode),
+                                                      want_snap_seq));
                        iput(inode);
+               } else {
+                       mds++;
                }
 
                mutex_lock(&mdsc->mutex);
        }
-
        mutex_unlock(&mdsc->mutex);
-       dout("check_cap_flush ok, flushed thru %lld\n", want_flush_seq);
+
+       wait_event(mdsc->cap_flushing_wq,
+                  check_caps_flush(mdsc, want_flush_tid));
+
+       dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid);
 }
 
 /*
@@ -1557,60 +1576,74 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
 void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
                            struct ceph_mds_session *session)
 {
-       struct ceph_msg *msg;
+       struct ceph_msg *msg = NULL;
+       struct ceph_mds_cap_release *head;
+       struct ceph_mds_cap_item *item;
+       struct ceph_cap *cap;
+       LIST_HEAD(tmp_list);
+       int num_cap_releases;
 
-       dout("send_cap_releases mds%d\n", session->s_mds);
        spin_lock(&session->s_cap_lock);
-       while (!list_empty(&session->s_cap_releases_done)) {
-               msg = list_first_entry(&session->s_cap_releases_done,
-                                struct ceph_msg, list_head);
-               list_del_init(&msg->list_head);
-               spin_unlock(&session->s_cap_lock);
-               msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
-               dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
-               ceph_con_send(&session->s_con, msg);
-               spin_lock(&session->s_cap_lock);
-       }
+again:
+       list_splice_init(&session->s_cap_releases, &tmp_list);
+       num_cap_releases = session->s_num_cap_releases;
+       session->s_num_cap_releases = 0;
        spin_unlock(&session->s_cap_lock);
-}
 
-static void discard_cap_releases(struct ceph_mds_client *mdsc,
-                                struct ceph_mds_session *session)
-{
-       struct ceph_msg *msg;
-       struct ceph_mds_cap_release *head;
-       unsigned num;
-
-       dout("discard_cap_releases mds%d\n", session->s_mds);
+       while (!list_empty(&tmp_list)) {
+               if (!msg) {
+                       msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE,
+                                       PAGE_CACHE_SIZE, GFP_NOFS, false);
+                       if (!msg)
+                               goto out_err;
+                       head = msg->front.iov_base;
+                       head->num = cpu_to_le32(0);
+                       msg->front.iov_len = sizeof(*head);
+               }
+               cap = list_first_entry(&tmp_list, struct ceph_cap,
+                                       session_caps);
+               list_del(&cap->session_caps);
+               num_cap_releases--;
 
-       if (!list_empty(&session->s_cap_releases)) {
-               /* zero out the in-progress message */
-               msg = list_first_entry(&session->s_cap_releases,
-                                       struct ceph_msg, list_head);
                head = msg->front.iov_base;
-               num = le32_to_cpu(head->num);
-               dout("discard_cap_releases mds%d %p %u\n",
-                    session->s_mds, msg, num);
-               head->num = cpu_to_le32(0);
-               msg->front.iov_len = sizeof(*head);
-               session->s_num_cap_releases += num;
+               le32_add_cpu(&head->num, 1);
+               item = msg->front.iov_base + msg->front.iov_len;
+               item->ino = cpu_to_le64(cap->cap_ino);
+               item->cap_id = cpu_to_le64(cap->cap_id);
+               item->migrate_seq = cpu_to_le32(cap->mseq);
+               item->seq = cpu_to_le32(cap->issue_seq);
+               msg->front.iov_len += sizeof(*item);
+
+               ceph_put_cap(mdsc, cap);
+
+               if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
+                       msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+                       dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
+                       ceph_con_send(&session->s_con, msg);
+                       msg = NULL;
+               }
        }
 
-       /* requeue completed messages */
-       while (!list_empty(&session->s_cap_releases_done)) {
-               msg = list_first_entry(&session->s_cap_releases_done,
-                                struct ceph_msg, list_head);
-               list_del_init(&msg->list_head);
+       BUG_ON(num_cap_releases != 0);
 
-               head = msg->front.iov_base;
-               num = le32_to_cpu(head->num);
-               dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg,
-                    num);
-               session->s_num_cap_releases += num;
-               head->num = cpu_to_le32(0);
-               msg->front.iov_len = sizeof(*head);
-               list_add(&msg->list_head, &session->s_cap_releases);
+       spin_lock(&session->s_cap_lock);
+       if (!list_empty(&session->s_cap_releases))
+               goto again;
+       spin_unlock(&session->s_cap_lock);
+
+       if (msg) {
+               msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+               dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
+               ceph_con_send(&session->s_con, msg);
        }
+       return;
+out_err:
+       pr_err("send_cap_releases mds%d, failed to allocate message\n",
+               session->s_mds);
+       spin_lock(&session->s_cap_lock);
+       list_splice(&tmp_list, &session->s_cap_releases);
+       session->s_num_cap_releases += num_cap_releases;
+       spin_unlock(&session->s_cap_lock);
 }
 
 /*
@@ -1635,7 +1668,8 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
 
        order = get_order(size * num_entries);
        while (order >= 0) {
-               rinfo->dir_in = (void*)__get_free_pages(GFP_NOFS | __GFP_NOWARN,
+               rinfo->dir_in = (void*)__get_free_pages(GFP_KERNEL |
+                                                       __GFP_NOWARN,
                                                        order);
                if (rinfo->dir_in)
                        break;
@@ -1697,13 +1731,9 @@ static struct ceph_mds_request *__get_oldest_req(struct ceph_mds_client *mdsc)
                        struct ceph_mds_request, r_node);
 }
 
-static u64 __get_oldest_tid(struct ceph_mds_client *mdsc)
+static inline  u64 __get_oldest_tid(struct ceph_mds_client *mdsc)
 {
-       struct ceph_mds_request *req = __get_oldest_req(mdsc);
-
-       if (req)
-               return req->r_tid;
-       return 0;
+       return mdsc->oldest_tid;
 }
 
 /*
@@ -2267,15 +2297,18 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
        /* wait */
        mutex_unlock(&mdsc->mutex);
        dout("do_request waiting\n");
-       if (req->r_timeout) {
-               err = (long)wait_for_completion_killable_timeout(
-                       &req->r_completion, req->r_timeout);
-               if (err == 0)
-                       err = -EIO;
-       } else if (req->r_wait_for_completion) {
+       if (!req->r_timeout && req->r_wait_for_completion) {
                err = req->r_wait_for_completion(mdsc, req);
        } else {
-               err = wait_for_completion_killable(&req->r_completion);
+               long timeleft = wait_for_completion_killable_timeout(
+                                       &req->r_completion,
+                                       ceph_timeout_jiffies(req->r_timeout));
+               if (timeleft > 0)
+                       err = 0;
+               else if (!timeleft)
+                       err = -EIO;  /* timed out */
+               else
+                       err = timeleft;  /* killed */
        }
        dout("do_request waited, got %d\n", err);
        mutex_lock(&mdsc->mutex);
@@ -2496,7 +2529,6 @@ out_err:
        }
        mutex_unlock(&mdsc->mutex);
 
-       ceph_add_cap_releases(mdsc, req->r_session);
        mutex_unlock(&session->s_mutex);
 
        /* kick calling process */
@@ -2888,8 +2920,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
         */
        session->s_cap_reconnect = 1;
        /* drop old cap expires; we're about to reestablish that state */
-       discard_cap_releases(mdsc, session);
-       spin_unlock(&session->s_cap_lock);
+       cleanup_cap_releases(mdsc, session);
 
        /* trim unused caps to reduce MDS's cache rejoin time */
        if (mdsc->fsc->sb->s_root)
@@ -2956,6 +2987,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 
        reply->hdr.data_len = cpu_to_le32(pagelist->length);
        ceph_msg_data_add_pagelist(reply, pagelist);
+
+       ceph_early_kick_flushing_caps(mdsc, session);
+
        ceph_con_send(&session->s_con, reply);
 
        mutex_unlock(&session->s_mutex);
@@ -3352,7 +3386,6 @@ static void delayed_work(struct work_struct *work)
                        send_renew_caps(mdsc, s);
                else
                        ceph_con_keepalive(&s->s_con);
-               ceph_add_cap_releases(mdsc, s);
                if (s->s_state == CEPH_MDS_SESSION_OPEN ||
                    s->s_state == CEPH_MDS_SESSION_HUNG)
                        ceph_send_cap_releases(mdsc, s);
@@ -3390,11 +3423,13 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
        atomic_set(&mdsc->num_sessions, 0);
        mdsc->max_sessions = 0;
        mdsc->stopping = 0;
+       mdsc->last_snap_seq = 0;
        init_rwsem(&mdsc->snap_rwsem);
        mdsc->snap_realms = RB_ROOT;
        INIT_LIST_HEAD(&mdsc->snap_empty);
        spin_lock_init(&mdsc->snap_empty_lock);
        mdsc->last_tid = 0;
+       mdsc->oldest_tid = 0;
        mdsc->request_tree = RB_ROOT;
        INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
        mdsc->last_renew_caps = jiffies;
@@ -3402,7 +3437,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
        spin_lock_init(&mdsc->cap_delay_lock);
        INIT_LIST_HEAD(&mdsc->snap_flush_list);
        spin_lock_init(&mdsc->snap_flush_lock);
-       mdsc->cap_flush_seq = 0;
+       mdsc->last_cap_flush_tid = 1;
+       mdsc->cap_flush_tree = RB_ROOT;
        INIT_LIST_HEAD(&mdsc->cap_dirty);
        INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
        mdsc->num_cap_flushing = 0;
@@ -3414,6 +3450,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
        ceph_caps_init(mdsc);
        ceph_adjust_min_caps(mdsc, fsc->min_caps);
 
+       init_rwsem(&mdsc->pool_perm_rwsem);
+       mdsc->pool_perm_tree = RB_ROOT;
+
        return 0;
 }
 
@@ -3423,8 +3462,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
  */
 static void wait_requests(struct ceph_mds_client *mdsc)
 {
+       struct ceph_options *opts = mdsc->fsc->client->options;
        struct ceph_mds_request *req;
-       struct ceph_fs_client *fsc = mdsc->fsc;
 
        mutex_lock(&mdsc->mutex);
        if (__get_oldest_req(mdsc)) {
@@ -3432,7 +3471,7 @@ static void wait_requests(struct ceph_mds_client *mdsc)
 
                dout("wait_requests waiting for requests\n");
                wait_for_completion_timeout(&mdsc->safe_umount_waiters,
-                                   fsc->client->options->mount_timeout * HZ);
+                                   ceph_timeout_jiffies(opts->mount_timeout));
 
                /* tear down remaining requests */
                mutex_lock(&mdsc->mutex);
@@ -3485,7 +3524,8 @@ restart:
                        nextreq = rb_entry(n, struct ceph_mds_request, r_node);
                else
                        nextreq = NULL;
-               if ((req->r_op & CEPH_MDS_OP_WRITE)) {
+               if (req->r_op != CEPH_MDS_OP_SETFILELOCK &&
+                   (req->r_op & CEPH_MDS_OP_WRITE)) {
                        /* write op */
                        ceph_mdsc_get_request(req);
                        if (nextreq)
@@ -3513,7 +3553,7 @@ restart:
 
 void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
 {
-       u64 want_tid, want_flush;
+       u64 want_tid, want_flush, want_snap;
 
        if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
                return;
@@ -3525,13 +3565,18 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
 
        ceph_flush_dirty_caps(mdsc);
        spin_lock(&mdsc->cap_dirty_lock);
-       want_flush = mdsc->cap_flush_seq;
+       want_flush = mdsc->last_cap_flush_tid;
        spin_unlock(&mdsc->cap_dirty_lock);
 
-       dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush);
+       down_read(&mdsc->snap_rwsem);
+       want_snap = mdsc->last_snap_seq;
+       up_read(&mdsc->snap_rwsem);
+
+       dout("sync want tid %lld flush_seq %lld snap_seq %lld\n",
+            want_tid, want_flush, want_snap);
 
        wait_unsafe_requests(mdsc, want_tid);
-       wait_caps_flush(mdsc, want_flush);
+       wait_caps_flush(mdsc, want_flush, want_snap);
 }
 
 /*
@@ -3549,10 +3594,9 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc)
  */
 void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
 {
+       struct ceph_options *opts = mdsc->fsc->client->options;
        struct ceph_mds_session *session;
        int i;
-       struct ceph_fs_client *fsc = mdsc->fsc;
-       unsigned long timeout = fsc->client->options->mount_timeout * HZ;
 
        dout("close_sessions\n");
 
@@ -3573,7 +3617,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
 
        dout("waiting for sessions to close\n");
        wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
-                          timeout);
+                          ceph_timeout_jiffies(opts->mount_timeout));
 
        /* tear down remaining sessions */
        mutex_lock(&mdsc->mutex);
@@ -3607,6 +3651,7 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
                ceph_mdsmap_destroy(mdsc->mdsmap);
        kfree(mdsc->sessions);
        ceph_caps_finalize(mdsc);
+       ceph_pool_perm_destroy(mdsc);
 }
 
 void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
index 1875b5d985c6b0df2fbb38e16f39a78ecc76750d..762757e6cebf95fff324894d1650b8271210acdd 100644 (file)
@@ -139,7 +139,6 @@ struct ceph_mds_session {
        int               s_cap_reconnect;
        int               s_readonly;
        struct list_head  s_cap_releases; /* waiting cap_release messages */
-       struct list_head  s_cap_releases_done; /* ready to send */
        struct ceph_cap  *s_cap_iterator;
 
        /* protected by mutex */
@@ -228,7 +227,7 @@ struct ceph_mds_request {
        int r_err;
        bool r_aborted;
 
-       unsigned long r_timeout;  /* optional.  jiffies */
+       unsigned long r_timeout;  /* optional.  jiffies, 0 is "wait forever" */
        unsigned long r_started;  /* start time to measure timeout against */
        unsigned long r_request_started; /* start time for mds request only,
                                            used to measure lease durations */
@@ -254,12 +253,21 @@ struct ceph_mds_request {
        bool              r_got_unsafe, r_got_safe, r_got_result;
 
        bool              r_did_prepopulate;
+       long long         r_dir_release_cnt;
+       long long         r_dir_ordered_cnt;
+       int               r_readdir_cache_idx;
        u32               r_readdir_offset;
 
        struct ceph_cap_reservation r_caps_reservation;
        int r_num_caps;
 };
 
+struct ceph_pool_perm {
+       struct rb_node node;
+       u32 pool;
+       int perm;
+};
+
 /*
  * mds client state
  */
@@ -284,12 +292,15 @@ struct ceph_mds_client {
         * references (implying they contain no inodes with caps) that
         * should be destroyed.
         */
+       u64                     last_snap_seq;
        struct rw_semaphore     snap_rwsem;
        struct rb_root          snap_realms;
        struct list_head        snap_empty;
        spinlock_t              snap_empty_lock;  /* protect snap_empty */
 
        u64                    last_tid;      /* most recent mds request */
+       u64                    oldest_tid;    /* oldest incomplete mds request,
+                                                excluding setfilelock requests */
        struct rb_root         request_tree;  /* pending mds requests */
        struct delayed_work    delayed_work;  /* delayed work */
        unsigned long    last_renew_caps;  /* last time we renewed our caps */
@@ -298,7 +309,8 @@ struct ceph_mds_client {
        struct list_head snap_flush_list;  /* cap_snaps ready to flush */
        spinlock_t       snap_flush_lock;
 
-       u64               cap_flush_seq;
+       u64               last_cap_flush_tid;
+       struct rb_root    cap_flush_tree;
        struct list_head  cap_dirty;        /* inodes with dirty caps */
        struct list_head  cap_dirty_migrating; /* ...that are migration... */
        int               num_cap_flushing; /* # caps we are flushing */
@@ -328,6 +340,9 @@ struct ceph_mds_client {
        spinlock_t        dentry_lru_lock;
        struct list_head  dentry_lru;
        int               num_dentry;
+
+       struct rw_semaphore     pool_perm_rwsem;
+       struct rb_root          pool_perm_tree;
 };
 
 extern const char *ceph_mds_op_name(int op);
@@ -379,8 +394,6 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
        kref_put(&req->r_kref, ceph_mdsc_release_request);
 }
 
-extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
-                                struct ceph_mds_session *session);
 extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
                                   struct ceph_mds_session *session);
 
index a97e39f09ba683349bb5f97e44f0d229b3a88936..233d906aec02b7c4508fd2488908bdb95a130aa4 100644 (file)
@@ -296,7 +296,7 @@ static int cmpu64_rev(const void *a, const void *b)
 }
 
 
-static struct ceph_snap_context *empty_snapc;
+struct ceph_snap_context *ceph_empty_snapc;
 
 /*
  * build the snap context for a given realm.
@@ -338,9 +338,9 @@ static int build_snap_context(struct ceph_snap_realm *realm)
                return 0;
        }
 
-       if (num == 0 && realm->seq == empty_snapc->seq) {
-               ceph_get_snap_context(empty_snapc);
-               snapc = empty_snapc;
+       if (num == 0 && realm->seq == ceph_empty_snapc->seq) {
+               ceph_get_snap_context(ceph_empty_snapc);
+               snapc = ceph_empty_snapc;
                goto done;
        }
 
@@ -436,6 +436,14 @@ static int dup_array(u64 **dst, __le64 *src, u32 num)
        return 0;
 }
 
+static bool has_new_snaps(struct ceph_snap_context *o,
+                         struct ceph_snap_context *n)
+{
+       if (n->num_snaps == 0)
+               return false;
+       /* snaps are in descending order, so n->snaps[0] is n's largest entry */
+       return n->snaps[0] > o->seq;
+}
 
 /*
  * When a snapshot is applied, the size/mtime inode metadata is queued
@@ -455,6 +463,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 {
        struct inode *inode = &ci->vfs_inode;
        struct ceph_cap_snap *capsnap;
+       struct ceph_snap_context *old_snapc, *new_snapc;
        int used, dirty;
 
        capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS);
@@ -467,6 +476,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
        used = __ceph_caps_used(ci);
        dirty = __ceph_caps_dirty(ci);
 
+       old_snapc = ci->i_head_snapc;
+       new_snapc = ci->i_snap_realm->cached_context;
+
        /*
         * If there is a write in progress, treat that as a dirty Fw,
         * even though it hasn't completed yet; by the time we finish
@@ -481,76 +493,95 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
                   writes in progress now were started before the previous
                   cap_snap.  lucky us. */
                dout("queue_cap_snap %p already pending\n", inode);
-               kfree(capsnap);
-       } else if (ci->i_snap_realm->cached_context == empty_snapc) {
-               dout("queue_cap_snap %p empty snapc\n", inode);
-               kfree(capsnap);
-       } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
-                           CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
-               struct ceph_snap_context *snapc = ci->i_head_snapc;
-
-               /*
-                * if we are a sync write, we may need to go to the snaprealm
-                * to get the current snapc.
-                */
-               if (!snapc)
-                       snapc = ci->i_snap_realm->cached_context;
+               goto update_snapc;
+       }
+       if (ci->i_wrbuffer_ref_head == 0 &&
+           !(dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR))) {
+               dout("queue_cap_snap %p nothing dirty|writing\n", inode);
+               goto update_snapc;
+       }
 
-               dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n",
-                    inode, capsnap, snapc, ceph_cap_string(dirty));
-               ihold(inode);
+       BUG_ON(!old_snapc);
 
-               atomic_set(&capsnap->nref, 1);
-               capsnap->ci = ci;
-               INIT_LIST_HEAD(&capsnap->ci_item);
-               INIT_LIST_HEAD(&capsnap->flushing_item);
-
-               capsnap->follows = snapc->seq;
-               capsnap->issued = __ceph_caps_issued(ci, NULL);
-               capsnap->dirty = dirty;
-
-               capsnap->mode = inode->i_mode;
-               capsnap->uid = inode->i_uid;
-               capsnap->gid = inode->i_gid;
-
-               if (dirty & CEPH_CAP_XATTR_EXCL) {
-                       __ceph_build_xattrs_blob(ci);
-                       capsnap->xattr_blob =
-                               ceph_buffer_get(ci->i_xattrs.blob);
-                       capsnap->xattr_version = ci->i_xattrs.version;
-               } else {
-                       capsnap->xattr_blob = NULL;
-                       capsnap->xattr_version = 0;
+       /*
+        * There is no need to send FLUSHSNAP message to MDS if there is
+        * no new snapshot. But when there is dirty pages or on-going
+        * writes, we still need to create cap_snap. cap_snap is needed
+        * by the write path and page writeback path.
+        *
+        * also see ceph_try_drop_cap_snap()
+        */
+       if (has_new_snaps(old_snapc, new_snapc)) {
+               if (dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR))
+                       capsnap->need_flush = true;
+       } else {
+               if (!(used & CEPH_CAP_FILE_WR) &&
+                   ci->i_wrbuffer_ref_head == 0) {
+                       dout("queue_cap_snap %p "
+                            "no new_snap|dirty_page|writing\n", inode);
+                       goto update_snapc;
                }
+       }
 
-               capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
-
-               /* dirty page count moved from _head to this cap_snap;
-                  all subsequent writes page dirties occur _after_ this
-                  snapshot. */
-               capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
-               ci->i_wrbuffer_ref_head = 0;
-               capsnap->context = snapc;
-               ci->i_head_snapc =
-                       ceph_get_snap_context(ci->i_snap_realm->cached_context);
-               dout(" new snapc is %p\n", ci->i_head_snapc);
-               list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
-
-               if (used & CEPH_CAP_FILE_WR) {
-                       dout("queue_cap_snap %p cap_snap %p snapc %p"
-                            " seq %llu used WR, now pending\n", inode,
-                            capsnap, snapc, snapc->seq);
-                       capsnap->writing = 1;
-               } else {
-                       /* note mtime, size NOW. */
-                       __ceph_finish_cap_snap(ci, capsnap);
-               }
+       dout("queue_cap_snap %p cap_snap %p queuing under %p %s %s\n",
+            inode, capsnap, old_snapc, ceph_cap_string(dirty),
+            capsnap->need_flush ? "" : "no_flush");
+       ihold(inode);
+
+       atomic_set(&capsnap->nref, 1);
+       capsnap->ci = ci;
+       INIT_LIST_HEAD(&capsnap->ci_item);
+       INIT_LIST_HEAD(&capsnap->flushing_item);
+
+       capsnap->follows = old_snapc->seq;
+       capsnap->issued = __ceph_caps_issued(ci, NULL);
+       capsnap->dirty = dirty;
+
+       capsnap->mode = inode->i_mode;
+       capsnap->uid = inode->i_uid;
+       capsnap->gid = inode->i_gid;
+
+       if (dirty & CEPH_CAP_XATTR_EXCL) {
+               __ceph_build_xattrs_blob(ci);
+               capsnap->xattr_blob =
+                       ceph_buffer_get(ci->i_xattrs.blob);
+               capsnap->xattr_version = ci->i_xattrs.version;
        } else {
-               dout("queue_cap_snap %p nothing dirty|writing\n", inode);
-               kfree(capsnap);
+               capsnap->xattr_blob = NULL;
+               capsnap->xattr_version = 0;
        }
 
+       capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
+
+       /* dirty page count moved from _head to this cap_snap;
+          all subsequent writes page dirties occur _after_ this
+          snapshot. */
+       capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
+       ci->i_wrbuffer_ref_head = 0;
+       capsnap->context = old_snapc;
+       list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
+       /* old_snapc is still read by the dout() below; clear it afterwards */
+       if (used & CEPH_CAP_FILE_WR) {
+               dout("queue_cap_snap %p cap_snap %p snapc %p"
+                    " seq %llu used WR, now pending\n", inode,
+                    capsnap, old_snapc, old_snapc->seq);
+               capsnap->writing = 1;
+       } else {
+               /* note mtime, size NOW. */
+               __ceph_finish_cap_snap(ci, capsnap);
+       }
+       capsnap = NULL;         /* queued on i_cap_snaps: exit must not free it */
+       old_snapc = NULL;       /* ref handed to capsnap->context: exit must not put it */
+
+update_snapc:
+       if (ci->i_head_snapc) {
+               ci->i_head_snapc = ceph_get_snap_context(new_snapc);
+               dout(" new snapc is %p\n", new_snapc);
+       }
        spin_unlock(&ci->i_ceph_lock);
+
+       kfree(capsnap);
+       ceph_put_snap_context(old_snapc);
 }
 
 /*
@@ -699,6 +730,8 @@ more:
 
                /* queue realm for cap_snap creation */
                list_add(&realm->dirty_item, &dirty_realms);
+               if (realm->seq > mdsc->last_snap_seq)
+                       mdsc->last_snap_seq = realm->seq;
 
                invalidate = 1;
        } else if (!realm->cached_context) {
@@ -964,14 +997,14 @@ out:
 
 int __init ceph_snap_init(void)
 {
-       empty_snapc = ceph_create_snap_context(0, GFP_NOFS);
-       if (!empty_snapc)
+       ceph_empty_snapc = ceph_create_snap_context(0, GFP_NOFS);
+       if (!ceph_empty_snapc)
                return -ENOMEM;
-       empty_snapc->seq = 1;
+       ceph_empty_snapc->seq = 1;
        return 0;
 }
 
 void ceph_snap_exit(void)
 {
-       ceph_put_snap_context(empty_snapc);
+       ceph_put_snap_context(ceph_empty_snapc);
 }
index 4e9905374078a228d18762a46a782a925f8f1675..d1c833c321b92eff48d9f35bf7171ef0ac59e7bf 100644 (file)
@@ -134,10 +134,12 @@ enum {
        Opt_noino32,
        Opt_fscache,
        Opt_nofscache,
+       Opt_poolperm,
+       Opt_nopoolperm,
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
        Opt_acl,
 #endif
-       Opt_noacl
+       Opt_noacl,
 };
 
 static match_table_t fsopt_tokens = {
@@ -165,6 +167,8 @@ static match_table_t fsopt_tokens = {
        {Opt_noino32, "noino32"},
        {Opt_fscache, "fsc"},
        {Opt_nofscache, "nofsc"},
+       {Opt_poolperm, "poolperm"},
+       {Opt_nopoolperm, "nopoolperm"},
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
        {Opt_acl, "acl"},
 #endif
@@ -268,6 +272,13 @@ static int parse_fsopt_token(char *c, void *private)
        case Opt_nofscache:
                fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
                break;
+       case Opt_poolperm:
+               /* clear the "no pool permission check" flag (checks enabled) */
+               fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM;
+               break;
+       case Opt_nopoolperm:
+               fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
+               break;
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
        case Opt_acl:
                fsopt->sb_flags |= MS_POSIXACL;
@@ -436,6 +447,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
                seq_puts(m, ",nodcache");
        if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
                seq_puts(m, ",fsc");
+       if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
+               seq_puts(m, ",nopoolperm");
 
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
        if (fsopt->sb_flags & MS_POSIXACL)
@@ -609,6 +622,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
  */
 struct kmem_cache *ceph_inode_cachep;
 struct kmem_cache *ceph_cap_cachep;
+struct kmem_cache *ceph_cap_flush_cachep;
 struct kmem_cache *ceph_dentry_cachep;
 struct kmem_cache *ceph_file_cachep;
 
@@ -634,6 +648,10 @@ static int __init init_caches(void)
                                     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
        if (ceph_cap_cachep == NULL)
                goto bad_cap;
+       ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
+                                          SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+       if (ceph_cap_flush_cachep == NULL)
+               goto bad_cap_flush;
 
        ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
                                        SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
@@ -652,6 +670,8 @@ static int __init init_caches(void)
 bad_file:
        kmem_cache_destroy(ceph_dentry_cachep);
 bad_dentry:
+       kmem_cache_destroy(ceph_cap_flush_cachep);
+bad_cap_flush:
        kmem_cache_destroy(ceph_cap_cachep);
 bad_cap:
        kmem_cache_destroy(ceph_inode_cachep);
@@ -668,6 +688,7 @@ static void destroy_caches(void)
 
        kmem_cache_destroy(ceph_inode_cachep);
        kmem_cache_destroy(ceph_cap_cachep);
+       kmem_cache_destroy(ceph_cap_flush_cachep);
        kmem_cache_destroy(ceph_dentry_cachep);
        kmem_cache_destroy(ceph_file_cachep);
 
@@ -729,7 +750,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
        req->r_ino1.ino = CEPH_INO_ROOT;
        req->r_ino1.snap = CEPH_NOSNAP;
        req->r_started = started;
-       req->r_timeout = fsc->client->options->mount_timeout * HZ;
+       req->r_timeout = fsc->client->options->mount_timeout;
        req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
        req->r_num_caps = 2;
        err = ceph_mdsc_do_request(mdsc, NULL, req);
index fa20e131893956a5360b2f6c37cd1aa7fce7542e..860cc016e70d4ff463c1f7845fc648eaf58269c4 100644 (file)
@@ -35,6 +35,7 @@
 #define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
 #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
 #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
+#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
 
 #define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES | \
                                   CEPH_MOUNT_OPT_DCACHE)
@@ -121,11 +122,21 @@ struct ceph_cap {
        struct rb_node ci_node;          /* per-ci cap tree */
        struct ceph_mds_session *session;
        struct list_head session_caps;   /* per-session caplist */
-       int mds;
        u64 cap_id;       /* unique cap id (mds provided) */
-       int issued;       /* latest, from the mds */
-       int implemented;  /* implemented superset of issued (for revocation) */
-       int mds_wanted;
+       union {
+               /* in-use caps */
+               struct {
+                       int issued;       /* latest, from the mds */
+                       int implemented;  /* implemented superset of
+                                            issued (for revocation) */
+                       int mds, mds_wanted;
+               };
+               /* caps to release */
+               struct {
+                       u64 cap_ino;
+                       int queue_release;
+               };
+       };
        u32 seq, issue_seq, mseq;
        u32 cap_gen;      /* active/stale cycle */
        unsigned long last_used;
@@ -163,6 +174,7 @@ struct ceph_cap_snap {
        int writing;   /* a sync write is still in progress */
        int dirty_pages;     /* dirty pages awaiting writeback */
        bool inline_data;
+       bool need_flush;
 };
 
 static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
@@ -174,6 +186,17 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
        }
 }
 
+struct ceph_cap_flush {
+       u64 tid;
+       int caps;
+       bool kick;
+       struct rb_node g_node; // global
+       union {
+               struct rb_node i_node; // inode
+               struct list_head list;
+       };
+};
+
 /*
  * The frag tree describes how a directory is fragmented, potentially across
  * multiple metadata servers.  It is also used to indicate points where
@@ -259,9 +282,9 @@ struct ceph_inode_info {
        u32 i_time_warp_seq;
 
        unsigned i_ceph_flags;
-       int i_ordered_count;
-       atomic_t i_release_count;
-       atomic_t i_complete_count;
+       atomic64_t i_release_count;
+       atomic64_t i_ordered_count;
+       atomic64_t i_complete_seq[2];
 
        struct ceph_dir_layout i_dir_layout;
        struct ceph_file_layout i_layout;
@@ -283,11 +306,11 @@ struct ceph_inode_info {
        struct ceph_cap *i_auth_cap;     /* authoritative cap, if any */
        unsigned i_dirty_caps, i_flushing_caps;     /* mask of dirtied fields */
        struct list_head i_dirty_item, i_flushing_item;
-       u64 i_cap_flush_seq;
        /* we need to track cap writeback on a per-cap-bit basis, to allow
         * overlapping, pipelined cap flushes to the mds.  we can probably
         * reduce the tid to 8 bits if we're concerned about inode size. */
-       u16 i_cap_flush_last_tid, i_cap_flush_tid[CEPH_CAP_BITS];
+       struct ceph_cap_flush *i_prealloc_cap_flush;
+       struct rb_root i_cap_flush_tree;
        wait_queue_head_t i_cap_wq;      /* threads waiting on a capability */
        unsigned long i_hold_caps_min; /* jiffies */
        unsigned long i_hold_caps_max; /* jiffies */
@@ -438,36 +461,46 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 /*
  * Ceph inode.
  */
-#define CEPH_I_DIR_ORDERED     1  /* dentries in dir are ordered */
-#define CEPH_I_NODELAY         4  /* do not delay cap release */
-#define CEPH_I_FLUSH           8  /* do not delay flush of dirty metadata */
-#define CEPH_I_NOFLUSH         16 /* do not flush dirty caps */
+#define CEPH_I_DIR_ORDERED     (1 << 0)  /* dentries in dir are ordered */
+#define CEPH_I_NODELAY         (1 << 1)  /* do not delay cap release */
+#define CEPH_I_FLUSH           (1 << 2)  /* do not delay flush of dirty metadata */
+#define CEPH_I_NOFLUSH         (1 << 3)  /* do not flush dirty caps */
+#define CEPH_I_POOL_PERM       (1 << 4)  /* pool rd/wr bits are valid */
+#define CEPH_I_POOL_RD         (1 << 5)  /* can read from pool */
+#define CEPH_I_POOL_WR         (1 << 6)  /* can write to pool */
+
 
 static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
-                                          int release_count, int ordered_count)
+                                          long long release_count,
+                                          long long ordered_count)
 {
-       atomic_set(&ci->i_complete_count, release_count);
-       if (ci->i_ordered_count == ordered_count)
-               ci->i_ceph_flags |= CEPH_I_DIR_ORDERED;
-       else
-               ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
+       smp_mb__before_atomic();
+       atomic64_set(&ci->i_complete_seq[0], release_count);
+       atomic64_set(&ci->i_complete_seq[1], ordered_count);
 }
 
 static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci)
 {
-       atomic_inc(&ci->i_release_count);
+       atomic64_inc(&ci->i_release_count);
+}
+
+static inline void __ceph_dir_clear_ordered(struct ceph_inode_info *ci)
+{
+       atomic64_inc(&ci->i_ordered_count);
 }
 
 static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci)
 {
-       return atomic_read(&ci->i_complete_count) ==
-               atomic_read(&ci->i_release_count);
+       return atomic64_read(&ci->i_complete_seq[0]) ==
+               atomic64_read(&ci->i_release_count);
 }
 
 static inline bool __ceph_dir_is_complete_ordered(struct ceph_inode_info *ci)
 {
-       return __ceph_dir_is_complete(ci) &&
-               (ci->i_ceph_flags & CEPH_I_DIR_ORDERED);
+       return  atomic64_read(&ci->i_complete_seq[0]) ==
+               atomic64_read(&ci->i_release_count) &&
+               atomic64_read(&ci->i_complete_seq[1]) ==
+               atomic64_read(&ci->i_ordered_count);
 }
 
 static inline void ceph_dir_clear_complete(struct inode *inode)
@@ -477,20 +510,13 @@ static inline void ceph_dir_clear_complete(struct inode *inode)
 
 static inline void ceph_dir_clear_ordered(struct inode *inode)
 {
-       struct ceph_inode_info *ci = ceph_inode(inode);
-       spin_lock(&ci->i_ceph_lock);
-       ci->i_ordered_count++;
-       ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
-       spin_unlock(&ci->i_ceph_lock);
+       __ceph_dir_clear_ordered(ceph_inode(inode));
 }
 
 static inline bool ceph_dir_is_complete_ordered(struct inode *inode)
 {
-       struct ceph_inode_info *ci = ceph_inode(inode);
-       bool ret;
-       spin_lock(&ci->i_ceph_lock);
-       ret = __ceph_dir_is_complete_ordered(ci);
-       spin_unlock(&ci->i_ceph_lock);
+       bool ret = __ceph_dir_is_complete_ordered(ceph_inode(inode));
+       smp_rmb();
        return ret;
 }
 
@@ -552,7 +578,10 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
 {
        return ci->i_dirty_caps | ci->i_flushing_caps;
 }
-extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
+extern struct ceph_cap_flush *ceph_alloc_cap_flush(void);
+extern void ceph_free_cap_flush(struct ceph_cap_flush *cf);
+extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
+                                 struct ceph_cap_flush **pcf);
 
 extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
                                      struct ceph_cap *ocap, int mask);
@@ -606,16 +635,20 @@ struct ceph_file_info {
        unsigned offset;       /* offset of last chunk, adjusted for . and .. */
        unsigned next_offset;  /* offset of next chunk (last_name's + 1) */
        char *last_name;       /* last entry in previous chunk */
-       struct dentry *dentry; /* next dentry (for dcache readdir) */
-       int dir_release_count;
-       int dir_ordered_count;
+       long long dir_release_count;
+       long long dir_ordered_count;
+       int readdir_cache_idx;
 
        /* used for -o dirstat read() on directory thing */
        char *dir_info;
        int dir_info_len;
 };
 
-
+struct ceph_readdir_cache_control {
+       struct page  *page;
+       struct dentry **dentries;
+       int index;
+};
 
 /*
  * A "snap realm" describes a subset of the file hierarchy sharing
@@ -687,6 +720,7 @@ static inline int default_congestion_kb(void)
 
 
 /* snap.c */
+extern struct ceph_snap_context *ceph_empty_snapc;
 struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
                                               u64 ino);
 extern void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
@@ -713,8 +747,8 @@ extern void ceph_snap_exit(void);
 static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
 {
        return !list_empty(&ci->i_cap_snaps) &&
-               list_entry(ci->i_cap_snaps.prev, struct ceph_cap_snap,
-                          ci_item)->writing;
+              list_last_entry(&ci->i_cap_snaps, struct ceph_cap_snap,
+                              ci_item)->writing;
 }
 
 /* inode.c */
@@ -838,12 +872,12 @@ extern void ceph_put_cap(struct ceph_mds_client *mdsc,
                         struct ceph_cap *cap);
 extern int ceph_is_any_caps(struct inode *inode);
 
-extern void __queue_cap_release(struct ceph_mds_session *session, u64 ino,
-                               u64 cap_id, u32 migrate_seq, u32 issue_seq);
 extern void ceph_queue_caps_release(struct inode *inode);
 extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
 extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
                      int datasync);
+extern void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
+                                         struct ceph_mds_session *session);
 extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
                                    struct ceph_mds_session *session);
 extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
@@ -879,6 +913,9 @@ extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode);
 /* addr.c */
 extern const struct address_space_operations ceph_aops;
 extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
+extern int ceph_uninline_data(struct file *filp, struct page *locked_page);
+extern int ceph_pool_perm_check(struct ceph_inode_info *ci, int need);
+extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
 
 /* file.c */
 extern const struct file_operations ceph_file_fops;
@@ -890,7 +927,6 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 extern int ceph_release(struct inode *inode, struct file *filp);
 extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
                                  char *data, size_t len);
-int ceph_uninline_data(struct file *filp, struct page *locked_page);
 /* dir.c */
 extern const struct file_operations ceph_dir_fops;
 extern const struct file_operations ceph_snapdir_fops;
@@ -911,6 +947,7 @@ extern void ceph_dentry_lru_del(struct dentry *dn);
 extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
 extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn);
 extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry);
+extern void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl);
 
 /*
  * our d_ops vary depending on whether the inode is live,
index cd7ffad4041d81b605bdfbc97a4bc0072c19ca99..819163d8313bb3748765b5c3313dfdfd32ac9472 100644 (file)
@@ -911,6 +911,8 @@ int __ceph_setxattr(struct dentry *dentry, const char *name,
        struct inode *inode = d_inode(dentry);
        struct ceph_vxattr *vxattr;
        struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
+       struct ceph_cap_flush *prealloc_cf = NULL;
        int issued;
        int err;
        int dirty = 0;
@@ -920,6 +922,7 @@ int __ceph_setxattr(struct dentry *dentry, const char *name,
        char *newval = NULL;
        struct ceph_inode_xattr *xattr = NULL;
        int required_blob_size;
+       bool lock_snap_rwsem = false;
 
        if (!ceph_is_valid_xattr(name))
                return -EOPNOTSUPP;
@@ -948,12 +951,27 @@ int __ceph_setxattr(struct dentry *dentry, const char *name,
        if (!xattr)
                goto out;
 
+       prealloc_cf = ceph_alloc_cap_flush();
+       if (!prealloc_cf)
+               goto out;
+
        spin_lock(&ci->i_ceph_lock);
 retry:
        issued = __ceph_caps_issued(ci, NULL);
-       dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
        if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
                goto do_sync;
+
+       if (!lock_snap_rwsem && !ci->i_head_snapc) {
+               lock_snap_rwsem = true;
+               if (!down_read_trylock(&mdsc->snap_rwsem)) {
+                       spin_unlock(&ci->i_ceph_lock);
+                       down_read(&mdsc->snap_rwsem);
+                       spin_lock(&ci->i_ceph_lock);
+                       goto retry;
+               }
+       }
+
+       dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
        __build_xattrs(inode);
 
        required_blob_size = __get_required_blob_size(ci, name_len, val_len);
@@ -966,7 +984,7 @@ retry:
                dout(" preaallocating new blob size=%d\n", required_blob_size);
                blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
                if (!blob)
-                       goto out;
+                       goto do_sync_unlocked;
                spin_lock(&ci->i_ceph_lock);
                if (ci->i_xattrs.prealloc_blob)
                        ceph_buffer_put(ci->i_xattrs.prealloc_blob);
@@ -978,21 +996,28 @@ retry:
                          flags, value ? 1 : -1, &xattr);
 
        if (!err) {
-               dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+               dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
+                                              &prealloc_cf);
                ci->i_xattrs.dirty = true;
                inode->i_ctime = CURRENT_TIME;
        }
 
        spin_unlock(&ci->i_ceph_lock);
+       if (lock_snap_rwsem)
+               up_read(&mdsc->snap_rwsem);
        if (dirty)
                __mark_inode_dirty(inode, dirty);
+       ceph_free_cap_flush(prealloc_cf);
        return err;
 
 do_sync:
        spin_unlock(&ci->i_ceph_lock);
 do_sync_unlocked:
+       if (lock_snap_rwsem)
+               up_read(&mdsc->snap_rwsem);
        err = ceph_sync_setxattr(dentry, name, value, size, flags);
 out:
+       ceph_free_cap_flush(prealloc_cf);
        kfree(newname);
        kfree(newval);
        kfree(xattr);
@@ -1044,10 +1069,13 @@ int __ceph_removexattr(struct dentry *dentry, const char *name)
        struct inode *inode = d_inode(dentry);
        struct ceph_vxattr *vxattr;
        struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
+       struct ceph_cap_flush *prealloc_cf = NULL;
        int issued;
        int err;
        int required_blob_size;
        int dirty;
+       bool lock_snap_rwsem = false;
 
        if (!ceph_is_valid_xattr(name))
                return -EOPNOTSUPP;
@@ -1060,14 +1088,29 @@ int __ceph_removexattr(struct dentry *dentry, const char *name)
        if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
                goto do_sync_unlocked;
 
+       prealloc_cf = ceph_alloc_cap_flush();
+       if (!prealloc_cf)
+               return -ENOMEM;
+
        err = -ENOMEM;
        spin_lock(&ci->i_ceph_lock);
 retry:
        issued = __ceph_caps_issued(ci, NULL);
-       dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
-
        if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
                goto do_sync;
+
+       if (!lock_snap_rwsem && !ci->i_head_snapc) {
+               lock_snap_rwsem = true;
+               if (!down_read_trylock(&mdsc->snap_rwsem)) {
+                       spin_unlock(&ci->i_ceph_lock);
+                       down_read(&mdsc->snap_rwsem);
+                       spin_lock(&ci->i_ceph_lock);
+                       goto retry;
+               }
+       }
+
+       dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
+
        __build_xattrs(inode);
 
        required_blob_size = __get_required_blob_size(ci, 0, 0);
@@ -1080,7 +1123,7 @@ retry:
                dout(" preaallocating new blob size=%d\n", required_blob_size);
                blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
                if (!blob)
-                       goto out;
+                       goto do_sync_unlocked;
                spin_lock(&ci->i_ceph_lock);
                if (ci->i_xattrs.prealloc_blob)
                        ceph_buffer_put(ci->i_xattrs.prealloc_blob);
@@ -1090,18 +1133,24 @@ retry:
 
        err = __remove_xattr_by_name(ceph_inode(inode), name);
 
-       dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+       dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
+                                      &prealloc_cf);
        ci->i_xattrs.dirty = true;
        inode->i_ctime = CURRENT_TIME;
        spin_unlock(&ci->i_ceph_lock);
+       if (lock_snap_rwsem)
+               up_read(&mdsc->snap_rwsem);
        if (dirty)
                __mark_inode_dirty(inode, dirty);
+       ceph_free_cap_flush(prealloc_cf);
        return err;
 do_sync:
        spin_unlock(&ci->i_ceph_lock);
 do_sync_unlocked:
+       if (lock_snap_rwsem)
+               up_read(&mdsc->snap_rwsem);
+       ceph_free_cap_flush(prealloc_cf);
        err = ceph_send_removexattr(dentry, name);
-out:
        return err;
 }
 
index e5bbf748b6987a922fa211e3edb083346927f3b6..eae2c11268bcb484075cfd08482beeb172dd66bc 100644 (file)
@@ -489,6 +489,7 @@ static void cuse_fc_release(struct fuse_conn *fc)
  */
 static int cuse_channel_open(struct inode *inode, struct file *file)
 {
+       struct fuse_dev *fud;
        struct cuse_conn *cc;
        int rc;
 
@@ -499,17 +500,22 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
 
        fuse_conn_init(&cc->fc);
 
+       fud = fuse_dev_alloc(&cc->fc);
+       if (!fud) {
+               kfree(cc);
+               return -ENOMEM;
+       }
+
        INIT_LIST_HEAD(&cc->list);
        cc->fc.release = cuse_fc_release;
 
-       cc->fc.connected = 1;
        cc->fc.initialized = 1;
        rc = cuse_send_init(cc);
        if (rc) {
-               fuse_conn_put(&cc->fc);
+               fuse_dev_free(fud);
                return rc;
        }
-       file->private_data = &cc->fc;   /* channel owns base reference to cc */
+       file->private_data = fud;
 
        return 0;
 }
@@ -527,7 +533,8 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
  */
 static int cuse_channel_release(struct inode *inode, struct file *file)
 {
-       struct cuse_conn *cc = fc_to_cc(file->private_data);
+       struct fuse_dev *fud = file->private_data;
+       struct cuse_conn *cc = fc_to_cc(fud->fc);
        int rc;
 
        /* remove from the conntbl, no more access from this point on */
index c8b68ab2e574a86f13fab97f9ed47b14a4e139d6..80cc1b35d46043c16bc456e0cadf61e76c281d52 100644 (file)
@@ -25,13 +25,13 @@ MODULE_ALIAS("devname:fuse");
 
 static struct kmem_cache *fuse_req_cachep;
 
-static struct fuse_conn *fuse_get_conn(struct file *file)
+static struct fuse_dev *fuse_get_dev(struct file *file)
 {
        /*
         * Lockless access is OK, because file->private data is set
         * once during mount and is valid until the file is released.
         */
-       return file->private_data;
+       return ACCESS_ONCE(file->private_data);
 }
 
 static void fuse_request_init(struct fuse_req *req, struct page **pages,
@@ -48,6 +48,7 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages,
        req->pages = pages;
        req->page_descs = page_descs;
        req->max_pages = npages;
+       __set_bit(FR_PENDING, &req->flags);
 }
 
 static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
@@ -168,6 +169,10 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
        if (!fc->connected)
                goto out;
 
+       err = -ECONNREFUSED;
+       if (fc->conn_error)
+               goto out;
+
        req = fuse_request_alloc(npages);
        err = -ENOMEM;
        if (!req) {
@@ -177,8 +182,10 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
        }
 
        fuse_req_init_context(req);
-       req->waiting = 1;
-       req->background = for_background;
+       __set_bit(FR_WAITING, &req->flags);
+       if (for_background)
+               __set_bit(FR_BACKGROUND, &req->flags);
+
        return req;
 
  out:
@@ -268,15 +275,15 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
                req = get_reserved_req(fc, file);
 
        fuse_req_init_context(req);
-       req->waiting = 1;
-       req->background = 0;
+       __set_bit(FR_WAITING, &req->flags);
+       __clear_bit(FR_BACKGROUND, &req->flags);
        return req;
 }
 
 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 {
        if (atomic_dec_and_test(&req->count)) {
-               if (unlikely(req->background)) {
+               if (test_bit(FR_BACKGROUND, &req->flags)) {
                        /*
                         * We get here in the unlikely case that a background
                         * request was allocated but not sent
@@ -287,8 +294,10 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
                        spin_unlock(&fc->lock);
                }
 
-               if (req->waiting)
+               if (test_bit(FR_WAITING, &req->flags)) {
+                       __clear_bit(FR_WAITING, &req->flags);
                        atomic_dec(&fc->num_waiting);
+               }
 
                if (req->stolen_file)
                        put_reserved_req(fc, req);
@@ -309,46 +318,38 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args)
        return nbytes;
 }
 
-static u64 fuse_get_unique(struct fuse_conn *fc)
+static u64 fuse_get_unique(struct fuse_iqueue *fiq)
 {
-       fc->reqctr++;
-       /* zero is special */
-       if (fc->reqctr == 0)
-               fc->reqctr = 1;
-
-       return fc->reqctr;
+       return ++fiq->reqctr;
 }
 
-static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
+static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
 {
        req->in.h.len = sizeof(struct fuse_in_header) +
                len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
-       list_add_tail(&req->list, &fc->pending);
-       req->state = FUSE_REQ_PENDING;
-       if (!req->waiting) {
-               req->waiting = 1;
-               atomic_inc(&fc->num_waiting);
-       }
-       wake_up(&fc->waitq);
-       kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+       list_add_tail(&req->list, &fiq->pending);
+       wake_up_locked(&fiq->waitq);
+       kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
 }
 
 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
                       u64 nodeid, u64 nlookup)
 {
+       struct fuse_iqueue *fiq = &fc->iq;
+
        forget->forget_one.nodeid = nodeid;
        forget->forget_one.nlookup = nlookup;
 
-       spin_lock(&fc->lock);
-       if (fc->connected) {
-               fc->forget_list_tail->next = forget;
-               fc->forget_list_tail = forget;
-               wake_up(&fc->waitq);
-               kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+       spin_lock(&fiq->waitq.lock);
+       if (fiq->connected) {
+               fiq->forget_list_tail->next = forget;
+               fiq->forget_list_tail = forget;
+               wake_up_locked(&fiq->waitq);
+               kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
        } else {
                kfree(forget);
        }
-       spin_unlock(&fc->lock);
+       spin_unlock(&fiq->waitq.lock);
 }
 
 static void flush_bg_queue(struct fuse_conn *fc)
@@ -356,12 +357,15 @@ static void flush_bg_queue(struct fuse_conn *fc)
        while (fc->active_background < fc->max_background &&
               !list_empty(&fc->bg_queue)) {
                struct fuse_req *req;
+               struct fuse_iqueue *fiq = &fc->iq;
 
                req = list_entry(fc->bg_queue.next, struct fuse_req, list);
                list_del(&req->list);
                fc->active_background++;
-               req->in.h.unique = fuse_get_unique(fc);
-               queue_request(fc, req);
+               spin_lock(&fiq->waitq.lock);
+               req->in.h.unique = fuse_get_unique(fiq);
+               queue_request(fiq, req);
+               spin_unlock(&fiq->waitq.lock);
        }
 }
 
@@ -372,20 +376,22 @@ static void flush_bg_queue(struct fuse_conn *fc)
  * was closed.  The requester thread is woken up (if still waiting),
  * the 'end' callback is called if given, else the reference to the
  * request is released
- *
- * Called with fc->lock, unlocks it
  */
 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
-__releases(fc->lock)
 {
-       void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
-       req->end = NULL;
-       list_del(&req->list);
-       list_del(&req->intr_entry);
-       req->state = FUSE_REQ_FINISHED;
-       if (req->background) {
-               req->background = 0;
+       struct fuse_iqueue *fiq = &fc->iq;
+
+       if (test_and_set_bit(FR_FINISHED, &req->flags))
+               return;
 
+       spin_lock(&fiq->waitq.lock);
+       list_del_init(&req->intr_entry);
+       spin_unlock(&fiq->waitq.lock);
+       WARN_ON(test_bit(FR_PENDING, &req->flags));
+       WARN_ON(test_bit(FR_SENT, &req->flags));
+       if (test_bit(FR_BACKGROUND, &req->flags)) {
+               spin_lock(&fc->lock);
+               clear_bit(FR_BACKGROUND, &req->flags);
                if (fc->num_background == fc->max_background)
                        fc->blocked = 0;
 
@@ -401,122 +407,105 @@ __releases(fc->lock)
                fc->num_background--;
                fc->active_background--;
                flush_bg_queue(fc);
+               spin_unlock(&fc->lock);
        }
-       spin_unlock(&fc->lock);
        wake_up(&req->waitq);
-       if (end)
-               end(fc, req);
+       if (req->end)
+               req->end(fc, req);
        fuse_put_request(fc, req);
 }
 
-static void wait_answer_interruptible(struct fuse_conn *fc,
-                                     struct fuse_req *req)
-__releases(fc->lock)
-__acquires(fc->lock)
-{
-       if (signal_pending(current))
-               return;
-
-       spin_unlock(&fc->lock);
-       wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
-       spin_lock(&fc->lock);
-}
-
-static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
+static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
 {
-       list_add_tail(&req->intr_entry, &fc->interrupts);
-       wake_up(&fc->waitq);
-       kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+       spin_lock(&fiq->waitq.lock);
+       if (list_empty(&req->intr_entry)) {
+               list_add_tail(&req->intr_entry, &fiq->interrupts);
+               wake_up_locked(&fiq->waitq);
+       }
+       spin_unlock(&fiq->waitq.lock);
+       kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
 }
 
 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
-__releases(fc->lock)
-__acquires(fc->lock)
 {
+       struct fuse_iqueue *fiq = &fc->iq;
+       int err;
+
        if (!fc->no_interrupt) {
                /* Any signal may interrupt this */
-               wait_answer_interruptible(fc, req);
-
-               if (req->aborted)
-                       goto aborted;
-               if (req->state == FUSE_REQ_FINISHED)
+               err = wait_event_interruptible(req->waitq,
+                                       test_bit(FR_FINISHED, &req->flags));
+               if (!err)
                        return;
 
-               req->interrupted = 1;
-               if (req->state == FUSE_REQ_SENT)
-                       queue_interrupt(fc, req);
+               set_bit(FR_INTERRUPTED, &req->flags);
+               /* matches barrier in fuse_dev_do_read() */
+               smp_mb__after_atomic();
+               if (test_bit(FR_SENT, &req->flags))
+                       queue_interrupt(fiq, req);
        }
 
-       if (!req->force) {
+       if (!test_bit(FR_FORCE, &req->flags)) {
                sigset_t oldset;
 
                /* Only fatal signals may interrupt this */
                block_sigs(&oldset);
-               wait_answer_interruptible(fc, req);
+               err = wait_event_interruptible(req->waitq,
+                                       test_bit(FR_FINISHED, &req->flags));
                restore_sigs(&oldset);
 
-               if (req->aborted)
-                       goto aborted;
-               if (req->state == FUSE_REQ_FINISHED)
+               if (!err)
                        return;
 
+               spin_lock(&fiq->waitq.lock);
                /* Request is not yet in userspace, bail out */
-               if (req->state == FUSE_REQ_PENDING) {
+               if (test_bit(FR_PENDING, &req->flags)) {
                        list_del(&req->list);
+                       spin_unlock(&fiq->waitq.lock);
                        __fuse_put_request(req);
                        req->out.h.error = -EINTR;
                        return;
                }
+               spin_unlock(&fiq->waitq.lock);
        }
 
        /*
         * Either request is already in userspace, or it was forced.
         * Wait it out.
         */
-       spin_unlock(&fc->lock);
-       wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
-       spin_lock(&fc->lock);
-
-       if (!req->aborted)
-               return;
-
- aborted:
-       BUG_ON(req->state != FUSE_REQ_FINISHED);
-       if (req->locked) {
-               /* This is uninterruptible sleep, because data is
-                  being copied to/from the buffers of req.  During
-                  locked state, there mustn't be any filesystem
-                  operation (e.g. page fault), since that could lead
-                  to deadlock */
-               spin_unlock(&fc->lock);
-               wait_event(req->waitq, !req->locked);
-               spin_lock(&fc->lock);
-       }
+       wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
 }
 
 static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 {
-       BUG_ON(req->background);
-       spin_lock(&fc->lock);
-       if (!fc->connected)
+       struct fuse_iqueue *fiq = &fc->iq;
+
+       BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
+       spin_lock(&fiq->waitq.lock);
+       if (!fiq->connected) {
+               spin_unlock(&fiq->waitq.lock);
                req->out.h.error = -ENOTCONN;
-       else if (fc->conn_error)
-               req->out.h.error = -ECONNREFUSED;
-       else {
-               req->in.h.unique = fuse_get_unique(fc);
-               queue_request(fc, req);
+       } else {
+               req->in.h.unique = fuse_get_unique(fiq);
+               queue_request(fiq, req);
                /* acquire extra reference, since request is still needed
                   after request_end() */
                __fuse_get_request(req);
+               spin_unlock(&fiq->waitq.lock);
 
                request_wait_answer(fc, req);
+               /* Pairs with smp_wmb() in request_end() */
+               smp_rmb();
        }
-       spin_unlock(&fc->lock);
 }
 
 void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 {
-       req->isreply = 1;
+       __set_bit(FR_ISREPLY, &req->flags);
+       if (!test_bit(FR_WAITING, &req->flags)) {
+               __set_bit(FR_WAITING, &req->flags);
+               atomic_inc(&fc->num_waiting);
+       }
        __fuse_request_send(fc, req);
 }
 EXPORT_SYMBOL_GPL(fuse_request_send);
@@ -586,10 +575,20 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
        return ret;
 }
 
-static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
-                                           struct fuse_req *req)
+/*
+ * Called under fc->lock
+ *
+ * fc->connected must have been checked previously
+ */
+void fuse_request_send_background_locked(struct fuse_conn *fc,
+                                        struct fuse_req *req)
 {
-       BUG_ON(!req->background);
+       BUG_ON(!test_bit(FR_BACKGROUND, &req->flags));
+       if (!test_bit(FR_WAITING, &req->flags)) {
+               __set_bit(FR_WAITING, &req->flags);
+               atomic_inc(&fc->num_waiting);
+       }
+       __set_bit(FR_ISREPLY, &req->flags);
        fc->num_background++;
        if (fc->num_background == fc->max_background)
                fc->blocked = 1;
@@ -602,54 +601,40 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
        flush_bg_queue(fc);
 }
 
-static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
+void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 {
+       BUG_ON(!req->end);
        spin_lock(&fc->lock);
        if (fc->connected) {
-               fuse_request_send_nowait_locked(fc, req);
+               fuse_request_send_background_locked(fc, req);
                spin_unlock(&fc->lock);
        } else {
+               spin_unlock(&fc->lock);
                req->out.h.error = -ENOTCONN;
-               request_end(fc, req);
+               req->end(fc, req);
+               fuse_put_request(fc, req);
        }
 }
-
-void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
-{
-       req->isreply = 1;
-       fuse_request_send_nowait(fc, req);
-}
 EXPORT_SYMBOL_GPL(fuse_request_send_background);
 
 static int fuse_request_send_notify_reply(struct fuse_conn *fc,
                                          struct fuse_req *req, u64 unique)
 {
        int err = -ENODEV;
+       struct fuse_iqueue *fiq = &fc->iq;
 
-       req->isreply = 0;
+       __clear_bit(FR_ISREPLY, &req->flags);
        req->in.h.unique = unique;
-       spin_lock(&fc->lock);
-       if (fc->connected) {
-               queue_request(fc, req);
+       spin_lock(&fiq->waitq.lock);
+       if (fiq->connected) {
+               queue_request(fiq, req);
                err = 0;
        }
-       spin_unlock(&fc->lock);
+       spin_unlock(&fiq->waitq.lock);
 
        return err;
 }
 
-/*
- * Called under fc->lock
- *
- * fc->connected must have been checked previously
- */
-void fuse_request_send_background_locked(struct fuse_conn *fc,
-                                        struct fuse_req *req)
-{
-       req->isreply = 1;
-       fuse_request_send_nowait_locked(fc, req);
-}
-
 void fuse_force_forget(struct file *file, u64 nodeid)
 {
        struct inode *inode = file_inode(file);
@@ -665,7 +650,7 @@ void fuse_force_forget(struct file *file, u64 nodeid)
        req->in.numargs = 1;
        req->in.args[0].size = sizeof(inarg);
        req->in.args[0].value = &inarg;
-       req->isreply = 0;
+       __clear_bit(FR_ISREPLY, &req->flags);
        __fuse_request_send(fc, req);
        /* ignore errors */
        fuse_put_request(fc, req);
@@ -676,38 +661,39 @@ void fuse_force_forget(struct file *file, u64 nodeid)
  * anything that could cause a page-fault.  If the request was already
  * aborted bail out.
  */
-static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
+static int lock_request(struct fuse_req *req)
 {
        int err = 0;
        if (req) {
-               spin_lock(&fc->lock);
-               if (req->aborted)
+               spin_lock(&req->waitq.lock);
+               if (test_bit(FR_ABORTED, &req->flags))
                        err = -ENOENT;
                else
-                       req->locked = 1;
-               spin_unlock(&fc->lock);
+                       set_bit(FR_LOCKED, &req->flags);
+               spin_unlock(&req->waitq.lock);
        }
        return err;
 }
 
 /*
- * Unlock request.  If it was aborted during being locked, the
- * requester thread is currently waiting for it to be unlocked, so
- * wake it up.
+ * Unlock request.  If it was aborted while locked, caller is responsible
+ * for unlocking and ending the request.
  */
-static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
+static int unlock_request(struct fuse_req *req)
 {
+       int err = 0;
        if (req) {
-               spin_lock(&fc->lock);
-               req->locked = 0;
-               if (req->aborted)
-                       wake_up(&req->waitq);
-               spin_unlock(&fc->lock);
+               spin_lock(&req->waitq.lock);
+               if (test_bit(FR_ABORTED, &req->flags))
+                       err = -ENOENT;
+               else
+                       clear_bit(FR_LOCKED, &req->flags);
+               spin_unlock(&req->waitq.lock);
        }
+       return err;
 }
 
 struct fuse_copy_state {
-       struct fuse_conn *fc;
        int write;
        struct fuse_req *req;
        struct iov_iter *iter;
@@ -721,13 +707,10 @@ struct fuse_copy_state {
        unsigned move_pages:1;
 };
 
-static void fuse_copy_init(struct fuse_copy_state *cs,
-                          struct fuse_conn *fc,
-                          int write,
+static void fuse_copy_init(struct fuse_copy_state *cs, int write,
                           struct iov_iter *iter)
 {
        memset(cs, 0, sizeof(*cs));
-       cs->fc = fc;
        cs->write = write;
        cs->iter = iter;
 }
@@ -760,7 +743,10 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
        struct page *page;
        int err;
 
-       unlock_request(cs->fc, cs->req);
+       err = unlock_request(cs->req);
+       if (err)
+               return err;
+
        fuse_copy_finish(cs);
        if (cs->pipebufs) {
                struct pipe_buffer *buf = cs->pipebufs;
@@ -809,7 +795,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
                iov_iter_advance(cs->iter, err);
        }
 
-       return lock_request(cs->fc, cs->req);
+       return lock_request(cs->req);
 }
 
 /* Do as much copy to/from userspace buffer as we can */
@@ -860,7 +846,10 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
        struct page *newpage;
        struct pipe_buffer *buf = cs->pipebufs;
 
-       unlock_request(cs->fc, cs->req);
+       err = unlock_request(cs->req);
+       if (err)
+               return err;
+
        fuse_copy_finish(cs);
 
        err = buf->ops->confirm(cs->pipe, buf);
@@ -914,12 +903,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
                lru_cache_add_file(newpage);
 
        err = 0;
-       spin_lock(&cs->fc->lock);
-       if (cs->req->aborted)
+       spin_lock(&cs->req->waitq.lock);
+       if (test_bit(FR_ABORTED, &cs->req->flags))
                err = -ENOENT;
        else
                *pagep = newpage;
-       spin_unlock(&cs->fc->lock);
+       spin_unlock(&cs->req->waitq.lock);
 
        if (err) {
                unlock_page(newpage);
@@ -939,7 +928,7 @@ out_fallback:
        cs->pg = buf->page;
        cs->offset = buf->offset;
 
-       err = lock_request(cs->fc, cs->req);
+       err = lock_request(cs->req);
        if (err)
                return err;
 
@@ -950,11 +939,15 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
                         unsigned offset, unsigned count)
 {
        struct pipe_buffer *buf;
+       int err;
 
        if (cs->nr_segs == cs->pipe->buffers)
                return -EIO;
 
-       unlock_request(cs->fc, cs->req);
+       err = unlock_request(cs->req);
+       if (err)
+               return err;
+
        fuse_copy_finish(cs);
 
        buf = cs->pipebufs;
@@ -1065,36 +1058,15 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
        return err;
 }
 
-static int forget_pending(struct fuse_conn *fc)
+static int forget_pending(struct fuse_iqueue *fiq)
 {
-       return fc->forget_list_head.next != NULL;
+       return fiq->forget_list_head.next != NULL;
 }
 
-static int request_pending(struct fuse_conn *fc)
+static int request_pending(struct fuse_iqueue *fiq)
 {
-       return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
-               forget_pending(fc);
-}
-
-/* Wait until a request is available on the pending list */
-static void request_wait(struct fuse_conn *fc)
-__releases(fc->lock)
-__acquires(fc->lock)
-{
-       DECLARE_WAITQUEUE(wait, current);
-
-       add_wait_queue_exclusive(&fc->waitq, &wait);
-       while (fc->connected && !request_pending(fc)) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               if (signal_pending(current))
-                       break;
-
-               spin_unlock(&fc->lock);
-               schedule();
-               spin_lock(&fc->lock);
-       }
-       set_current_state(TASK_RUNNING);
-       remove_wait_queue(&fc->waitq, &wait);
+       return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) ||
+               forget_pending(fiq);
 }
 
 /*
@@ -1103,11 +1075,12 @@ __acquires(fc->lock)
  * Unlike other requests this is assembled on demand, without a need
  * to allocate a separate fuse_req structure.
  *
- * Called with fc->lock held, releases it
+ * Called with fiq->waitq.lock held, releases it
  */
-static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
+static int fuse_read_interrupt(struct fuse_iqueue *fiq,
+                              struct fuse_copy_state *cs,
                               size_t nbytes, struct fuse_req *req)
-__releases(fc->lock)
+__releases(fiq->waitq.lock)
 {
        struct fuse_in_header ih;
        struct fuse_interrupt_in arg;
@@ -1115,7 +1088,7 @@ __releases(fc->lock)
        int err;
 
        list_del_init(&req->intr_entry);
-       req->intr_unique = fuse_get_unique(fc);
+       req->intr_unique = fuse_get_unique(fiq);
        memset(&ih, 0, sizeof(ih));
        memset(&arg, 0, sizeof(arg));
        ih.len = reqsize;
@@ -1123,7 +1096,7 @@ __releases(fc->lock)
        ih.unique = req->intr_unique;
        arg.unique = req->in.h.unique;
 
-       spin_unlock(&fc->lock);
+       spin_unlock(&fiq->waitq.lock);
        if (nbytes < reqsize)
                return -EINVAL;
 
@@ -1135,21 +1108,21 @@ __releases(fc->lock)
        return err ? err : reqsize;
 }
 
-static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
+static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
                                               unsigned max,
                                               unsigned *countp)
 {
-       struct fuse_forget_link *head = fc->forget_list_head.next;
+       struct fuse_forget_link *head = fiq->forget_list_head.next;
        struct fuse_forget_link **newhead = &head;
        unsigned count;
 
        for (count = 0; *newhead != NULL && count < max; count++)
                newhead = &(*newhead)->next;
 
-       fc->forget_list_head.next = *newhead;
+       fiq->forget_list_head.next = *newhead;
        *newhead = NULL;
-       if (fc->forget_list_head.next == NULL)
-               fc->forget_list_tail = &fc->forget_list_head;
+       if (fiq->forget_list_head.next == NULL)
+               fiq->forget_list_tail = &fiq->forget_list_head;
 
        if (countp != NULL)
                *countp = count;
@@ -1157,24 +1130,24 @@ static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
        return head;
 }
 
-static int fuse_read_single_forget(struct fuse_conn *fc,
+static int fuse_read_single_forget(struct fuse_iqueue *fiq,
                                   struct fuse_copy_state *cs,
                                   size_t nbytes)
-__releases(fc->lock)
+__releases(fiq->waitq.lock)
 {
        int err;
-       struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
+       struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL);
        struct fuse_forget_in arg = {
                .nlookup = forget->forget_one.nlookup,
        };
        struct fuse_in_header ih = {
                .opcode = FUSE_FORGET,
                .nodeid = forget->forget_one.nodeid,
-               .unique = fuse_get_unique(fc),
+               .unique = fuse_get_unique(fiq),
                .len = sizeof(ih) + sizeof(arg),
        };
 
-       spin_unlock(&fc->lock);
+       spin_unlock(&fiq->waitq.lock);
        kfree(forget);
        if (nbytes < ih.len)
                return -EINVAL;
@@ -1190,9 +1163,9 @@ __releases(fc->lock)
        return ih.len;
 }
 
-static int fuse_read_batch_forget(struct fuse_conn *fc,
+static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
                                   struct fuse_copy_state *cs, size_t nbytes)
-__releases(fc->lock)
+__releases(fiq->waitq.lock)
 {
        int err;
        unsigned max_forgets;
@@ -1201,18 +1174,18 @@ __releases(fc->lock)
        struct fuse_batch_forget_in arg = { .count = 0 };
        struct fuse_in_header ih = {
                .opcode = FUSE_BATCH_FORGET,
-               .unique = fuse_get_unique(fc),
+               .unique = fuse_get_unique(fiq),
                .len = sizeof(ih) + sizeof(arg),
        };
 
        if (nbytes < ih.len) {
-               spin_unlock(&fc->lock);
+               spin_unlock(&fiq->waitq.lock);
                return -EINVAL;
        }
 
        max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
-       head = dequeue_forget(fc, max_forgets, &count);
-       spin_unlock(&fc->lock);
+       head = dequeue_forget(fiq, max_forgets, &count);
+       spin_unlock(&fiq->waitq.lock);
 
        arg.count = count;
        ih.len += count * sizeof(struct fuse_forget_one);
@@ -1239,14 +1212,15 @@ __releases(fc->lock)
        return ih.len;
 }
 
-static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
+static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
+                           struct fuse_copy_state *cs,
                            size_t nbytes)
-__releases(fc->lock)
+__releases(fiq->waitq.lock)
 {
-       if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
-               return fuse_read_single_forget(fc, cs, nbytes);
+       if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
+               return fuse_read_single_forget(fiq, cs, nbytes);
        else
-               return fuse_read_batch_forget(fc, cs, nbytes);
+               return fuse_read_batch_forget(fiq, cs, nbytes);
 }
 
 /*
@@ -1258,46 +1232,51 @@ __releases(fc->lock)
  * request_end().  Otherwise add it to the processing list, and set
  * the 'sent' flag.
  */
-static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
+static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
                                struct fuse_copy_state *cs, size_t nbytes)
 {
-       int err;
+       ssize_t err;
+       struct fuse_conn *fc = fud->fc;
+       struct fuse_iqueue *fiq = &fc->iq;
+       struct fuse_pqueue *fpq = &fud->pq;
        struct fuse_req *req;
        struct fuse_in *in;
        unsigned reqsize;
 
  restart:
-       spin_lock(&fc->lock);
+       spin_lock(&fiq->waitq.lock);
        err = -EAGAIN;
-       if ((file->f_flags & O_NONBLOCK) && fc->connected &&
-           !request_pending(fc))
+       if ((file->f_flags & O_NONBLOCK) && fiq->connected &&
+           !request_pending(fiq))
                goto err_unlock;
 
-       request_wait(fc);
-       err = -ENODEV;
-       if (!fc->connected)
+       err = wait_event_interruptible_exclusive_locked(fiq->waitq,
+                               !fiq->connected || request_pending(fiq));
+       if (err)
                goto err_unlock;
-       err = -ERESTARTSYS;
-       if (!request_pending(fc))
+
+       err = -ENODEV;
+       if (!fiq->connected)
                goto err_unlock;
 
-       if (!list_empty(&fc->interrupts)) {
-               req = list_entry(fc->interrupts.next, struct fuse_req,
+       if (!list_empty(&fiq->interrupts)) {
+               req = list_entry(fiq->interrupts.next, struct fuse_req,
                                 intr_entry);
-               return fuse_read_interrupt(fc, cs, nbytes, req);
+               return fuse_read_interrupt(fiq, cs, nbytes, req);
        }
 
-       if (forget_pending(fc)) {
-               if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
-                       return fuse_read_forget(fc, cs, nbytes);
+       if (forget_pending(fiq)) {
+               if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0)
+                       return fuse_read_forget(fc, fiq, cs, nbytes);
 
-               if (fc->forget_batch <= -8)
-                       fc->forget_batch = 16;
+               if (fiq->forget_batch <= -8)
+                       fiq->forget_batch = 16;
        }
 
-       req = list_entry(fc->pending.next, struct fuse_req, list);
-       req->state = FUSE_REQ_READING;
-       list_move(&req->list, &fc->io);
+       req = list_entry(fiq->pending.next, struct fuse_req, list);
+       clear_bit(FR_PENDING, &req->flags);
+       list_del_init(&req->list);
+       spin_unlock(&fiq->waitq.lock);
 
        in = &req->in;
        reqsize = in->h.len;
@@ -1310,37 +1289,48 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
                request_end(fc, req);
                goto restart;
        }
-       spin_unlock(&fc->lock);
+       spin_lock(&fpq->lock);
+       list_add(&req->list, &fpq->io);
+       spin_unlock(&fpq->lock);
        cs->req = req;
        err = fuse_copy_one(cs, &in->h, sizeof(in->h));
        if (!err)
                err = fuse_copy_args(cs, in->numargs, in->argpages,
                                     (struct fuse_arg *) in->args, 0);
        fuse_copy_finish(cs);
-       spin_lock(&fc->lock);
-       req->locked = 0;
-       if (req->aborted) {
-               request_end(fc, req);
-               return -ENODEV;
+       spin_lock(&fpq->lock);
+       clear_bit(FR_LOCKED, &req->flags);
+       if (!fpq->connected) {
+               err = -ENODEV;
+               goto out_end;
        }
        if (err) {
                req->out.h.error = -EIO;
-               request_end(fc, req);
-               return err;
+               goto out_end;
        }
-       if (!req->isreply)
-               request_end(fc, req);
-       else {
-               req->state = FUSE_REQ_SENT;
-               list_move_tail(&req->list, &fc->processing);
-               if (req->interrupted)
-                       queue_interrupt(fc, req);
-               spin_unlock(&fc->lock);
+       if (!test_bit(FR_ISREPLY, &req->flags)) {
+               err = reqsize;
+               goto out_end;
        }
+       list_move_tail(&req->list, &fpq->processing);
+       spin_unlock(&fpq->lock);
+       set_bit(FR_SENT, &req->flags);
+       /* matches barrier in request_wait_answer() */
+       smp_mb__after_atomic();
+       if (test_bit(FR_INTERRUPTED, &req->flags))
+               queue_interrupt(fiq, req);
+
        return reqsize;
 
+out_end:
+       if (!test_bit(FR_PRIVATE, &req->flags))
+               list_del_init(&req->list);
+       spin_unlock(&fpq->lock);
+       request_end(fc, req);
+       return err;
+
  err_unlock:
-       spin_unlock(&fc->lock);
+       spin_unlock(&fiq->waitq.lock);
        return err;
 }
 
@@ -1359,16 +1349,17 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
 {
        struct fuse_copy_state cs;
        struct file *file = iocb->ki_filp;
-       struct fuse_conn *fc = fuse_get_conn(file);
-       if (!fc)
+       struct fuse_dev *fud = fuse_get_dev(file);
+
+       if (!fud)
                return -EPERM;
 
        if (!iter_is_iovec(to))
                return -EINVAL;
 
-       fuse_copy_init(&cs, fc, 1, to);
+       fuse_copy_init(&cs, 1, to);
 
-       return fuse_dev_do_read(fc, file, &cs, iov_iter_count(to));
+       return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
 }
 
 static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
@@ -1380,18 +1371,19 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
        int do_wakeup = 0;
        struct pipe_buffer *bufs;
        struct fuse_copy_state cs;
-       struct fuse_conn *fc = fuse_get_conn(in);
-       if (!fc)
+       struct fuse_dev *fud = fuse_get_dev(in);
+
+       if (!fud)
                return -EPERM;
 
        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
        if (!bufs)
                return -ENOMEM;
 
-       fuse_copy_init(&cs, fc, 1, NULL);
+       fuse_copy_init(&cs, 1, NULL);
        cs.pipebufs = bufs;
        cs.pipe = pipe;
-       ret = fuse_dev_do_read(fc, in, &cs, len);
+       ret = fuse_dev_do_read(fud, in, &cs, len);
        if (ret < 0)
                goto out;
 
@@ -1830,11 +1822,11 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 }
 
 /* Look up request on processing list by unique ID */
-static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
+static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
 {
        struct fuse_req *req;
 
-       list_for_each_entry(req, &fc->processing, list) {
+       list_for_each_entry(req, &fpq->processing, list) {
                if (req->in.h.unique == unique || req->intr_unique == unique)
                        return req;
        }
@@ -1871,10 +1863,12 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
  * it from the list and copy the rest of the buffer to the request.
  * The request is finished by calling request_end()
  */
-static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
+static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
                                 struct fuse_copy_state *cs, size_t nbytes)
 {
        int err;
+       struct fuse_conn *fc = fud->fc;
+       struct fuse_pqueue *fpq = &fud->pq;
        struct fuse_req *req;
        struct fuse_out_header oh;
 
@@ -1902,63 +1896,60 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
        if (oh.error <= -1000 || oh.error > 0)
                goto err_finish;
 
-       spin_lock(&fc->lock);
+       spin_lock(&fpq->lock);
        err = -ENOENT;
-       if (!fc->connected)
-               goto err_unlock;
+       if (!fpq->connected)
+               goto err_unlock_pq;
 
-       req = request_find(fc, oh.unique);
+       req = request_find(fpq, oh.unique);
        if (!req)
-               goto err_unlock;
+               goto err_unlock_pq;
 
-       if (req->aborted) {
-               spin_unlock(&fc->lock);
-               fuse_copy_finish(cs);
-               spin_lock(&fc->lock);
-               request_end(fc, req);
-               return -ENOENT;
-       }
        /* Is it an interrupt reply? */
        if (req->intr_unique == oh.unique) {
+               spin_unlock(&fpq->lock);
+
                err = -EINVAL;
                if (nbytes != sizeof(struct fuse_out_header))
-                       goto err_unlock;
+                       goto err_finish;
 
                if (oh.error == -ENOSYS)
                        fc->no_interrupt = 1;
                else if (oh.error == -EAGAIN)
-                       queue_interrupt(fc, req);
+                       queue_interrupt(&fc->iq, req);
 
-               spin_unlock(&fc->lock);
                fuse_copy_finish(cs);
                return nbytes;
        }
 
-       req->state = FUSE_REQ_WRITING;
-       list_move(&req->list, &fc->io);
+       clear_bit(FR_SENT, &req->flags);
+       list_move(&req->list, &fpq->io);
        req->out.h = oh;
-       req->locked = 1;
+       set_bit(FR_LOCKED, &req->flags);
+       spin_unlock(&fpq->lock);
        cs->req = req;
        if (!req->out.page_replace)
                cs->move_pages = 0;
-       spin_unlock(&fc->lock);
 
        err = copy_out_args(cs, &req->out, nbytes);
        fuse_copy_finish(cs);
 
-       spin_lock(&fc->lock);
-       req->locked = 0;
-       if (!err) {
-               if (req->aborted)
-                       err = -ENOENT;
-       } else if (!req->aborted)
+       spin_lock(&fpq->lock);
+       clear_bit(FR_LOCKED, &req->flags);
+       if (!fpq->connected)
+               err = -ENOENT;
+       else if (err)
                req->out.h.error = -EIO;
+       if (!test_bit(FR_PRIVATE, &req->flags))
+               list_del_init(&req->list);
+       spin_unlock(&fpq->lock);
+
        request_end(fc, req);
 
        return err ? err : nbytes;
 
- err_unlock:
-       spin_unlock(&fc->lock);
+ err_unlock_pq:
+       spin_unlock(&fpq->lock);
  err_finish:
        fuse_copy_finish(cs);
        return err;
@@ -1967,16 +1958,17 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
 static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
 {
        struct fuse_copy_state cs;
-       struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
-       if (!fc)
+       struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp);
+
+       if (!fud)
                return -EPERM;
 
        if (!iter_is_iovec(from))
                return -EINVAL;
 
-       fuse_copy_init(&cs, fc, 0, from);
+       fuse_copy_init(&cs, 0, from);
 
-       return fuse_dev_do_write(fc, &cs, iov_iter_count(from));
+       return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
 }
 
 static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
@@ -1987,12 +1979,12 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
        unsigned idx;
        struct pipe_buffer *bufs;
        struct fuse_copy_state cs;
-       struct fuse_conn *fc;
+       struct fuse_dev *fud;
        size_t rem;
        ssize_t ret;
 
-       fc = fuse_get_conn(out);
-       if (!fc)
+       fud = fuse_get_dev(out);
+       if (!fud)
                return -EPERM;
 
        bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
@@ -2039,7 +2031,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
        }
        pipe_unlock(pipe);
 
-       fuse_copy_init(&cs, fc, 0, NULL);
+       fuse_copy_init(&cs, 0, NULL);
        cs.pipebufs = bufs;
        cs.nr_segs = nbuf;
        cs.pipe = pipe;
@@ -2047,7 +2039,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
        if (flags & SPLICE_F_MOVE)
                cs.move_pages = 1;
 
-       ret = fuse_dev_do_write(fc, &cs, len);
+       ret = fuse_dev_do_write(fud, &cs, len);
 
        for (idx = 0; idx < nbuf; idx++) {
                struct pipe_buffer *buf = &bufs[idx];
@@ -2061,18 +2053,21 @@ out:
 static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
 {
        unsigned mask = POLLOUT | POLLWRNORM;
-       struct fuse_conn *fc = fuse_get_conn(file);
-       if (!fc)
+       struct fuse_iqueue *fiq;
+       struct fuse_dev *fud = fuse_get_dev(file);
+
+       if (!fud)
                return POLLERR;
 
-       poll_wait(file, &fc->waitq, wait);
+       fiq = &fud->fc->iq;
+       poll_wait(file, &fiq->waitq, wait);
 
-       spin_lock(&fc->lock);
-       if (!fc->connected)
+       spin_lock(&fiq->waitq.lock);
+       if (!fiq->connected)
                mask = POLLERR;
-       else if (request_pending(fc))
+       else if (request_pending(fiq))
                mask |= POLLIN | POLLRDNORM;
-       spin_unlock(&fc->lock);
+       spin_unlock(&fiq->waitq.lock);
 
        return mask;
 }
@@ -2083,67 +2078,18 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
  * This function releases and reacquires fc->lock
  */
 static void end_requests(struct fuse_conn *fc, struct list_head *head)
-__releases(fc->lock)
-__acquires(fc->lock)
 {
        while (!list_empty(head)) {
                struct fuse_req *req;
                req = list_entry(head->next, struct fuse_req, list);
                req->out.h.error = -ECONNABORTED;
-               request_end(fc, req);
-               spin_lock(&fc->lock);
-       }
-}
-
-/*
- * Abort requests under I/O
- *
- * The requests are set to aborted and finished, and the request
- * waiter is woken up.  This will make request_wait_answer() wait
- * until the request is unlocked and then return.
- *
- * If the request is asynchronous, then the end function needs to be
- * called after waiting for the request to be unlocked (if it was
- * locked).
- */
-static void end_io_requests(struct fuse_conn *fc)
-__releases(fc->lock)
-__acquires(fc->lock)
-{
-       while (!list_empty(&fc->io)) {
-               struct fuse_req *req =
-                       list_entry(fc->io.next, struct fuse_req, list);
-               void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
-
-               req->aborted = 1;
-               req->out.h.error = -ECONNABORTED;
-               req->state = FUSE_REQ_FINISHED;
+               clear_bit(FR_PENDING, &req->flags);
+               clear_bit(FR_SENT, &req->flags);
                list_del_init(&req->list);
-               wake_up(&req->waitq);
-               if (end) {
-                       req->end = NULL;
-                       __fuse_get_request(req);
-                       spin_unlock(&fc->lock);
-                       wait_event(req->waitq, !req->locked);
-                       end(fc, req);
-                       fuse_put_request(fc, req);
-                       spin_lock(&fc->lock);
-               }
+               request_end(fc, req);
        }
 }
 
-static void end_queued_requests(struct fuse_conn *fc)
-__releases(fc->lock)
-__acquires(fc->lock)
-{
-       fc->max_background = UINT_MAX;
-       flush_bg_queue(fc);
-       end_requests(fc, &fc->pending);
-       end_requests(fc, &fc->processing);
-       while (forget_pending(fc))
-               kfree(dequeue_forget(fc, 1, NULL));
-}
-
 static void end_polls(struct fuse_conn *fc)
 {
        struct rb_node *p;
@@ -2162,67 +2108,156 @@ static void end_polls(struct fuse_conn *fc)
 /*
  * Abort all requests.
  *
- * Emergency exit in case of a malicious or accidental deadlock, or
- * just a hung filesystem.
+ * Emergency exit in case of a malicious or accidental deadlock, or just a hung
+ * filesystem.
  *
- * The same effect is usually achievable through killing the
- * filesystem daemon and all users of the filesystem.  The exception
- * is the combination of an asynchronous request and the tricky
- * deadlock (see Documentation/filesystems/fuse.txt).
+ * The same effect is usually achievable through killing the filesystem daemon
+ * and all users of the filesystem.  The exception is the combination of an
+ * asynchronous request and the tricky deadlock (see
+ * Documentation/filesystems/fuse.txt).
  *
- * During the aborting, progression of requests from the pending and
- * processing lists onto the io list, and progression of new requests
- * onto the pending list is prevented by req->connected being false.
- *
- * Progression of requests under I/O to the processing list is
- * prevented by the req->aborted flag being true for these requests.
- * For this reason requests on the io list must be aborted first.
+ * Aborting requests under I/O goes as follows: 1: Separate out unlocked
+ * requests, they should be finished off immediately.  Locked requests will be
+ * finished after unlock; see unlock_request(). 2: Finish off the unlocked
+ * requests.  It is possible that some request will finish before we can.  This
+ * is OK, the request will in that case be removed from the list before we touch
+ * it.
  */
 void fuse_abort_conn(struct fuse_conn *fc)
 {
+       struct fuse_iqueue *fiq = &fc->iq;
+
        spin_lock(&fc->lock);
        if (fc->connected) {
+               struct fuse_dev *fud;
+               struct fuse_req *req, *next;
+               LIST_HEAD(to_end1);
+               LIST_HEAD(to_end2);
+
                fc->connected = 0;
                fc->blocked = 0;
                fuse_set_initialized(fc);
-               end_io_requests(fc);
-               end_queued_requests(fc);
+               list_for_each_entry(fud, &fc->devices, entry) {
+                       struct fuse_pqueue *fpq = &fud->pq;
+
+                       spin_lock(&fpq->lock);
+                       fpq->connected = 0;
+                       list_for_each_entry_safe(req, next, &fpq->io, list) {
+                               req->out.h.error = -ECONNABORTED;
+                               spin_lock(&req->waitq.lock);
+                               set_bit(FR_ABORTED, &req->flags);
+                               if (!test_bit(FR_LOCKED, &req->flags)) {
+                                       set_bit(FR_PRIVATE, &req->flags);
+                                       list_move(&req->list, &to_end1);
+                               }
+                               spin_unlock(&req->waitq.lock);
+                       }
+                       list_splice_init(&fpq->processing, &to_end2);
+                       spin_unlock(&fpq->lock);
+               }
+               fc->max_background = UINT_MAX;
+               flush_bg_queue(fc);
+
+               spin_lock(&fiq->waitq.lock);
+               fiq->connected = 0;
+               list_splice_init(&fiq->pending, &to_end2);
+               while (forget_pending(fiq))
+                       kfree(dequeue_forget(fiq, 1, NULL));
+               wake_up_all_locked(&fiq->waitq);
+               spin_unlock(&fiq->waitq.lock);
+               kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
                end_polls(fc);
-               wake_up_all(&fc->waitq);
                wake_up_all(&fc->blocked_waitq);
-               kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+               spin_unlock(&fc->lock);
+
+               while (!list_empty(&to_end1)) {
+                       req = list_first_entry(&to_end1, struct fuse_req, list);
+                       __fuse_get_request(req);
+                       list_del_init(&req->list);
+                       request_end(fc, req);
+               }
+               end_requests(fc, &to_end2);
+       } else {
+               spin_unlock(&fc->lock);
        }
-       spin_unlock(&fc->lock);
 }
 EXPORT_SYMBOL_GPL(fuse_abort_conn);
 
 int fuse_dev_release(struct inode *inode, struct file *file)
 {
-       struct fuse_conn *fc = fuse_get_conn(file);
-       if (fc) {
-               spin_lock(&fc->lock);
-               fc->connected = 0;
-               fc->blocked = 0;
-               fuse_set_initialized(fc);
-               end_queued_requests(fc);
-               end_polls(fc);
-               wake_up_all(&fc->blocked_waitq);
-               spin_unlock(&fc->lock);
-               fuse_conn_put(fc);
-       }
+       struct fuse_dev *fud = fuse_get_dev(file);
 
+       if (fud) {
+               struct fuse_conn *fc = fud->fc;
+               struct fuse_pqueue *fpq = &fud->pq;
+
+               WARN_ON(!list_empty(&fpq->io));
+               end_requests(fc, &fpq->processing);
+               /* Are we the last open device? */
+               if (atomic_dec_and_test(&fc->dev_count)) {
+                       WARN_ON(fc->iq.fasync != NULL);
+                       fuse_abort_conn(fc);
+               }
+               fuse_dev_free(fud);
+       }
        return 0;
 }
 EXPORT_SYMBOL_GPL(fuse_dev_release);
 
 static int fuse_dev_fasync(int fd, struct file *file, int on)
 {
-       struct fuse_conn *fc = fuse_get_conn(file);
-       if (!fc)
+       struct fuse_dev *fud = fuse_get_dev(file);
+
+       if (!fud)
                return -EPERM;
 
        /* No locking - fasync_helper does its own locking */
-       return fasync_helper(fd, file, on, &fc->fasync);
+       return fasync_helper(fd, file, on, &fud->fc->iq.fasync);
+}
+
+static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
+{
+       struct fuse_dev *fud;
+
+       if (new->private_data)
+               return -EINVAL;
+
+       fud = fuse_dev_alloc(fc);
+       if (!fud)
+               return -ENOMEM;
+
+       new->private_data = fud;
+       atomic_inc(&fc->dev_count);
+
+       return 0;
+}
+
+/* FUSE_DEV_IOC_CLONE: clone the fuse device fd stored at @arg onto @file */
+static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
+                          unsigned long arg)
+{
+       struct fuse_dev *fud = NULL;
+       struct file *old;
+       int err = -EINVAL;
+       int oldfd;
+
+       if (cmd != FUSE_DEV_IOC_CLONE)
+               return -ENOTTY;
+       if (get_user(oldfd, (__u32 __user *) arg))
+               return -EFAULT;
+       old = fget(oldfd);
+       if (!old)
+               return -EINVAL;
+       /* accept only fds that share our f_op; others hold no fuse_dev */
+       if (old->f_op == file->f_op)
+               fud = fuse_get_dev(old);
+       if (fud) {
+               mutex_lock(&fuse_mutex);
+               err = fuse_device_clone(fud->fc, file);
+               mutex_unlock(&fuse_mutex);
+       }
+       fput(old);
+       return err;
 }
 
 const struct file_operations fuse_dev_operations = {
@@ -2236,6 +2271,8 @@ const struct file_operations fuse_dev_operations = {
        .poll           = fuse_dev_poll,
        .release        = fuse_dev_release,
        .fasync         = fuse_dev_fasync,
+       .unlocked_ioctl = fuse_dev_ioctl,
+       .compat_ioctl   = fuse_dev_ioctl,
 };
 EXPORT_SYMBOL_GPL(fuse_dev_operations);
 
index 8c5e2fa68835a07216d250dc9536aafaa1063dc2..014fa8ba2b5189e923557c446be8f150921f9b28 100644 (file)
@@ -96,17 +96,17 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
                         * Drop the release request when client does not
                         * implement 'open'
                         */
-                       req->background = 0;
+                       __clear_bit(FR_BACKGROUND, &req->flags);
                        iput(req->misc.release.inode);
                        fuse_put_request(ff->fc, req);
                } else if (sync) {
-                       req->background = 0;
+                       __clear_bit(FR_BACKGROUND, &req->flags);
                        fuse_request_send(ff->fc, req);
                        iput(req->misc.release.inode);
                        fuse_put_request(ff->fc, req);
                } else {
                        req->end = fuse_release_end;
-                       req->background = 1;
+                       __set_bit(FR_BACKGROUND, &req->flags);
                        fuse_request_send_background(ff->fc, req);
                }
                kfree(ff);
@@ -299,8 +299,8 @@ void fuse_sync_release(struct fuse_file *ff, int flags)
 {
        WARN_ON(atomic_read(&ff->count) > 1);
        fuse_prepare_release(ff, flags, FUSE_RELEASE);
-       ff->reserved_req->force = 1;
-       ff->reserved_req->background = 0;
+       __set_bit(FR_FORCE, &ff->reserved_req->flags);
+       __clear_bit(FR_BACKGROUND, &ff->reserved_req->flags);
        fuse_request_send(ff->fc, ff->reserved_req);
        fuse_put_request(ff->fc, ff->reserved_req);
        kfree(ff);
@@ -426,7 +426,7 @@ static int fuse_flush(struct file *file, fl_owner_t id)
        req->in.numargs = 1;
        req->in.args[0].size = sizeof(inarg);
        req->in.args[0].value = &inarg;
-       req->force = 1;
+       __set_bit(FR_FORCE, &req->flags);
        fuse_request_send(fc, req);
        err = req->out.h.error;
        fuse_put_request(fc, req);
@@ -1611,7 +1611,8 @@ static int fuse_writepage_locked(struct page *page)
        if (!req)
                goto err;
 
-       req->background = 1; /* writeback always goes to bg_queue */
+       /* writeback always goes to bg_queue */
+       __set_bit(FR_BACKGROUND, &req->flags);
        tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
        if (!tmp_page)
                goto err_free;
@@ -1742,8 +1743,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
                }
        }
 
-       if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT ||
-                                       old_req->state == FUSE_REQ_PENDING)) {
+       if (old_req->num_pages == 1 && test_bit(FR_PENDING, &old_req->flags)) {
                struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host);
 
                copy_highpage(old_req->pages[0], page);
@@ -1830,7 +1830,7 @@ static int fuse_writepages_fill(struct page *page,
                req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
                req->misc.write.next = NULL;
                req->in.argpages = 1;
-               req->background = 1;
+               __set_bit(FR_BACKGROUND, &req->flags);
                req->num_pages = 0;
                req->end = fuse_writepage_end;
                req->inode = inode;
index 7354dc142a50845a62e9a413d82d185afc1f5b0d..405113101db8d868fcb40c34199978be576d0961 100644 (file)
@@ -241,16 +241,6 @@ struct fuse_args {
 
 #define FUSE_ARGS(args) struct fuse_args args = {}
 
-/** The request state */
-enum fuse_req_state {
-       FUSE_REQ_INIT = 0,
-       FUSE_REQ_PENDING,
-       FUSE_REQ_READING,
-       FUSE_REQ_SENT,
-       FUSE_REQ_WRITING,
-       FUSE_REQ_FINISHED
-};
-
 /** The request IO state (for asynchronous processing) */
 struct fuse_io_priv {
        int async;
@@ -266,8 +256,41 @@ struct fuse_io_priv {
        struct completion *done;
 };
 
+/**
+ * Request flags
+ *
+ * FR_ISREPLY:         set if the request has reply
+ * FR_FORCE:           force sending of the request even if interrupted
+ * FR_BACKGROUND:      request is sent in the background
+ * FR_WAITING:         request is counted as "waiting"
+ * FR_ABORTED:         the request was aborted
+ * FR_INTERRUPTED:     the request has been interrupted
+ * FR_LOCKED:          data is being copied to/from the request
+ * FR_PENDING:         request is not yet in userspace
+ * FR_SENT:            request is in userspace, waiting for an answer
+ * FR_FINISHED:                request is finished
+ * FR_PRIVATE:         request is on private list
+ */
+enum fuse_req_flag {
+       FR_ISREPLY,
+       FR_FORCE,
+       FR_BACKGROUND,
+       FR_WAITING,
+       FR_ABORTED,
+       FR_INTERRUPTED,
+       FR_LOCKED,
+       FR_PENDING,
+       FR_SENT,
+       FR_FINISHED,
+       FR_PRIVATE,
+};
+
 /**
  * A request to the client
+ *
+ * .waitq.lock protects the following fields:
+ *   - FR_ABORTED
+ *   - FR_LOCKED (may also be modified under fc->lock, tested under both)
  */
 struct fuse_req {
        /** This can be on either pending processing or io lists in
@@ -283,35 +306,8 @@ struct fuse_req {
        /** Unique ID for the interrupt request */
        u64 intr_unique;
 
-       /*
-        * The following bitfields are either set once before the
-        * request is queued or setting/clearing them is protected by
-        * fuse_conn->lock
-        */
-
-       /** True if the request has reply */
-       unsigned isreply:1;
-
-       /** Force sending of the request even if interrupted */
-       unsigned force:1;
-
-       /** The request was aborted */
-       unsigned aborted:1;
-
-       /** Request is sent in the background */
-       unsigned background:1;
-
-       /** The request has been interrupted */
-       unsigned interrupted:1;
-
-       /** Data is being copied to/from the request */
-       unsigned locked:1;
-
-       /** Request is counted as "waiting" */
-       unsigned waiting:1;
-
-       /** State of the request */
-       enum fuse_req_state state;
+       /* Request flags, updated with test/set/clear_bit() */
+       unsigned long flags;
 
        /** The request input */
        struct fuse_in in;
@@ -380,6 +376,61 @@ struct fuse_req {
        struct file *stolen_file;
 };
 
+struct fuse_iqueue {
+       /** Connection established */
+       unsigned connected;
+
+       /** Readers of the connection are waiting on this */
+       wait_queue_head_t waitq;
+
+       /** The next unique request id */
+       u64 reqctr;
+
+       /** The list of pending requests */
+       struct list_head pending;
+
+       /** Pending interrupts */
+       struct list_head interrupts;
+
+       /** Queue of pending forgets */
+       struct fuse_forget_link forget_list_head;
+       struct fuse_forget_link *forget_list_tail;
+
+       /** Batching of FORGET requests (positive indicates FORGET batch) */
+       int forget_batch;
+
+       /** O_ASYNC requests */
+       struct fasync_struct *fasync;
+};
+
+struct fuse_pqueue {
+       /** Connection established */
+       unsigned connected;
+
+       /** Lock protecting accesses to members of this structure */
+       spinlock_t lock;
+
+       /** The list of requests being processed */
+       struct list_head processing;
+
+       /** The list of requests under I/O */
+       struct list_head io;
+};
+
+/**
+ * Fuse device instance
+ */
+struct fuse_dev {
+       /** Fuse connection for this device */
+       struct fuse_conn *fc;
+
+       /** Processing queue */
+       struct fuse_pqueue pq;
+
+       /** list entry on fc->devices */
+       struct list_head entry;
+};
+
 /**
  * A Fuse connection.
  *
@@ -394,6 +445,9 @@ struct fuse_conn {
        /** Refcount */
        atomic_t count;
 
+       /** Number of fuse_dev's */
+       atomic_t dev_count;
+
        struct rcu_head rcu;
 
        /** The user id for this mount */
@@ -411,17 +465,8 @@ struct fuse_conn {
        /** Maximum write size */
        unsigned max_write;
 
-       /** Readers of the connection are waiting on this */
-       wait_queue_head_t waitq;
-
-       /** The list of pending requests */
-       struct list_head pending;
-
-       /** The list of requests being processed */
-       struct list_head processing;
-
-       /** The list of requests under I/O */
-       struct list_head io;
+       /** Input queue */
+       struct fuse_iqueue iq;
 
        /** The next unique kernel file handle */
        u64 khctr;
@@ -444,16 +489,6 @@ struct fuse_conn {
        /** The list of background requests set aside for later queuing */
        struct list_head bg_queue;
 
-       /** Pending interrupts */
-       struct list_head interrupts;
-
-       /** Queue of pending forgets */
-       struct fuse_forget_link forget_list_head;
-       struct fuse_forget_link *forget_list_tail;
-
-       /** Batching of FORGET requests (positive indicates FORGET batch) */
-       int forget_batch;
-
        /** Flag indicating that INIT reply has been received. Allocating
         * any fuse request will be suspended until the flag is set */
        int initialized;
@@ -469,9 +504,6 @@ struct fuse_conn {
        /** waitq for reserved requests */
        wait_queue_head_t reserved_req_waitq;
 
-       /** The next unique request id */
-       u64 reqctr;
-
        /** Connection established, cleared on umount, connection
            abort and device release */
        unsigned connected;
@@ -594,9 +626,6 @@ struct fuse_conn {
        /** number of dentries used in the above array */
        int ctl_ndents;
 
-       /** O_ASYNC requests */
-       struct fasync_struct *fasync;
-
        /** Key for lock owner ID scrambling */
        u32 scramble_key[4];
 
@@ -614,6 +643,9 @@ struct fuse_conn {
 
        /** Read/write semaphore to hold when accessing sb. */
        struct rw_semaphore killsb;
+
+       /** List of device instances belonging to this connection */
+       struct list_head devices;
 };
 
 static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -826,6 +858,9 @@ void fuse_conn_init(struct fuse_conn *fc);
  */
 void fuse_conn_put(struct fuse_conn *fc);
 
+struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc);
+void fuse_dev_free(struct fuse_dev *fud);
+
 /**
  * Add connection to control filesystem
  */
index 082ac1c97f397f7f8014c7896123f8c7a5eebe8f..ac81f48ab2f42e957b532d1162de0e1c9dba9cf6 100644 (file)
@@ -362,8 +362,8 @@ static void fuse_send_destroy(struct fuse_conn *fc)
        if (req && fc->conn_init) {
                fc->destroy_req = NULL;
                req->in.h.opcode = FUSE_DESTROY;
-               req->force = 1;
-               req->background = 0;
+               __set_bit(FR_FORCE, &req->flags);
+               __clear_bit(FR_BACKGROUND, &req->flags);
                fuse_request_send(fc, req);
                fuse_put_request(fc, req);
        }
@@ -567,30 +567,46 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
        return 0;
 }
 
+static void fuse_iqueue_init(struct fuse_iqueue *fiq)
+{
+       memset(fiq, 0, sizeof(struct fuse_iqueue));
+       init_waitqueue_head(&fiq->waitq);
+       INIT_LIST_HEAD(&fiq->pending);
+       INIT_LIST_HEAD(&fiq->interrupts);
+       fiq->forget_list_tail = &fiq->forget_list_head;
+       fiq->connected = 1;
+}
+
+static void fuse_pqueue_init(struct fuse_pqueue *fpq)
+{
+       memset(fpq, 0, sizeof(struct fuse_pqueue));
+       spin_lock_init(&fpq->lock);
+       INIT_LIST_HEAD(&fpq->processing);
+       INIT_LIST_HEAD(&fpq->io);
+       fpq->connected = 1;
+}
+
 void fuse_conn_init(struct fuse_conn *fc)
 {
        memset(fc, 0, sizeof(*fc));
        spin_lock_init(&fc->lock);
        init_rwsem(&fc->killsb);
        atomic_set(&fc->count, 1);
-       init_waitqueue_head(&fc->waitq);
+       atomic_set(&fc->dev_count, 1);
        init_waitqueue_head(&fc->blocked_waitq);
        init_waitqueue_head(&fc->reserved_req_waitq);
-       INIT_LIST_HEAD(&fc->pending);
-       INIT_LIST_HEAD(&fc->processing);
-       INIT_LIST_HEAD(&fc->io);
-       INIT_LIST_HEAD(&fc->interrupts);
+       fuse_iqueue_init(&fc->iq);
        INIT_LIST_HEAD(&fc->bg_queue);
        INIT_LIST_HEAD(&fc->entry);
-       fc->forget_list_tail = &fc->forget_list_head;
+       INIT_LIST_HEAD(&fc->devices);
        atomic_set(&fc->num_waiting, 0);
        fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
        fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
        fc->khctr = 0;
        fc->polled_files = RB_ROOT;
-       fc->reqctr = 0;
        fc->blocked = 0;
        fc->initialized = 0;
+       fc->connected = 1;
        fc->attr_version = 1;
        get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
 }
@@ -930,6 +946,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 
 static void fuse_free_conn(struct fuse_conn *fc)
 {
+       WARN_ON(!list_empty(&fc->devices));
        kfree_rcu(fc, rcu);
 }
 
@@ -975,8 +992,42 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
        return 0;
 }
 
+struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc)
+{
+       struct fuse_dev *fud;
+
+       fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
+       if (fud) {
+               fud->fc = fuse_conn_get(fc);
+               fuse_pqueue_init(&fud->pq);
+
+               spin_lock(&fc->lock);
+               list_add_tail(&fud->entry, &fc->devices);
+               spin_unlock(&fc->lock);
+       }
+
+       return fud;
+}
+EXPORT_SYMBOL_GPL(fuse_dev_alloc);
+
+void fuse_dev_free(struct fuse_dev *fud)
+{
+       struct fuse_conn *fc = fud->fc;
+
+       if (fc) {
+               spin_lock(&fc->lock);
+               list_del(&fud->entry);
+               spin_unlock(&fc->lock);
+
+               fuse_conn_put(fc);
+       }
+       kfree(fud);
+}
+EXPORT_SYMBOL_GPL(fuse_dev_free);
+
 static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 {
+       struct fuse_dev *fud;
        struct fuse_conn *fc;
        struct inode *root;
        struct fuse_mount_data d;
@@ -1026,12 +1077,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
                goto err_fput;
 
        fuse_conn_init(fc);
+       fc->release = fuse_free_conn;
+
+       fud = fuse_dev_alloc(fc);
+       if (!fud)
+               goto err_put_conn;
 
        fc->dev = sb->s_dev;
        fc->sb = sb;
        err = fuse_bdi_init(fc, sb);
        if (err)
-               goto err_put_conn;
+               goto err_dev_free;
 
        sb->s_bdi = &fc->bdi;
 
@@ -1040,7 +1096,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
                fc->dont_mask = 1;
        sb->s_flags |= MS_POSIXACL;
 
-       fc->release = fuse_free_conn;
        fc->flags = d.flags;
        fc->user_id = d.user_id;
        fc->group_id = d.group_id;
@@ -1053,14 +1108,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        root = fuse_get_root_inode(sb, d.rootmode);
        root_dentry = d_make_root(root);
        if (!root_dentry)
-               goto err_put_conn;
+               goto err_dev_free;
        /* only now - we want root dentry with NULL ->d_op */
        sb->s_d_op = &fuse_dentry_operations;
 
        init_req = fuse_request_alloc(0);
        if (!init_req)
                goto err_put_root;
-       init_req->background = 1;
+       __set_bit(FR_BACKGROUND, &init_req->flags);
 
        if (is_bdev) {
                fc->destroy_req = fuse_request_alloc(0);
@@ -1079,8 +1134,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
        list_add_tail(&fc->entry, &fuse_conn_list);
        sb->s_root = root_dentry;
-       fc->connected = 1;
-       file->private_data = fuse_conn_get(fc);
+       file->private_data = fud;
        mutex_unlock(&fuse_mutex);
        /*
         * atomic_dec_and_test() in fput() provides the necessary
@@ -1099,6 +1153,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        fuse_request_free(init_req);
  err_put_root:
        dput(root_dentry);
+ err_dev_free:
+       fuse_dev_free(fud);
  err_put_conn:
        fuse_bdi_destroy(fc);
        fuse_conn_put(fc);
index 8d129bb7355afbb2ca7f1904ff0263f604e86dd6..682529c009966b85f986955c04d2b48fb645e981 100644 (file)
@@ -458,7 +458,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
  * pg_authenticate method for nfsv4 callback threads.
  *
  * The authflavor has been negotiated, so an incorrect flavor is a server
- * bug. Drop packets with incorrect authflavor.
+ * bug. Deny packets with incorrect authflavor.
  *
  * All other checking done after NFS decoding where the nfs_client can be
  * found in nfs4_callback_compound
@@ -468,12 +468,12 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
        switch (rqstp->rq_authop->flavour) {
        case RPC_AUTH_NULL:
                if (rqstp->rq_proc != CB_NULL)
-                       return SVC_DROP;
+                       return SVC_DENIED;
                break;
        case RPC_AUTH_GSS:
                /* No RPC_AUTH_GSS support yet in NFSv4.1 */
                 if (svc_is_backchannel(rqstp))
-                       return SVC_DROP;
+                       return SVC_DENIED;
        }
        return SVC_OK;
 }
index 197806fb87ffb459c19f3c4bbc8da50c58c870dc..29e3c1b011b73e4661f4deb1ef200e2f8d27792b 100644 (file)
@@ -327,10 +327,8 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
        dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr);
 
        /* Normal */
-       if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
-               slot->seq_nr++;
+       if (likely(args->csa_sequenceid == slot->seq_nr + 1))
                goto out_ok;
-       }
 
        /* Replay */
        if (args->csa_sequenceid == slot->seq_nr) {
@@ -418,6 +416,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
                              struct cb_process_state *cps)
 {
        struct nfs4_slot_table *tbl;
+       struct nfs4_slot *slot;
        struct nfs_client *clp;
        int i;
        __be32 status = htonl(NFS4ERR_BADSESSION);
@@ -429,25 +428,32 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 
        if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
                goto out;
+
        tbl = &clp->cl_session->bc_slot_table;
+       slot = tbl->slots + args->csa_slotid;
 
        spin_lock(&tbl->slot_tbl_lock);
        /* state manager is resetting the session */
        if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
-               spin_unlock(&tbl->slot_tbl_lock);
                status = htonl(NFS4ERR_DELAY);
                /* Return NFS4ERR_BADSESSION if we're draining the session
                 * in order to reset it.
                 */
                if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
                        status = htonl(NFS4ERR_BADSESSION);
-               goto out;
+               goto out_unlock;
        }
 
-       status = validate_seqid(&clp->cl_session->bc_slot_table, args);
-       spin_unlock(&tbl->slot_tbl_lock);
+       memcpy(&res->csr_sessionid, &args->csa_sessionid,
+              sizeof(res->csr_sessionid));
+       res->csr_sequenceid = args->csa_sequenceid;
+       res->csr_slotid = args->csa_slotid;
+       res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+       res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+
+       status = validate_seqid(tbl, args);
        if (status)
-               goto out;
+               goto out_unlock;
 
        cps->slotid = args->csa_slotid;
 
@@ -458,15 +464,17 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
         */
        if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
                status = htonl(NFS4ERR_DELAY);
-               goto out;
+               goto out_unlock;
        }
 
-       memcpy(&res->csr_sessionid, &args->csa_sessionid,
-              sizeof(res->csr_sessionid));
-       res->csr_sequenceid = args->csa_sequenceid;
-       res->csr_slotid = args->csa_slotid;
-       res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
-       res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+       /*
+        * RFC5661 20.9.3
+        * If CB_SEQUENCE returns an error, then the state of the slot
+        * (sequence ID, cached reply) MUST NOT change.
+        */
+       slot->seq_nr++;
+out_unlock:
+       spin_unlock(&tbl->slot_tbl_lock);
 
 out:
        cps->clp = clp; /* put in nfs4_callback_compound */
index 19ca95cdfd9b0f26aedbbc23f036babf2aeca67a..6b1697a01dde35e1384d72a4a7cd2e02d1afbf84 100644 (file)
@@ -909,7 +909,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
        xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
 
        status = decode_compound_hdr_arg(&xdr_in, &hdr_arg);
-       if (status == __constant_htonl(NFS4ERR_RESOURCE))
+       if (status == htonl(NFS4ERR_RESOURCE))
                return rpc_garbage_args;
 
        if (hdr_arg.minorversion == 0) {
index 892aefff36300a0861f9bf5d43cb7a4b5069d873..ecebb406cc1aec554ce780f2c3680eddc351e8da 100644 (file)
@@ -825,7 +825,6 @@ error:
  * Load up the server record from information gained in an fsinfo record
  */
 static void nfs_server_set_fsinfo(struct nfs_server *server,
-                                 struct nfs_fh *mntfh,
                                  struct nfs_fsinfo *fsinfo)
 {
        unsigned long max_rpc_payload;
@@ -901,7 +900,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
        if (error < 0)
                goto out_error;
 
-       nfs_server_set_fsinfo(server, mntfh, &fsinfo);
+       nfs_server_set_fsinfo(server, &fsinfo);
 
        /* Get some general file system info */
        if (server->namelen == 0) {
@@ -1193,8 +1192,6 @@ void nfs_clients_init(struct net *net)
 }
 
 #ifdef CONFIG_PROC_FS
-static struct proc_dir_entry *proc_fs_nfs;
-
 static int nfs_server_list_open(struct inode *inode, struct file *file);
 static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
 static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
@@ -1364,27 +1361,29 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
 {
        struct nfs_server *server;
        struct nfs_client *clp;
-       char dev[8], fsid[17];
+       char dev[13];   // 8 for 2^24, 1 for ':', 3 for 2^8, 1 for '\0'
+       char fsid[34];  // 2 * 16 for %llx, 1 for ':', 1 for '\0'
        struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
 
        /* display header on line 1 */
        if (v == &nn->nfs_volume_list) {
-               seq_puts(m, "NV SERVER   PORT DEV     FSID              FSC\n");
+               seq_puts(m, "NV SERVER   PORT DEV          FSID"
+                           "                              FSC\n");
                return 0;
        }
        /* display one transport per line on subsequent lines */
        server = list_entry(v, struct nfs_server, master_link);
        clp = server->nfs_client;
 
-       snprintf(dev, 8, "%u:%u",
+       snprintf(dev, sizeof(dev), "%u:%u",
                 MAJOR(server->s_dev), MINOR(server->s_dev));
 
-       snprintf(fsid, 17, "%llx:%llx",
+       snprintf(fsid, sizeof(fsid), "%llx:%llx",
                 (unsigned long long) server->fsid.major,
                 (unsigned long long) server->fsid.minor);
 
        rcu_read_lock();
-       seq_printf(m, "v%u %s %s %-7s %-17s %s\n",
+       seq_printf(m, "v%u %s %s %-12s %-33s %s\n",
                   clp->rpc_ops->version,
                   rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
                   rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
@@ -1434,27 +1433,20 @@ void nfs_fs_proc_net_exit(struct net *net)
  */
 int __init nfs_fs_proc_init(void)
 {
-       struct proc_dir_entry *p;
-
-       proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL);
-       if (!proc_fs_nfs)
+       if (!proc_mkdir("fs/nfsfs", NULL))
                goto error_0;
 
        /* a file of servers with which we're dealing */
-       p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers");
-       if (!p)
+       if (!proc_symlink("fs/nfsfs/servers", NULL, "../../net/nfsfs/servers"))
                goto error_1;
 
        /* a file of volumes that we have mounted */
-       p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes");
-       if (!p)
-               goto error_2;
-       return 0;
+       if (!proc_symlink("fs/nfsfs/volumes", NULL, "../../net/nfsfs/volumes"))
+               goto error_1;
 
-error_2:
-       remove_proc_entry("servers", proc_fs_nfs);
+       return 0;
 error_1:
-       remove_proc_entry("fs/nfsfs", NULL);
+       remove_proc_subtree("fs/nfsfs", NULL);
 error_0:
        return -ENOMEM;
 }
@@ -1464,9 +1456,7 @@ error_0:
  */
 void nfs_fs_proc_exit(void)
 {
-       remove_proc_entry("volumes", proc_fs_nfs);
-       remove_proc_entry("servers", proc_fs_nfs);
-       remove_proc_entry("fs/nfsfs", NULL);
+       remove_proc_subtree("fs/nfsfs", NULL);
 }
 
 #endif /* CONFIG_PROC_FS */
index b2c8b31b2be77d9a1d524b230ed2b66e479ad3fe..21457bb0edd62b42af307d5850b336711f41e82f 100644 (file)
@@ -1470,9 +1470,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
 {
        int err;
 
-       if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
-               *opened |= FILE_CREATED;
-
        err = finish_open(file, dentry, do_open, opened);
        if (err)
                goto out;
index 8b8d83a526ce2366ae974a87761c9c62c22da7f5..cc4fa1ed61fc5bdfe04d1afcaa5f081bb3ba0470 100644 (file)
@@ -555,31 +555,22 @@ static int nfs_launder_page(struct page *page)
        return nfs_wb_page(inode, page);
 }
 
-#ifdef CONFIG_NFS_SWAP
 static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
                                                sector_t *span)
 {
-       int ret;
        struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
 
        *span = sis->pages;
 
-       rcu_read_lock();
-       ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1);
-       rcu_read_unlock();
-
-       return ret;
+       return rpc_clnt_swap_activate(clnt);
 }
 
 static void nfs_swap_deactivate(struct file *file)
 {
        struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
 
-       rcu_read_lock();
-       xs_swapper(rcu_dereference(clnt->cl_xprt), 0);
-       rcu_read_unlock();
+       rpc_clnt_swap_deactivate(clnt);
 }
-#endif
 
 const struct address_space_operations nfs_file_aops = {
        .readpage = nfs_readpage,
@@ -596,10 +587,8 @@ const struct address_space_operations nfs_file_aops = {
        .launder_page = nfs_launder_page,
        .is_dirty_writeback = nfs_check_dirty_writeback,
        .error_remove_page = generic_error_remove_page,
-#ifdef CONFIG_NFS_SWAP
        .swap_activate = nfs_swap_activate,
        .swap_deactivate = nfs_swap_deactivate,
-#endif
 };
 
 /*
index 7d05089e52d6c8b7a29e92e80011bdee0e06a32c..c12951b9551eab8b0394ed2aa218cf15ce6f2c39 100644 (file)
@@ -20,6 +20,7 @@
 #include "../nfs4trace.h"
 #include "../iostat.h"
 #include "../nfs.h"
+#include "../nfs42.h"
 
 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
 
@@ -182,17 +183,14 @@ static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls)
 
 static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
 {
-       struct nfs4_ff_layout_mirror *tmp;
        int i, j;
 
        for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
                for (j = i + 1; j < fls->mirror_array_cnt; j++)
                        if (fls->mirror_array[i]->efficiency <
-                           fls->mirror_array[j]->efficiency) {
-                               tmp = fls->mirror_array[i];
-                               fls->mirror_array[i] = fls->mirror_array[j];
-                               fls->mirror_array[j] = tmp;
-                       }
+                           fls->mirror_array[j]->efficiency)
+                               swap(fls->mirror_array[i],
+                                    fls->mirror_array[j]);
        }
 }
 
@@ -274,6 +272,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
 
                spin_lock_init(&fls->mirror_array[i]->lock);
                fls->mirror_array[i]->ds_count = ds_count;
+               fls->mirror_array[i]->lseg = &fls->generic_hdr;
 
                /* deviceid */
                rc = decode_deviceid(&stream, &devid);
@@ -344,6 +343,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
                        fls->mirror_array[i]->gid);
        }
 
+       p = xdr_inline_decode(&stream, 4);
+       if (p)
+               fls->flags = be32_to_cpup(p);
+
        ff_layout_sort_mirrors(fls);
        rc = ff_layout_check_layout(lgr);
        if (rc)
@@ -415,6 +418,146 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls)
        return 1;
 }
 
+static void
+nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer)
+{
+       /* first IO request? */
+       if (atomic_inc_return(&timer->n_ops) == 1) {
+               timer->start_time = ktime_get();
+       }
+}
+
+static ktime_t
+nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer)
+{
+       ktime_t start, now;
+
+       if (atomic_dec_return(&timer->n_ops) < 0)
+               WARN_ON_ONCE(1);
+
+       now = ktime_get();
+       start = timer->start_time;
+       timer->start_time = now;
+       return ktime_sub(now, start);
+}
+
+static ktime_t
+nfs4_ff_layout_calc_completion_time(struct rpc_task *task)
+{
+       return ktime_sub(ktime_get(), task->tk_start);
+}
+
+static bool
+nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
+                           struct nfs4_ff_layoutstat *layoutstat)
+{
+       static const ktime_t notime = {0};
+       ktime_t now = ktime_get();
+
+       nfs4_ff_start_busy_timer(&layoutstat->busy_timer);
+       if (ktime_equal(mirror->start_time, notime))
+               mirror->start_time = now;
+       if (ktime_equal(mirror->last_report_time, notime))
+               mirror->last_report_time = now;
+       if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
+                       FF_LAYOUTSTATS_REPORT_INTERVAL) {
+               mirror->last_report_time = now;
+               return true;
+       }
+
+       return false;
+}
+
+static void
+nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat,
+               __u64 requested)
+{
+       struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
+
+       iostat->ops_requested++;
+       iostat->bytes_requested += requested;
+}
+
+static void
+nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat,
+               __u64 requested,
+               __u64 completed,
+               ktime_t time_completed)
+{
+       struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
+       ktime_t timer;
+
+       iostat->ops_completed++;
+       iostat->bytes_completed += completed;
+       iostat->bytes_not_delivered += requested - completed;
+
+       timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer);
+       iostat->total_busy_time =
+                       ktime_add(iostat->total_busy_time, timer);
+       iostat->aggregate_completion_time =
+                       ktime_add(iostat->aggregate_completion_time, time_completed);
+}
+
+static void
+nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror,
+               __u64 requested)
+{
+       bool report;
+
+       spin_lock(&mirror->lock);
+       report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat);
+       nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested);
+       spin_unlock(&mirror->lock);
+
+       if (report)
+               pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode);
+}
+
+static void
+nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,
+               struct nfs4_ff_layout_mirror *mirror,
+               __u64 requested,
+               __u64 completed)
+{
+       spin_lock(&mirror->lock);
+       nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat,
+                       requested, completed,
+                       nfs4_ff_layout_calc_completion_time(task));
+       spin_unlock(&mirror->lock);
+}
+
+static void
+nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror,
+               __u64 requested)
+{
+       bool report;
+
+       spin_lock(&mirror->lock);
+       report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat);
+       nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested);
+       spin_unlock(&mirror->lock);
+
+       if (report)
+               pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode);
+}
+
+static void
+nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
+               struct nfs4_ff_layout_mirror *mirror,
+               __u64 requested,
+               __u64 completed,
+               enum nfs3_stable_how committed)
+{
+       if (committed == NFS_UNSTABLE)
+               requested = completed = 0;
+
+       spin_lock(&mirror->lock);
+       nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat,
+                       requested, completed,
+                       nfs4_ff_layout_calc_completion_time(task));
+       spin_unlock(&mirror->lock);
+}
+
 static int
 ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
                            struct nfs_commit_info *cinfo,
@@ -631,7 +774,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
                        nfs_direct_set_resched_writes(hdr->dreq);
                        /* fake unstable write to let common nfs resend pages */
                        hdr->verf.committed = NFS_UNSTABLE;
-                       hdr->good_bytes = 0;
+                       hdr->good_bytes = hdr->args.count;
                }
                return;
        }
@@ -879,6 +1022,12 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
        return 0;
 }
 
+static bool
+ff_layout_need_layoutcommit(struct pnfs_layout_segment *lseg)
+{
+       return !(FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_LAYOUTCOMMIT);
+}
+
 /*
  * We reference the rpc_cred of the first WRITE that triggers the need for
  * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
@@ -891,6 +1040,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
 static void
 ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr)
 {
+       if (!ff_layout_need_layoutcommit(hdr->lseg))
+               return;
+
        pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
                        hdr->mds_offset + hdr->res.count);
        dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
@@ -909,6 +1061,10 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
 static int ff_layout_read_prepare_common(struct rpc_task *task,
                                         struct nfs_pgio_header *hdr)
 {
+       nfs4_ff_layout_stat_io_start_read(
+                       FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+                       hdr->args.count);
+
        if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
                rpc_exit(task, -EIO);
                return -EIO;
@@ -962,15 +1118,15 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
 {
        struct nfs_pgio_header *hdr = data;
 
-       if (ff_layout_read_prepare_common(task, hdr))
-               return;
-
        if (ff_layout_setup_sequence(hdr->ds_clp,
                                     &hdr->args.seq_args,
                                     &hdr->res.seq_res,
                                     task))
                return;
 
+       if (ff_layout_read_prepare_common(task, hdr))
+               return;
+
        if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
                        hdr->args.lock_context, FMODE_READ) == -EIO)
                rpc_exit(task, -EIO); /* lost lock, terminate I/O */
@@ -982,6 +1138,10 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data)
 
        dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
 
+       nfs4_ff_layout_stat_io_end_read(task,
+                       FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+                       hdr->args.count, hdr->res.count);
+
        if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
            task->tk_status == 0) {
                nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1074,7 +1234,8 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
                return -EAGAIN;
        }
 
-       if (data->verf.committed == NFS_UNSTABLE)
+       if (data->verf.committed == NFS_UNSTABLE
+           && ff_layout_need_layoutcommit(data->lseg))
                pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
 
        return 0;
@@ -1083,6 +1244,10 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
 static int ff_layout_write_prepare_common(struct rpc_task *task,
                                          struct nfs_pgio_header *hdr)
 {
+       nfs4_ff_layout_stat_io_start_write(
+                       FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+                       hdr->args.count);
+
        if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
                rpc_exit(task, -EIO);
                return -EIO;
@@ -1116,15 +1281,15 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
 {
        struct nfs_pgio_header *hdr = data;
 
-       if (ff_layout_write_prepare_common(task, hdr))
-               return;
-
        if (ff_layout_setup_sequence(hdr->ds_clp,
                                     &hdr->args.seq_args,
                                     &hdr->res.seq_res,
                                     task))
                return;
 
+       if (ff_layout_write_prepare_common(task, hdr))
+               return;
+
        if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
                        hdr->args.lock_context, FMODE_WRITE) == -EIO)
                rpc_exit(task, -EIO); /* lost lock, terminate I/O */
@@ -1134,6 +1299,11 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data)
 {
        struct nfs_pgio_header *hdr = data;
 
+       nfs4_ff_layout_stat_io_end_write(task,
+                       FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+                       hdr->args.count, hdr->res.count,
+                       hdr->res.verf->committed);
+
        if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
            task->tk_status == 0) {
                nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1152,8 +1322,17 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
            &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
 }
 
+static void ff_layout_commit_prepare_common(struct rpc_task *task,
+               struct nfs_commit_data *cdata)
+{
+       nfs4_ff_layout_stat_io_start_write(
+                       FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
+                       0);
+}
+
 static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
 {
+       ff_layout_commit_prepare_common(task, data);
        rpc_call_start(task);
 }
 
@@ -1161,10 +1340,30 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
 {
        struct nfs_commit_data *wdata = data;
 
-       ff_layout_setup_sequence(wdata->ds_clp,
+       if (ff_layout_setup_sequence(wdata->ds_clp,
                                 &wdata->args.seq_args,
                                 &wdata->res.seq_res,
-                                task);
+                                task))
+               return;
+       ff_layout_commit_prepare_common(task, data);
+}
+
+/*
+ * rpc_call_done handler installed in both the v3 and v4 flexfiles commit
+ * call ops. Records write-side layoutstats for the mirror selected by
+ * cdata->ds_commit_index, then hands the task to
+ * pnfs_generic_write_commit_done().
+ */
+static void ff_layout_commit_done(struct rpc_task *task, void *data)
+{
+       struct nfs_commit_data *cdata = data;
+       struct nfs_page *req;
+       __u64 count = 0;
+
+       /* bytes are only summed for a successful task; otherwise count is 0 */
+       if (task->tk_status == 0) {
+               list_for_each_entry(req, &cdata->pages, wb_list)
+                       count += req->wb_bytes;
+       }
+
+       /* the same total is passed as both requested and completed bytes */
+       nfs4_ff_layout_stat_io_end_write(task,
+                       FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
+                       count, count, NFS_FILE_SYNC);
+
+       pnfs_generic_write_commit_done(task, data);
 }
 
 static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
@@ -1205,14 +1404,14 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
 
 static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
        .rpc_call_prepare = ff_layout_commit_prepare_v3,
-       .rpc_call_done = pnfs_generic_write_commit_done,
+       .rpc_call_done = ff_layout_commit_done,
        .rpc_count_stats = ff_layout_commit_count_stats,
        .rpc_release = pnfs_generic_commit_release,
 };
 
 static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
        .rpc_call_prepare = ff_layout_commit_prepare_v4,
-       .rpc_call_done = pnfs_generic_write_commit_done,
+       .rpc_call_done = ff_layout_commit_done,
        .rpc_count_stats = ff_layout_commit_count_stats,
        .rpc_release = pnfs_generic_commit_release,
 };
@@ -1256,7 +1455,6 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
        fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
        if (fh)
                hdr->args.fh = fh;
-
        /*
         * Note that if we ever decide to split across DSes,
         * then we may need to handle dense-like offsets.
@@ -1385,6 +1583,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
        fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
        if (fh)
                data->args.fh = fh;
+
        return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
                                   vers == 3 ? &ff_layout_commit_call_ops_v3 :
                                               &ff_layout_commit_call_ops_v4,
@@ -1488,6 +1687,247 @@ out:
        dprintk("%s: Return\n", __func__);
 }
 
+static int
+ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)
+{
+       const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+
+       return snprintf(buf, buflen, "%pI4", &sin->sin_addr);
+}
+
+static size_t
+ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf,
+                         const int buflen)
+{
+       const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+       const struct in6_addr *addr = &sin6->sin6_addr;
+
+       /*
+        * RFC 4291, Section 2.2.2
+        *
+        * Shorthanded ANY address
+        */
+       if (ipv6_addr_any(addr))
+               return snprintf(buf, buflen, "::");
+
+       /*
+        * RFC 4291, Section 2.2.2
+        *
+        * Shorthanded loopback address
+        */
+       if (ipv6_addr_loopback(addr))
+               return snprintf(buf, buflen, "::1");
+
+       /*
+        * RFC 4291, Section 2.2.3
+        *
+        * Special presentation address format for mapped v4
+        * addresses.
+        */
+       if (ipv6_addr_v4mapped(addr))
+               return snprintf(buf, buflen, "::ffff:%pI4",
+                                       &addr->s6_addr32[3]);
+
+       /*
+        * RFC 4291, Section 2.2.1
+        */
+       return snprintf(buf, buflen, "%pI6c", addr);
+}
+
+/* Derived from rpc_sockaddr2uaddr */
+static void
+ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
+{
+       struct sockaddr *sap = (struct sockaddr *)&da->da_addr;
+       char portbuf[RPCBIND_MAXUADDRPLEN];
+       char addrbuf[RPCBIND_MAXUADDRLEN];
+       char *netid;
+       unsigned short port;
+       int len, netid_len;
+       __be32 *p;
+
+       switch (sap->sa_family) {
+       case AF_INET:
+               if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
+                       return;
+               port = ntohs(((struct sockaddr_in *)sap)->sin_port);
+               netid = "tcp";
+               netid_len = 3;
+               break;
+       case AF_INET6:
+               if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0)
+                       return;
+               port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
+               netid = "tcp6";
+               netid_len = 4;
+               break;
+       default:
+               /* we only support tcp and tcp6 */
+               WARN_ON_ONCE(1);
+               return;
+       }
+
+       snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff);
+       len = strlcat(addrbuf, portbuf, sizeof(addrbuf));
+
+       p = xdr_reserve_space(xdr, 4 + netid_len);
+       xdr_encode_opaque(p, netid, netid_len);
+
+       p = xdr_reserve_space(xdr, 4 + len);
+       xdr_encode_opaque(p, addrbuf, len);
+}
+
+static void
+ff_layout_encode_nfstime(struct xdr_stream *xdr,
+                        ktime_t t)
+{
+       struct timespec64 ts;
+       __be32 *p;
+
+       p = xdr_reserve_space(xdr, 12);
+       ts = ktime_to_timespec64(t);
+       p = xdr_encode_hyper(p, ts.tv_sec);
+       *p++ = cpu_to_be32(ts.tv_nsec);
+}
+
+static void
+ff_layout_encode_io_latency(struct xdr_stream *xdr,
+                           struct nfs4_ff_io_stat *stat)
+{
+       __be32 *p;
+
+       p = xdr_reserve_space(xdr, 5 * 8);
+       p = xdr_encode_hyper(p, stat->ops_requested);
+       p = xdr_encode_hyper(p, stat->bytes_requested);
+       p = xdr_encode_hyper(p, stat->ops_completed);
+       p = xdr_encode_hyper(p, stat->bytes_completed);
+       p = xdr_encode_hyper(p, stat->bytes_not_delivered);
+       ff_layout_encode_nfstime(xdr, stat->total_busy_time);
+       ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time);
+}
+
+/*
+ * layoutstats_encode hook for flexfiles: emits the layoutupdate body for one
+ * mirror (taken from devinfo->layout_private), preceded by its byte length.
+ * Only the first DS address and the first filehandle version are encoded.
+ */
+static void
+ff_layout_encode_layoutstats(struct xdr_stream *xdr,
+                            struct nfs42_layoutstat_args *args,
+                            struct nfs42_layoutstat_devinfo *devinfo)
+{
+       struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private;
+       struct nfs4_pnfs_ds_addr *da;
+       struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds;
+       struct nfs_fh *fh = &mirror->fh_versions[0];
+       __be32 *p, *start;
+
+       da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);
+       dprintk("%s: DS %s: encoding address %s\n",
+               __func__, ds->ds_remotestr, da->da_remotestr);
+       /* layoutupdate length: placeholder word, backfilled at the end */
+       start = xdr_reserve_space(xdr, 4);
+       /* netaddr4 */
+       ff_layout_encode_netaddr(xdr, da);
+       /* nfs_fh4 */
+       p = xdr_reserve_space(xdr, 4 + fh->size);
+       xdr_encode_opaque(p, fh->data, fh->size);
+       /* ff_io_latency4 read */
+       /* both io_stat blocks are encoded under mirror->lock */
+       spin_lock(&mirror->lock);
+       ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat);
+       /* ff_io_latency4 write */
+       ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat);
+       spin_unlock(&mirror->lock);
+       /* nfstime4: time elapsed since mirror->start_time */
+       ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time));
+       /* bool: always encoded as false here */
+       p = xdr_reserve_space(xdr, 4);
+       *p = cpu_to_be32(false);
+
+       /*
+        * xdr->p - start is a count of 32-bit words; subtract the length
+        * word itself and convert to bytes.
+        */
+       *start = cpu_to_be32((xdr->p - start - 1) * 4);
+}
+
+/*
+ * Append one LAYOUTSTATS devinfo entry to args->devinfo[] for each usable
+ * mirror of @pls.
+ *
+ * Entries are written at index *dev_count, which is advanced once per entry;
+ * filling stops as soon as *dev_count reaches @dev_limit. Mirrors that are
+ * NULL or have no mirror_ds are skipped. One reference on @pls is taken per
+ * entry written.
+ *
+ * Returns true while *dev_count is still below @dev_limit, i.e. the caller
+ * may continue with the next segment.
+ */
+static bool
+ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args,
+                              struct pnfs_layout_segment *pls,
+                              int *dev_count, int dev_limit)
+{
+       struct nfs4_ff_layout_mirror *mirror;
+       struct nfs4_deviceid_node *dev;
+       struct nfs42_layoutstat_devinfo *devinfo;
+       int i;
+
+       /* mirror indices run from 0 to FF_LAYOUT_MIRROR_COUNT(pls) - 1 */
+       for (i = 0; i < FF_LAYOUT_MIRROR_COUNT(pls); i++) {
+               if (*dev_count >= dev_limit)
+                       break;
+               mirror = FF_LAYOUT_COMP(pls, i);
+               if (!mirror || !mirror->mirror_ds)
+                       continue;
+               dev = FF_LAYOUT_DEVID_NODE(pls, i);
+               devinfo = &args->devinfo[*dev_count];
+               memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
+               devinfo->offset = pls->pls_range.offset;
+               devinfo->length = pls->pls_range.length;
+               /* well, we don't really know if IO is continuous or not! */
+               /*
+                * NOTE(review): the *_count fields are filled from
+                * bytes_completed, same as *_bytes - confirm whether
+                * ops_completed was intended.
+                */
+               devinfo->read_count = mirror->read_stat.io_stat.bytes_completed;
+               devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed;
+               devinfo->write_count = mirror->write_stat.io_stat.bytes_completed;
+               devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed;
+               devinfo->layout_type = LAYOUT_FLEX_FILES;
+               devinfo->layoutstats_encode = ff_layout_encode_layoutstats;
+               devinfo->layout_private = mirror;
+               /* lseg refcount put in cleanup_layoutstats */
+               pnfs_get_lseg(pls);
+
+               ++(*dev_count);
+       }
+
+       return *dev_count < dev_limit;
+}
+
+/*
+ * prepare_layoutstats hook: build args->devinfo[] for a LAYOUTSTATS call on
+ * args->inode.
+ *
+ * The segment list is walked twice under i_lock: once to size the array
+ * (capped at PNFS_LAYOUTSTATS_MAXDEV) and once to fill it. The lock is
+ * dropped in between for the GFP_KERNEL allocation, so the list may have
+ * changed by the second walk; the fill is therefore bounded by the number of
+ * entries actually allocated, not by the global cap.
+ *
+ * Returns 0 with args->num_dev set to the number of entries filled, or
+ * -ENOMEM if the array cannot be allocated.
+ */
+static int
+ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
+{
+       struct pnfs_layout_segment *pls;
+       int dev_count = 0;
+       int dev_limit;
+
+       spin_lock(&args->inode->i_lock);
+       list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
+               dev_count += FF_LAYOUT_MIRROR_COUNT(pls);
+       }
+       spin_unlock(&args->inode->i_lock);
+       /* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */
+       if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) {
+               dprintk("%s: truncating devinfo to limit (%d:%d)\n",
+                       __func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV);
+               dev_count = PNFS_LAYOUTSTATS_MAXDEV;
+       }
+       args->devinfo = kmalloc(dev_count * sizeof(*args->devinfo), GFP_KERNEL);
+       if (!args->devinfo)
+               return -ENOMEM;
+
+       /* remember the allocated capacity before reusing dev_count as index */
+       dev_limit = dev_count;
+       dev_count = 0;
+       spin_lock(&args->inode->i_lock);
+       list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
+               if (!ff_layout_mirror_prepare_stats(args, pls, &dev_count,
+                                                   dev_limit)) {
+                       break;
+               }
+       }
+       spin_unlock(&args->inode->i_lock);
+       args->num_dev = dev_count;
+
+       return 0;
+}
+
+/*
+ * cleanup_layoutstats hook: for each of the args.num_dev devinfo entries,
+ * clear layout_private and drop one lseg reference through the mirror's
+ * back pointer (the reference taken when the entry was prepared).
+ */
+static void
+ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data)
+{
+       struct nfs4_ff_layout_mirror *mirror;
+       int i;
+
+       for (i = 0; i < data->args.num_dev; i++) {
+               mirror = data->args.devinfo[i].layout_private;
+               data->args.devinfo[i].layout_private = NULL;
+               pnfs_put_lseg(mirror->lseg);
+       }
+}
+
 static struct pnfs_layoutdriver_type flexfilelayout_type = {
        .id                     = LAYOUT_FLEX_FILES,
        .name                   = "LAYOUT_FLEX_FILES",
@@ -1510,6 +1950,8 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
        .alloc_deviceid_node    = ff_layout_alloc_deviceid_node,
        .encode_layoutreturn    = ff_layout_encode_layoutreturn,
        .sync                   = pnfs_nfs_generic_sync,
+       .prepare_layoutstats    = ff_layout_prepare_layoutstats,
+       .cleanup_layoutstats    = ff_layout_cleanup_layoutstats,
 };
 
 static int __init nfs4flexfilelayout_init(void)
index 070f20445b2d33883445038d4888538845555198..f92f9a0a856b3e698c8859923438549d1bffed37 100644 (file)
@@ -9,12 +9,17 @@
 #ifndef FS_NFS_NFS4FLEXFILELAYOUT_H
 #define FS_NFS_NFS4FLEXFILELAYOUT_H
 
+#define FF_FLAGS_NO_LAYOUTCOMMIT 1
+
 #include "../pnfs.h"
 
 /* XXX: Let's filter out insanely large mirror count for now to avoid oom
  * due to network error etc. */
 #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096
 
+/* LAYOUTSTATS report interval in ms */
+#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)
+
 struct nfs4_ff_ds_version {
        u32                             version;
        u32                             minor_version;
@@ -41,24 +46,48 @@ struct nfs4_ff_layout_ds_err {
        struct nfs4_deviceid            deviceid;
 };
 
+struct nfs4_ff_io_stat {
+       __u64                           ops_requested;
+       __u64                           bytes_requested;
+       __u64                           ops_completed;
+       __u64                           bytes_completed;
+       __u64                           bytes_not_delivered;
+       ktime_t                         total_busy_time;
+       ktime_t                         aggregate_completion_time;
+};
+
+struct nfs4_ff_busy_timer {
+       ktime_t start_time;
+       atomic_t n_ops;
+};
+
+struct nfs4_ff_layoutstat {
+       struct nfs4_ff_io_stat io_stat;
+       struct nfs4_ff_busy_timer busy_timer;
+};
+
 struct nfs4_ff_layout_mirror {
+       struct pnfs_layout_segment      *lseg; /* back pointer */
        u32                             ds_count;
        u32                             efficiency;
        struct nfs4_ff_layout_ds        *mirror_ds;
        u32                             fh_versions_cnt;
        struct nfs_fh                   *fh_versions;
        nfs4_stateid                    stateid;
-       struct nfs4_string              user_name;
-       struct nfs4_string              group_name;
        u32                             uid;
        u32                             gid;
        struct rpc_cred                 *cred;
        spinlock_t                      lock;
+       struct nfs4_ff_layoutstat       read_stat;
+       struct nfs4_ff_layoutstat       write_stat;
+       ktime_t                         start_time;
+       ktime_t                         last_report_time;
 };
 
 struct nfs4_ff_layout_segment {
        struct pnfs_layout_segment      generic_hdr;
        u64                             stripe_unit;
+       u32                             flags;
        u32                             mirror_array_cnt;
        struct nfs4_ff_layout_mirror    **mirror_array;
 };
index 77a2d026aa12b62bdc29dac2345cff0b3237e9c4..f13e1969eedd911bf6a5d9be6af6e4ae403f6c1e 100644 (file)
@@ -324,7 +324,8 @@ static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror,
                                __func__, PTR_ERR(cred));
                        return PTR_ERR(cred);
                } else {
-                       mirror->cred = cred;
+                       if (cmpxchg(&mirror->cred, NULL, cred))
+                               put_rpccred(cred);
                }
        }
        return 0;
@@ -386,7 +387,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
        /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
        smp_rmb();
        if (ds->ds_clp)
-               goto out;
+               goto out_update_creds;
 
        flavor = nfs4_ff_layout_choose_authflavor(mirror);
 
@@ -430,7 +431,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
                        }
                }
        }
-
+out_update_creds:
        if (ff_layout_update_mirror_cred(mirror, ds))
                ds = NULL;
 out:
index f734562c6d244034cb5036fee8ab0b7d69cc90c5..b77b328a06d74f0124d2a65b51fac0fc21fbd692 100644 (file)
@@ -678,6 +678,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
        if (!err) {
                generic_fillattr(inode, stat);
                stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
+               if (S_ISDIR(inode->i_mode))
+                       stat->blksize = NFS_SERVER(inode)->dtsize;
        }
 out:
        trace_nfs_getattr_exit(inode, err);
@@ -2008,17 +2010,15 @@ static int __init init_nfs_fs(void)
        if (err)
                goto out1;
 
-#ifdef CONFIG_PROC_FS
        rpc_proc_register(&init_net, &nfs_rpcstat);
-#endif
-       if ((err = register_nfs_fs()) != 0)
+
+       err = register_nfs_fs();
+       if (err)
                goto out0;
 
        return 0;
 out0:
-#ifdef CONFIG_PROC_FS
        rpc_proc_unregister(&init_net, "nfs");
-#endif
        nfs_destroy_directcache();
 out1:
        nfs_destroy_writepagecache();
@@ -2049,9 +2049,7 @@ static void __exit exit_nfs_fs(void)
        nfs_destroy_nfspagecache();
        nfs_fscache_unregister();
        unregister_pernet_subsys(&nfs_net_ops);
-#ifdef CONFIG_PROC_FS
        rpc_proc_unregister(&init_net, "nfs");
-#endif
        unregister_nfs_fs();
        nfs_fs_proc_exit();
        nfsiod_stop();
index 53852a4bd88be68781bb9dd8a39760fd497d1d61..9b04c2e6fffc3f306f3c598b7c4557beff653c8e 100644 (file)
@@ -1342,7 +1342,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
        if (args->npages != 0)
                xdr_write_pages(xdr, args->pages, 0, args->len);
        else
-               xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE);
+               xdr_reserve_space(xdr, args->len);
 
        error = nfsacl_encode(xdr->buf, base, args->inode,
                            (args->mask & NFS_ACL) ?
index 7afb8947dfdf3e299ee39c6490640f40567a70b7..ff66ae700b8991eeed513e210397f6639c0e5f70 100644 (file)
@@ -5,11 +5,18 @@
 #ifndef __LINUX_FS_NFS_NFS4_2_H
 #define __LINUX_FS_NFS_NFS4_2_H
 
+/*
+ * FIXME:  four LAYOUTSTATS calls per compound at most! Do we need to support
+ * more? Need to consider not to pre-alloc too much for a compound.
+ */
+#define PNFS_LAYOUTSTATS_MAXDEV (4)
+
 /* nfs4.2proc.c */
 int nfs42_proc_allocate(struct file *, loff_t, loff_t);
 int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
 loff_t nfs42_proc_llseek(struct file *, loff_t, int);
-
+int nfs42_proc_layoutstats_generic(struct nfs_server *,
+                                  struct nfs42_layoutstat_data *);
 /* nfs4.2xdr.h */
 extern struct rpc_procinfo nfs4_2_procedures[];
 
index 3a9e75235f30e60e5a4ae974864c63fb003b969d..f486b80f927ab7204159852a9740900a6c73aec6 100644 (file)
 #include <linux/nfs_fs.h>
 #include "nfs4_fs.h"
 #include "nfs42.h"
+#include "iostat.h"
+#include "pnfs.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY NFSDBG_PNFS
 
 static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
                                fmode_t fmode)
@@ -165,3 +170,85 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
 
        return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
 }
+
+static void
+nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata)
+{
+       struct nfs42_layoutstat_data *data = calldata;
+       struct nfs_server *server = NFS_SERVER(data->args.inode);
+
+       nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args,
+                            &data->res.seq_res, task);
+}
+
+/*
+ * rpc_call_done handler for LAYOUTSTATS. Does nothing further when
+ * nfs4_sequence_done() returns 0; otherwise inspects task->tk_status.
+ */
+static void
+nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
+{
+       struct nfs42_layoutstat_data *data = calldata;
+
+       if (!nfs4_sequence_done(task, &data->res.seq_res))
+               return;
+
+       switch (task->tk_status) {
+       case 0:
+               break;
+       case -ENOTSUPP:
+       case -EOPNOTSUPP:
+               /* clear the server's LAYOUTSTATS capability bit */
+               NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
+               /* no break: falls through so the status is logged as well */
+       default:
+               dprintk("%s server returns %d\n", __func__, task->tk_status);
+       }
+}
+
+/*
+ * rpc_release callback for LAYOUTSTATS; also called directly by
+ * nfs42_proc_layoutstats_generic() when the inode cannot be grabbed.
+ * Runs the layout driver's cleanup hook, drops the layout header and inode
+ * references, clears NFS_INO_LAYOUTSTATS and frees @calldata.
+ */
+static void
+nfs42_layoutstat_release(void *calldata)
+{
+       struct nfs42_layoutstat_data *data = calldata;
+       struct nfs_server *nfss = NFS_SERVER(data->args.inode);
+
+       /* the driver hook is optional */
+       if (nfss->pnfs_curr_ld->cleanup_layoutstats)
+               nfss->pnfs_curr_ld->cleanup_layoutstats(data);
+
+       pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout);
+       /* the flag clear is bracketed by full barriers on both sides */
+       smp_mb__before_atomic();
+       clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags);
+       smp_mb__after_atomic();
+       /*
+        * NOTE(review): data->inode is NULL on the direct-call path above;
+        * presumably nfs_iput_and_deactive() tolerates that - confirm.
+        */
+       nfs_iput_and_deactive(data->inode);
+       kfree(data->args.devinfo);
+       kfree(data);
+}
+
+static const struct rpc_call_ops nfs42_layoutstat_ops = {
+       .rpc_call_prepare = nfs42_layoutstat_prepare,
+       .rpc_call_done = nfs42_layoutstat_done,
+       .rpc_release = nfs42_layoutstat_release,
+};
+
+/*
+ * Start an asynchronous LAYOUTSTATS RPC on @server's client using the
+ * pre-filled @data. The call is fire-and-forget: completion and cleanup are
+ * handled by the nfs42_layoutstat_ops callbacks.
+ *
+ * Returns 0 once the task has been started, -EAGAIN if the inode could not
+ * be grabbed (in which case @data is released here), or the error returned
+ * by rpc_run_task().
+ */
+int nfs42_proc_layoutstats_generic(struct nfs_server *server,
+                                  struct nfs42_layoutstat_data *data)
+{
+       struct rpc_message msg = {
+               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS],
+               .rpc_argp = &data->args,
+               .rpc_resp = &data->res,
+       };
+       struct rpc_task_setup task_setup = {
+               .rpc_client = server->client,
+               .rpc_message = &msg,
+               .callback_ops = &nfs42_layoutstat_ops,
+               .callback_data = data,
+               .flags = RPC_TASK_ASYNC,
+       };
+       struct rpc_task *task;
+
+       data->inode = nfs_igrab_and_active(data->args.inode);
+       if (!data->inode) {
+               nfs42_layoutstat_release(data);
+               return -EAGAIN;
+       }
+       nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
+       task = rpc_run_task(&task_setup);
+       if (IS_ERR(task))
+               return PTR_ERR(task);
+       /* nobody waits on the task: drop the reference rpc_run_task() returned */
+       rpc_put_task(task);
+       return 0;
+}
index 1a25b27248f2ff5fd0026b00a3032589ee8cdff5..a6bd27da6286f9fee14f0b087eddcc1ec437cdde 100644 (file)
@@ -4,6 +4,8 @@
 #ifndef __LINUX_FS_NFS_NFS4_2XDR_H
 #define __LINUX_FS_NFS_NFS4_2XDR_H
 
+#include "nfs42.h"
+
 #define encode_fallocate_maxsz         (encode_stateid_maxsz + \
                                         2 /* offset */ + \
                                         2 /* length */)
                                         1 /* whence */ + \
                                         2 /* offset */ + \
                                         2 /* length */)
+#define encode_io_info_maxsz           4
+#define encode_layoutstats_maxsz       (op_decode_hdr_maxsz + \
+                                       2 /* offset */ + \
+                                       2 /* length */ + \
+                                       encode_stateid_maxsz + \
+                                       encode_io_info_maxsz + \
+                                       encode_io_info_maxsz + \
+                                       1 /* opaque devaddr4 length */ + \
+                                       XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE))
+#define decode_layoutstats_maxsz       (op_decode_hdr_maxsz)
 
 #define NFS4_enc_allocate_sz           (compound_encode_hdr_maxsz + \
                                         encode_putfh_maxsz + \
 #define NFS4_dec_seek_sz               (compound_decode_hdr_maxsz + \
                                         decode_putfh_maxsz + \
                                         decode_seek_maxsz)
+#define NFS4_enc_layoutstats_sz                (compound_encode_hdr_maxsz + \
+                                        encode_sequence_maxsz + \
+                                        encode_putfh_maxsz + \
+                                        PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz)
+#define NFS4_dec_layoutstats_sz                (compound_decode_hdr_maxsz + \
+                                        decode_sequence_maxsz + \
+                                        decode_putfh_maxsz + \
+                                        PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz)
 
 
 static void encode_fallocate(struct xdr_stream *xdr,
@@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr,
        encode_uint32(xdr, args->sa_what);
 }
 
+/*
+ * Encode a single LAYOUTSTATS operation for @devinfo: offset and length,
+ * the stateid from @args, the read/write counters, the device id, the
+ * layout type and finally the layout-specific update body.
+ */
+static void encode_layoutstats(struct xdr_stream *xdr,
+                              struct nfs42_layoutstat_args *args,
+                              struct nfs42_layoutstat_devinfo *devinfo,
+                              struct compound_hdr *hdr)
+{
+       __be32 *p;
+
+       encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr);
+       /* two hypers: offset, length */
+       p = reserve_space(xdr, 8 + 8);
+       p = xdr_encode_hyper(p, devinfo->offset);
+       p = xdr_encode_hyper(p, devinfo->length);
+       encode_nfs4_stateid(xdr, &args->stateid);
+       /* four hypers of counters + device id + one word for the layout type */
+       p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4);
+       p = xdr_encode_hyper(p, devinfo->read_count);
+       p = xdr_encode_hyper(p, devinfo->read_bytes);
+       p = xdr_encode_hyper(p, devinfo->write_count);
+       p = xdr_encode_hyper(p, devinfo->write_bytes);
+       p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data,
+                       NFS4_DEVICEID4_SIZE);
+       /* Encode layoutupdate4 */
+       *p++ = cpu_to_be32(devinfo->layout_type);
+       /* driver hook writes the body; without one a single zero word is sent */
+       if (devinfo->layoutstats_encode != NULL)
+               devinfo->layoutstats_encode(xdr, args, devinfo);
+       else
+               encode_uint32(xdr, 0);
+}
+
 /*
  * Encode ALLOCATE request
  */
@@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
        encode_nops(&hdr);
 }
 
+/*
+ * Encode LAYOUTSTATS request
+ */
+static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
+                                    struct xdr_stream *xdr,
+                                    struct nfs42_layoutstat_args *args)
+{
+       int i;
+
+       struct compound_hdr hdr = {
+               .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+       };
+
+       encode_compound_hdr(xdr, req, &hdr);
+       encode_sequence(xdr, &args->seq_args, &hdr);
+       encode_putfh(xdr, args->fh, &hdr);
+       WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV);
+       for (i = 0; i < args->num_dev; i++)
+               encode_layoutstats(xdr, args, &args->devinfo[i], &hdr);
+       encode_nops(&hdr);
+}
+
 static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
 {
        return decode_op_hdr(xdr, OP_ALLOCATE);
@@ -169,6 +238,12 @@ out_overflow:
        return -EIO;
 }
 
+static int decode_layoutstats(struct xdr_stream *xdr,
+                             struct nfs42_layoutstat_res *res)
+{
+       return decode_op_hdr(xdr, OP_LAYOUTSTATS);
+}
+
 /*
  * Decode ALLOCATE request
  */
@@ -246,4 +321,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
 out:
        return status;
 }
+
+/*
+ * Decode LAYOUTSTATS request
+ */
+static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp,
+                                   struct xdr_stream *xdr,
+                                   struct nfs42_layoutstat_res *res)
+{
+       struct compound_hdr hdr;
+       int status, i;
+
+       status = decode_compound_hdr(xdr, &hdr);
+       if (status)
+               goto out;
+       status = decode_sequence(xdr, &res->seq_res, rqstp);
+       if (status)
+               goto out;
+       status = decode_putfh(xdr);
+       if (status)
+               goto out;
+       WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV);
+       for (i = 0; i < res->num_dev; i++) {
+               status = decode_layoutstats(xdr, res);
+               if (status)
+                       goto out;
+       }
+out:
+       res->rpc_status = status;
+       return status;
+}
+
 #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
index fdef424b0cd3c6120ddc7d9e379e49647b7b24fb..ea3bee919a765840a267f8fc59ccdec4ef61f676 100644 (file)
@@ -233,6 +233,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception
 extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
                          struct rpc_message *, struct nfs4_sequence_args *,
                          struct nfs4_sequence_res *, int);
+extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int);
 extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
 extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);
index e42be52a8c18d8121c934a5ac79483e77166206c..3aa6a9ba51136f31f30dea29d60dded106b05241 100644 (file)
@@ -676,7 +676,6 @@ found:
                break;
        }
 
-       /* No matching nfs_client found. */
        spin_unlock(&nn->nfs_client_lock);
        dprintk("NFS: <-- %s status = %d\n", __func__, status);
        nfs_put_client(prev);
index f58c17b3b480367c6322359ae7ca4b33b3695348..dcd39d4e2efebd78eed64d4df00fd2745f747027 100644 (file)
@@ -41,6 +41,10 @@ nfs4_file_open(struct inode *inode, struct file *filp)
 
        dprintk("NFS: open file(%pd2)\n", dentry);
 
+       err = nfs_check_flags(openflags);
+       if (err)
+               return err;
+
        if ((openflags & O_ACCMODE) == 3)
                openflags--;
 
index c0b3a16b4a00806f79ea9eb28b6933a8d94bcc52..039b3eb6d83404f33224961465406d52203ff570 100644 (file)
@@ -35,13 +35,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p
                goto out;
        }
 
-       if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
-               printk(KERN_ERR "nfs4_get_rootfh:"
-                      " getroot obtained referral\n");
-               ret = -EREMOTE;
-               goto out;
-       }
-
        memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
 out:
        nfs_free_fattr(fsinfo.fattr);
index 2e1737c40a29837488823e04f27db975e0a51b9b..535dfc69c628f825cc4422339406b1365f66e8d4 100644 (file)
@@ -494,12 +494,7 @@ nfs_idmap_delete(struct nfs_client *clp)
 
 int nfs_idmap_init(void)
 {
-       int ret;
-       ret = nfs_idmap_init_keyring();
-       if (ret != 0)
-               goto out;
-out:
-       return ret;
+       return nfs_idmap_init_keyring();
 }
 
 void nfs_idmap_quit(void)
index 55e1e3af23a3d3f2313f977b185eb8c3f8ccbc6d..6f228b5af819ea576240c40869c1da74d823e460 100644 (file)
@@ -356,6 +356,9 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
                case 0:
                        return 0;
                case -NFS4ERR_OPENMODE:
+               case -NFS4ERR_DELEG_REVOKED:
+               case -NFS4ERR_ADMIN_REVOKED:
+               case -NFS4ERR_BAD_STATEID:
                        if (inode && nfs4_have_delegation(inode, FMODE_READ)) {
                                nfs4_inode_return_delegation(inode);
                                exception->retry = 1;
@@ -367,15 +370,6 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
                        if (ret < 0)
                                break;
                        goto wait_on_recovery;
-               case -NFS4ERR_DELEG_REVOKED:
-               case -NFS4ERR_ADMIN_REVOKED:
-               case -NFS4ERR_BAD_STATEID:
-                       if (state == NULL)
-                               break;
-                       ret = nfs4_schedule_stateid_recovery(server, state);
-                       if (ret < 0)
-                               break;
-                       goto wait_on_recovery;
                case -NFS4ERR_EXPIRED:
                        if (state != NULL) {
                                ret = nfs4_schedule_stateid_recovery(server, state);
@@ -482,8 +476,8 @@ struct nfs4_call_sync_data {
        struct nfs4_sequence_res *seq_res;
 };
 
-static void nfs4_init_sequence(struct nfs4_sequence_args *args,
-                              struct nfs4_sequence_res *res, int cache_reply)
+void nfs4_init_sequence(struct nfs4_sequence_args *args,
+                       struct nfs4_sequence_res *res, int cache_reply)
 {
        args->sa_slot = NULL;
        args->sa_cache_this = cache_reply;
@@ -1553,6 +1547,13 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
        struct nfs4_state *newstate;
        int ret;
 
+       if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR ||
+            opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) &&
+           (opendata->o_arg.u.delegation_type & fmode) != fmode)
+               /* This mode can't have been delegated, so we must have
+                * a valid open_stateid to cover it - no need to reclaim.
+                */
+               return 0;
        opendata->o_arg.open_flags = 0;
        opendata->o_arg.fmode = fmode;
        opendata->o_arg.share_access = nfs4_map_atomic_open_share(
@@ -1684,6 +1685,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
                                        "%d.\n", __func__, err);
                case 0:
                case -ENOENT:
+               case -EAGAIN:
                case -ESTALE:
                        break;
                case -NFS4ERR_BADSESSION:
@@ -3355,6 +3357,8 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
                        goto out;
                case -NFS4ERR_MOVED:
                        err = nfs4_get_referral(client, dir, name, fattr, fhandle);
+                       if (err == -NFS4ERR_MOVED)
+                               err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception);
                        goto out;
                case -NFS4ERR_WRONGSEC:
                        err = -EPERM;
@@ -4955,49 +4959,128 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp,
        memcpy(bootverf->data, verf, sizeof(bootverf->data));
 }
 
-static unsigned int
-nfs4_init_nonuniform_client_string(struct nfs_client *clp,
-                                  char *buf, size_t len)
+static int
+nfs4_init_nonuniform_client_string(struct nfs_client *clp)
 {
-       unsigned int result;
+       int result;
+       size_t len;
+       char *str;
+       bool retried = false;
 
        if (clp->cl_owner_id != NULL)
-               return strlcpy(buf, clp->cl_owner_id, len);
+               return 0;
+retry:
+       rcu_read_lock();
+       len = 10 + strlen(clp->cl_ipaddr) + 1 +
+               strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) +
+               1 +
+               strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)) +
+               1;
+       rcu_read_unlock();
+
+       if (len > NFS4_OPAQUE_LIMIT + 1)
+               return -EINVAL;
+
+       /*
+        * Since this string is allocated at mount time, and held until the
+        * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying
+        * about a memory-reclaim deadlock.
+        */
+       str = kmalloc(len, GFP_KERNEL);
+       if (!str)
+               return -ENOMEM;
 
        rcu_read_lock();
-       result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s",
-                               clp->cl_ipaddr,
-                               rpc_peeraddr2str(clp->cl_rpcclient,
-                                                       RPC_DISPLAY_ADDR),
-                               rpc_peeraddr2str(clp->cl_rpcclient,
-                                                       RPC_DISPLAY_PROTO));
+       result = scnprintf(str, len, "Linux NFSv4.0 %s/%s %s",
+                       clp->cl_ipaddr,
+                       rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+                       rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO));
        rcu_read_unlock();
-       clp->cl_owner_id = kstrdup(buf, GFP_KERNEL);
-       return result;
+
+       /* Did something change? */
+       if (result >= len) {
+               kfree(str);
+               if (retried)
+                       return -EINVAL;
+               retried = true;
+               goto retry;
+       }
+       clp->cl_owner_id = str;
+       return 0;
 }
 
-static unsigned int
-nfs4_init_uniform_client_string(struct nfs_client *clp,
-                               char *buf, size_t len)
+static int
+nfs4_init_uniquifier_client_string(struct nfs_client *clp)
+{
+       int result;
+       size_t len;
+       char *str;
+
+       len = 10 + 10 + 1 + 10 + 1 +
+               strlen(nfs4_client_id_uniquifier) + 1 +
+               strlen(clp->cl_rpcclient->cl_nodename) + 1;
+
+       if (len > NFS4_OPAQUE_LIMIT + 1)
+               return -EINVAL;
+
+       /*
+        * Since this string is allocated at mount time, and held until the
+        * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying
+        * about a memory-reclaim deadlock.
+        */
+       str = kmalloc(len, GFP_KERNEL);
+       if (!str)
+               return -ENOMEM;
+
+       result = scnprintf(str, len, "Linux NFSv%u.%u %s/%s",
+                       clp->rpc_ops->version, clp->cl_minorversion,
+                       nfs4_client_id_uniquifier,
+                       clp->cl_rpcclient->cl_nodename);
+       if (result >= len) {
+               kfree(str);
+               return -EINVAL;
+       }
+       clp->cl_owner_id = str;
+       return 0;
+}
+
+static int
+nfs4_init_uniform_client_string(struct nfs_client *clp)
 {
-       const char *nodename = clp->cl_rpcclient->cl_nodename;
-       unsigned int result;
+       int result;
+       size_t len;
+       char *str;
 
        if (clp->cl_owner_id != NULL)
-               return strlcpy(buf, clp->cl_owner_id, len);
+               return 0;
 
        if (nfs4_client_id_uniquifier[0] != '\0')
-               result = scnprintf(buf, len, "Linux NFSv%u.%u %s/%s",
-                               clp->rpc_ops->version,
-                               clp->cl_minorversion,
-                               nfs4_client_id_uniquifier,
-                               nodename);
-       else
-               result = scnprintf(buf, len, "Linux NFSv%u.%u %s",
-                               clp->rpc_ops->version, clp->cl_minorversion,
-                               nodename);
-       clp->cl_owner_id = kstrdup(buf, GFP_KERNEL);
-       return result;
+               return nfs4_init_uniquifier_client_string(clp);
+
+       len = 10 + 10 + 1 + 10 + 1 +
+               strlen(clp->cl_rpcclient->cl_nodename) + 1;
+
+       if (len > NFS4_OPAQUE_LIMIT + 1)
+               return -EINVAL;
+
+       /*
+        * Since this string is allocated at mount time, and held until the
+        * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying
+        * about a memory-reclaim deadlock.
+        */
+       str = kmalloc(len, GFP_KERNEL);
+       if (!str)
+               return -ENOMEM;
+
+       result = scnprintf(str, len, "Linux NFSv%u.%u %s",
+                       clp->rpc_ops->version, clp->cl_minorversion,
+                       clp->cl_rpcclient->cl_nodename);
+       if (result >= len) {
+               kfree(str);
+               return -EINVAL;
+       }
+       clp->cl_owner_id = str;
+       return 0;
 }
 
 /*
@@ -5044,7 +5127,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
        struct nfs4_setclientid setclientid = {
                .sc_verifier = &sc_verifier,
                .sc_prog = program,
-               .sc_cb_ident = clp->cl_cb_ident,
+               .sc_clnt = clp,
        };
        struct rpc_message msg = {
                .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
@@ -5064,16 +5147,15 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
 
        /* nfs_client_id4 */
        nfs4_init_boot_verifier(clp, &sc_verifier);
+
        if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags))
-               setclientid.sc_name_len =
-                               nfs4_init_uniform_client_string(clp,
-                                               setclientid.sc_name,
-                                               sizeof(setclientid.sc_name));
+               status = nfs4_init_uniform_client_string(clp);
        else
-               setclientid.sc_name_len =
-                               nfs4_init_nonuniform_client_string(clp,
-                                               setclientid.sc_name,
-                                               sizeof(setclientid.sc_name));
+               status = nfs4_init_nonuniform_client_string(clp);
+
+       if (status)
+               goto out;
+
        /* cb_client4 */
        setclientid.sc_netid_len =
                                nfs4_init_callback_netid(clp,
@@ -5083,9 +5165,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
                                sizeof(setclientid.sc_uaddr), "%s.%u.%u",
                                clp->cl_ipaddr, port >> 8, port & 255);
 
-       dprintk("NFS call  setclientid auth=%s, '%.*s'\n",
+       dprintk("NFS call  setclientid auth=%s, '%s'\n",
                clp->cl_rpcclient->cl_auth->au_ops->au_name,
-               setclientid.sc_name_len, setclientid.sc_name);
+               clp->cl_owner_id);
        task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task)) {
                status = PTR_ERR(task);
@@ -5402,6 +5484,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
        atomic_inc(&lsp->ls_count);
        /* Ensure we don't close file until we're done freeing locks! */
        p->ctx = get_nfs_open_context(ctx);
+       get_file(fl->fl_file);
        memcpy(&p->fl, fl, sizeof(p->fl));
        p->server = NFS_SERVER(inode);
        return p;
@@ -5413,6 +5496,7 @@ static void nfs4_locku_release_calldata(void *data)
        nfs_free_seqid(calldata->arg.seqid);
        nfs4_put_lock_state(calldata->lsp);
        put_nfs_open_context(calldata->ctx);
+       fput(calldata->fl.fl_file);
        kfree(calldata);
 }
 
@@ -6846,11 +6930,14 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
        };
 
        nfs4_init_boot_verifier(clp, &verifier);
-       args.id_len = nfs4_init_uniform_client_string(clp, args.id,
-                                                       sizeof(args.id));
-       dprintk("NFS call  exchange_id auth=%s, '%.*s'\n",
+
+       status = nfs4_init_uniform_client_string(clp);
+       if (status)
+               goto out;
+
+       dprintk("NFS call  exchange_id auth=%s, '%s'\n",
                clp->cl_rpcclient->cl_auth->au_ops->au_name,
-               args.id_len, args.id);
+               clp->cl_owner_id);
 
        res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
                                        GFP_NOFS);
@@ -6885,7 +6972,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
                /* unsupported! */
                WARN_ON_ONCE(1);
                status = -EINVAL;
-               goto out_server_scope;
+               goto out_impl_id;
        }
 
        status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
@@ -6913,6 +7000,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
                /* use the most recent implementation id */
                kfree(clp->cl_implid);
                clp->cl_implid = res.impl_id;
+               res.impl_id = NULL;
 
                if (clp->cl_serverscope != NULL &&
                    !nfs41_same_server_scope(clp->cl_serverscope,
@@ -6926,15 +7014,16 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 
                if (clp->cl_serverscope == NULL) {
                        clp->cl_serverscope = res.server_scope;
-                       goto out;
+                       res.server_scope = NULL;
                }
-       } else
-               kfree(res.impl_id);
+       }
 
-out_server_owner:
-       kfree(res.server_owner);
+out_impl_id:
+       kfree(res.impl_id);
 out_server_scope:
        kfree(res.server_scope);
+out_server_owner:
+       kfree(res.server_owner);
 out:
        if (clp->cl_implid != NULL)
                dprintk("NFS reply exchange_id: Server Implementation ID: "
@@ -8061,9 +8150,8 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
        struct rpc_task *task;
        int status = 0;
 
-       dprintk("NFS: %4d initiating layoutcommit call. sync %d "
-               "lbw: %llu inode %lu\n",
-               data->task.tk_pid, sync,
+       dprintk("NFS: initiating layoutcommit call. sync %d "
+               "lbw: %llu inode %lu\n", sync,
                data->args.lastbytewritten,
                data->args.inode->i_ino);
 
@@ -8557,7 +8645,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
                | NFS_CAP_ATOMIC_OPEN_V1
                | NFS_CAP_ALLOCATE
                | NFS_CAP_DEALLOCATE
-               | NFS_CAP_SEEK,
+               | NFS_CAP_SEEK
+               | NFS_CAP_LAYOUTSTATS,
        .init_client = nfs41_init_client,
        .shutdown_client = nfs41_shutdown_client,
        .match_stateid = nfs41_match_stateid,
index 2782cfca22650922e012a4f86a1755e3cca68243..605840dc89cf9e28c173659af201aab109f9328d 100644 (file)
@@ -309,7 +309,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 
        if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
                goto do_confirm;
-       nfs4_begin_drain_session(clp);
        status = nfs4_proc_exchange_id(clp, cred);
        if (status != 0)
                goto out;
@@ -1482,6 +1481,8 @@ restart:
                                        spin_unlock(&state->state_lock);
                                }
                                nfs4_put_open_state(state);
+                               clear_bit(NFS4CLNT_RECLAIM_NOGRACE,
+                                       &state->flags);
                                spin_lock(&sp->so_lock);
                                goto restart;
                        }
@@ -1830,6 +1831,7 @@ static int nfs4_establish_lease(struct nfs_client *clp)
                clp->cl_mvops->reboot_recovery_ops;
        int status;
 
+       nfs4_begin_drain_session(clp);
        cred = nfs4_get_clid_cred(clp);
        if (cred == NULL)
                return -ENOENT;
index 0aea97841d3038b56056d0d7fcd0dcddb11f584e..558cd65dbdb752d111b5b85649b72bae36fdf040 100644 (file)
@@ -139,7 +139,8 @@ static int nfs4_stat_to_errno(int);
 #define encode_setclientid_maxsz \
                                (op_encode_hdr_maxsz + \
                                XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \
-                               XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \
+                               /* client name */ \
+                               1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
                                1 /* sc_prog */ + \
                                1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \
                                1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \
@@ -288,7 +289,8 @@ static int nfs4_stat_to_errno(int);
 #define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \
                                encode_verifier_maxsz + \
                                1 /* co_ownerid.len */ + \
-                               XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \
+                               /* eia_clientowner */ \
+                               1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
                                1 /* flags */ + \
                                1 /* spa_how */ + \
                                /* max is SP4_MACH_CRED (for now) */ + \
@@ -1667,13 +1669,14 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
        encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr);
        encode_nfs4_verifier(xdr, setclientid->sc_verifier);
 
-       encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
+       encode_string(xdr, strlen(setclientid->sc_clnt->cl_owner_id),
+                       setclientid->sc_clnt->cl_owner_id);
        p = reserve_space(xdr, 4);
        *p = cpu_to_be32(setclientid->sc_prog);
        encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid);
        encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
        p = reserve_space(xdr, 4);
-       *p = cpu_to_be32(setclientid->sc_cb_ident);
+       *p = cpu_to_be32(setclientid->sc_clnt->cl_cb_ident);
 }
 
 static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
@@ -1747,7 +1750,8 @@ static void encode_exchange_id(struct xdr_stream *xdr,
        encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr);
        encode_nfs4_verifier(xdr, args->verifier);
 
-       encode_string(xdr, args->id_len, args->id);
+       encode_string(xdr, strlen(args->client->cl_owner_id),
+                       args->client->cl_owner_id);
 
        encode_uint32(xdr, args->flags);
        encode_uint32(xdr, args->state_protect.how);
@@ -7427,6 +7431,7 @@ struct rpc_procinfo       nfs4_procedures[] = {
        PROC(SEEK,              enc_seek,               dec_seek),
        PROC(ALLOCATE,          enc_allocate,           dec_allocate),
        PROC(DEALLOCATE,        enc_deallocate,         dec_deallocate),
+       PROC(LAYOUTSTATS,       enc_layoutstats,        dec_layoutstats),
 #endif /* CONFIG_NFS_V4_2 */
 };
 
index 282b3936951060a2c8a6216c4c690e166a5fcb8d..1da68d3b1edabdb78c60527f502af5f40d6cf69b 100644 (file)
@@ -636,9 +636,8 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
 
        hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how);
 
-       dprintk("NFS: %5u initiated pgio call "
+       dprintk("NFS: initiated pgio call "
                "(req %s/%llu, %u bytes @ offset %llu)\n",
-               hdr->task.tk_pid,
                hdr->inode->i_sb->s_id,
                (unsigned long long)NFS_FILEID(hdr->inode),
                hdr->args.count,
@@ -690,8 +689,6 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
 static void nfs_pgio_release(void *calldata)
 {
        struct nfs_pgio_header *hdr = calldata;
-       if (hdr->rw_ops->rw_release)
-               hdr->rw_ops->rw_release(hdr);
        nfs_pgio_data_destroy(hdr);
        hdr->completion_ops->completion(hdr);
 }
@@ -711,7 +708,9 @@ static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror,
  * nfs_pageio_init - initialise a page io descriptor
  * @desc: pointer to descriptor
  * @inode: pointer to inode
- * @doio: pointer to io function
+ * @pg_ops: pointer to pageio operations
+ * @compl_ops: pointer to pageio completion operations
+ * @rw_ops: pointer to nfs read/write operations
  * @bsize: io block size
  * @io_flags: extra parameters for the io function
  */
@@ -1186,6 +1185,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
  * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an
  *                             nfs_pageio_descriptor
  * @desc: pointer to io descriptor
+ * @mirror_idx: mirror index
  */
 static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
                                       u32 mirror_idx)
index 230606243be6ad079733e583d173b14d3baeda55..0ba9a02c95664960f8c0f46ea97249bd8653fe16 100644 (file)
@@ -35,6 +35,7 @@
 #include "iostat.h"
 #include "nfs4trace.h"
 #include "delegation.h"
+#include "nfs42.h"
 
 #define NFSDBG_FACILITY                NFSDBG_PNFS
 #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
@@ -1821,6 +1822,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
        /* Resend all requests through the MDS */
        nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
                              hdr->completion_ops);
+       set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
        return nfs_pageio_resend(&pgio, hdr);
 }
 EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
@@ -1865,6 +1867,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
                mirror->pg_recoalesce = 1;
        }
        nfs_pgio_data_destroy(hdr);
+       hdr->release(hdr);
 }
 
 static enum pnfs_try_status
@@ -1979,6 +1982,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
                mirror->pg_recoalesce = 1;
        }
        nfs_pgio_data_destroy(hdr);
+       hdr->release(hdr);
 }
 
 /*
@@ -2247,3 +2251,63 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
        }
        return thp;
 }
+
+#if IS_ENABLED(CONFIG_NFS_V4_2)
+int
+pnfs_report_layoutstat(struct inode *inode)
+{
+       struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct nfs_inode *nfsi = NFS_I(inode);
+       struct nfs42_layoutstat_data *data;
+       struct pnfs_layout_hdr *hdr;
+       int status = 0;
+
+       if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
+               goto out;
+
+       if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
+               goto out;
+
+       if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
+               goto out;
+
+       spin_lock(&inode->i_lock);
+       if (!NFS_I(inode)->layout) {
+               spin_unlock(&inode->i_lock);
+               goto out;
+       }
+       hdr = NFS_I(inode)->layout;
+       pnfs_get_layout_hdr(hdr);
+       spin_unlock(&inode->i_lock);
+
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data) {
+               status = -ENOMEM;
+               goto out_put;
+       }
+
+       data->args.fh = NFS_FH(inode);
+       data->args.inode = inode;
+       nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid);
+       status = ld->prepare_layoutstats(&data->args);
+       if (status)
+               goto out_free;
+
+       status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);
+
+out:
+       dprintk("%s returns %d\n", __func__, status);
+       return status;
+
+out_free:
+       kfree(data);
+out_put:
+       pnfs_put_layout_hdr(hdr);
+       smp_mb__before_atomic();
+       clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
+       smp_mb__after_atomic();
+       goto out;
+}
+EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
+#endif
index 1e6308f82fc3d5887de850e72226233e4f2138d2..3e6ab7bfbabd428425227b6f9d2a94711edd371e 100644 (file)
@@ -178,6 +178,8 @@ struct pnfs_layoutdriver_type {
        void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo,
                                     struct xdr_stream *xdr,
                                     const struct nfs4_layoutcommit_args *args);
+       int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args);
+       void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data);
 };
 
 struct pnfs_layout_hdr {
@@ -290,7 +292,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
 struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
 void pnfs_error_mark_layout_for_return(struct inode *inode,
                                       struct pnfs_layout_segment *lseg);
-
 /* nfs4_deviceid_flags */
 enum {
        NFS_DEVICEID_INVALID = 0,       /* set when MDS clientid recalled */
@@ -689,4 +690,14 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
 
 #endif /* CONFIG_NFS_V4_1 */
 
+#if IS_ENABLED(CONFIG_NFS_V4_2)
+int pnfs_report_layoutstat(struct inode *inode);
+#else
+static inline int
+pnfs_report_layoutstat(struct inode *inode)
+{
+       return 0;
+}
+#endif
+
 #endif /* FS_NFS_PNFS_H */
index e6c262555e08a62aff65ef3baa04e9666e9f18c2..65869ca9c851dbf4f0b289ca84865a018c2b6e57 100644 (file)
@@ -1290,6 +1290,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
 static void nfs_redirty_request(struct nfs_page *req)
 {
        nfs_mark_request_dirty(req);
+       set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
        nfs_unlock_request(req);
        nfs_end_page_writeback(req);
        nfs_release_request(req);
@@ -1348,11 +1349,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
        NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
 }
 
-static void nfs_writeback_release_common(struct nfs_pgio_header *hdr)
-{
-       /* do nothing! */
-}
-
 /*
  * Special version of should_remove_suid() that ignores capabilities.
  */
@@ -1556,7 +1552,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
        /* Set up the initial task struct.  */
        nfs_ops->commit_setup(data, &msg);
 
-       dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
+       dprintk("NFS: initiated commit call\n");
 
        nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client,
                NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg);
@@ -2013,7 +2009,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = {
        .rw_mode                = FMODE_WRITE,
        .rw_alloc_header        = nfs_writehdr_alloc,
        .rw_free_header         = nfs_writehdr_free,
-       .rw_release             = nfs_writeback_release_common,
        .rw_done                = nfs_writeback_done,
        .rw_result              = nfs_writeback_result,
        .rw_initiate            = nfs_initiate_write,
index 4506486974336d6451abc0658de78c0c06bdec9a..5b1e2a497e5114c26e556830f9f891c36130ffc7 100644 (file)
@@ -26,7 +26,7 @@
 #include <linux/fs.h> /* struct inode */
 #include <linux/fsnotify_backend.h>
 #include <linux/idr.h>
-#include <linux/init.h> /* module_init */
+#include <linux/init.h> /* fs_initcall */
 #include <linux/inotify.h>
 #include <linux/kernel.h> /* roundup() */
 #include <linux/namei.h> /* LOOKUP_FOLLOW */
@@ -812,4 +812,4 @@ static int __init inotify_user_setup(void)
 
        return 0;
 }
-module_init(inotify_user_setup);
+fs_initcall(inotify_user_setup);
index 907870e81a72e36f4c5abb29fd34e23b4307efc5..70e9af5516004d20188ca0757110fe5bf203d19f 100644 (file)
@@ -23,6 +23,7 @@ struct ovl_cache_entry {
        u64 ino;
        struct list_head l_node;
        struct rb_node node;
+       struct ovl_cache_entry *next_maybe_whiteout;
        bool is_whiteout;
        char name[];
 };
@@ -39,7 +40,7 @@ struct ovl_readdir_data {
        struct rb_root root;
        struct list_head *list;
        struct list_head middle;
-       struct dentry *dir;
+       struct ovl_cache_entry *first_maybe_whiteout;
        int count;
        int err;
 };
@@ -79,7 +80,7 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
        return NULL;
 }
 
-static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir,
+static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
                                                   const char *name, int len,
                                                   u64 ino, unsigned int d_type)
 {
@@ -98,29 +99,8 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir,
        p->is_whiteout = false;
 
        if (d_type == DT_CHR) {
-               struct dentry *dentry;
-               const struct cred *old_cred;
-               struct cred *override_cred;
-
-               override_cred = prepare_creds();
-               if (!override_cred) {
-                       kfree(p);
-                       return NULL;
-               }
-
-               /*
-                * CAP_DAC_OVERRIDE for lookup
-                */
-               cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
-               old_cred = override_creds(override_cred);
-
-               dentry = lookup_one_len(name, dir, len);
-               if (!IS_ERR(dentry)) {
-                       p->is_whiteout = ovl_is_whiteout(dentry);
-                       dput(dentry);
-               }
-               revert_creds(old_cred);
-               put_cred(override_cred);
+               p->next_maybe_whiteout = rdd->first_maybe_whiteout;
+               rdd->first_maybe_whiteout = p;
        }
        return p;
 }
@@ -148,7 +128,7 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
                        return 0;
        }
 
-       p = ovl_cache_entry_new(rdd->dir, name, len, ino, d_type);
+       p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
        if (p == NULL)
                return -ENOMEM;
 
@@ -169,7 +149,7 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd,
        if (p) {
                list_move_tail(&p->l_node, &rdd->middle);
        } else {
-               p = ovl_cache_entry_new(rdd->dir, name, namelen, ino, d_type);
+               p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
                if (p == NULL)
                        rdd->err = -ENOMEM;
                else
@@ -219,6 +199,43 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name,
                return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
 }
 
+static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
+{
+       int err;
+       struct ovl_cache_entry *p;
+       struct dentry *dentry;
+       const struct cred *old_cred;
+       struct cred *override_cred;
+
+       override_cred = prepare_creds();
+       if (!override_cred)
+               return -ENOMEM;
+
+       /*
+        * CAP_DAC_OVERRIDE for lookup
+        */
+       cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
+       old_cred = override_creds(override_cred);
+
+       err = mutex_lock_killable(&dir->d_inode->i_mutex);
+       if (!err) {
+               while (rdd->first_maybe_whiteout) {
+                       p = rdd->first_maybe_whiteout;
+                       rdd->first_maybe_whiteout = p->next_maybe_whiteout;
+                       dentry = lookup_one_len(p->name, dir, p->len);
+                       if (!IS_ERR(dentry)) {
+                               p->is_whiteout = ovl_is_whiteout(dentry);
+                               dput(dentry);
+                       }
+               }
+               mutex_unlock(&dir->d_inode->i_mutex);
+       }
+       revert_creds(old_cred);
+       put_cred(override_cred);
+
+       return err;
+}
+
 static inline int ovl_dir_read(struct path *realpath,
                               struct ovl_readdir_data *rdd)
 {
@@ -229,7 +246,7 @@ static inline int ovl_dir_read(struct path *realpath,
        if (IS_ERR(realfile))
                return PTR_ERR(realfile);
 
-       rdd->dir = realpath->dentry;
+       rdd->first_maybe_whiteout = NULL;
        rdd->ctx.pos = 0;
        do {
                rdd->count = 0;
@@ -238,6 +255,10 @@ static inline int ovl_dir_read(struct path *realpath,
                if (err >= 0)
                        err = rdd->err;
        } while (!err && rdd->count);
+
+       if (!err && rdd->first_maybe_whiteout)
+               err = ovl_check_whiteouts(realpath->dentry, rdd);
+
        fput(realfile);
 
        return err;
index bf8537c7f455207830046a50d67d394f86d37f4a..8a08c582bc22e400a16f395609200ed105d3933f 100644 (file)
@@ -273,10 +273,57 @@ static void ovl_dentry_release(struct dentry *dentry)
        }
 }
 
+static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+{
+       struct ovl_entry *oe = dentry->d_fsdata;
+       unsigned int i;
+       int ret = 1;
+
+       for (i = 0; i < oe->numlower; i++) {
+               struct dentry *d = oe->lowerstack[i].dentry;
+
+               if (d->d_flags & DCACHE_OP_REVALIDATE) {
+                       ret = d->d_op->d_revalidate(d, flags);
+                       if (ret < 0)
+                               return ret;
+                       if (!ret) {
+                               if (!(flags & LOOKUP_RCU))
+                                       d_invalidate(d);
+                               return -ESTALE;
+                       }
+               }
+       }
+       return 1;
+}
+
+static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
+{
+       struct ovl_entry *oe = dentry->d_fsdata;
+       unsigned int i;
+       int ret = 1;
+
+       for (i = 0; i < oe->numlower; i++) {
+               struct dentry *d = oe->lowerstack[i].dentry;
+
+               if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) {
+                       ret = d->d_op->d_weak_revalidate(d, flags);
+                       if (ret <= 0)
+                               break;
+               }
+       }
+       return ret;
+}
+
 static const struct dentry_operations ovl_dentry_operations = {
        .d_release = ovl_dentry_release,
 };
 
+static const struct dentry_operations ovl_reval_dentry_operations = {
+       .d_release = ovl_dentry_release,
+       .d_revalidate = ovl_dentry_revalidate,
+       .d_weak_revalidate = ovl_dentry_weak_revalidate,
+};
+
 static struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
 {
        size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
@@ -288,6 +335,20 @@ static struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
        return oe;
 }
 
+static bool ovl_dentry_remote(struct dentry *dentry)
+{
+       return dentry->d_flags &
+               (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
+}
+
+static bool ovl_dentry_weird(struct dentry *dentry)
+{
+       return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT |
+                                 DCACHE_MANAGE_TRANSIT |
+                                 DCACHE_OP_HASH |
+                                 DCACHE_OP_COMPARE);
+}
+
 static inline struct dentry *ovl_lookup_real(struct dentry *dir,
                                             struct qstr *name)
 {
@@ -303,6 +364,10 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir,
        } else if (!dentry->d_inode) {
                dput(dentry);
                dentry = NULL;
+       } else if (ovl_dentry_weird(dentry)) {
+               dput(dentry);
+               /* Don't support traversing automounts and other weirdness */
+               dentry = ERR_PTR(-EREMOTE);
        }
        return dentry;
 }
@@ -350,6 +415,11 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
                        goto out;
 
                if (this) {
+                       if (unlikely(ovl_dentry_remote(this))) {
+                               dput(this);
+                               err = -EREMOTE;
+                               goto out;
+                       }
                        if (ovl_is_whiteout(this)) {
                                dput(this);
                                this = NULL;
@@ -694,25 +764,6 @@ static void ovl_unescape(char *s)
        }
 }
 
-static bool ovl_is_allowed_fs_type(struct dentry *root)
-{
-       const struct dentry_operations *dop = root->d_op;
-
-       /*
-        * We don't support:
-        *  - automount filesystems
-        *  - filesystems with revalidate (FIXME for lower layer)
-        *  - filesystems with case insensitive names
-        */
-       if (dop &&
-           (dop->d_manage || dop->d_automount ||
-            dop->d_revalidate || dop->d_weak_revalidate ||
-            dop->d_compare || dop->d_hash)) {
-               return false;
-       }
-       return true;
-}
-
 static int ovl_mount_dir_noesc(const char *name, struct path *path)
 {
        int err = -EINVAL;
@@ -727,7 +778,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path)
                goto out;
        }
        err = -EINVAL;
-       if (!ovl_is_allowed_fs_type(path->dentry)) {
+       if (ovl_dentry_weird(path->dentry)) {
                pr_err("overlayfs: filesystem on '%s' not supported\n", name);
                goto out_put;
        }
@@ -751,13 +802,21 @@ static int ovl_mount_dir(const char *name, struct path *path)
        if (tmp) {
                ovl_unescape(tmp);
                err = ovl_mount_dir_noesc(tmp, path);
+
+               if (!err)
+                       if (ovl_dentry_remote(path->dentry)) {
+                               pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
+                                      tmp);
+                               path_put(path);
+                               err = -EINVAL;
+                       }
                kfree(tmp);
        }
        return err;
 }
 
 static int ovl_lower_dir(const char *name, struct path *path, long *namelen,
-                        int *stack_depth)
+                        int *stack_depth, bool *remote)
 {
        int err;
        struct kstatfs statfs;
@@ -774,6 +833,9 @@ static int ovl_lower_dir(const char *name, struct path *path, long *namelen,
        *namelen = max(*namelen, statfs.f_namelen);
        *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
 
+       if (ovl_dentry_remote(path->dentry))
+               *remote = true;
+
        return 0;
 
 out_put:
@@ -827,6 +889,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
        unsigned int numlower;
        unsigned int stacklen = 0;
        unsigned int i;
+       bool remote = false;
        int err;
 
        err = -ENOMEM;
@@ -900,7 +963,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
        lower = lowertmp;
        for (numlower = 0; numlower < stacklen; numlower++) {
                err = ovl_lower_dir(lower, &stack[numlower],
-                                   &ufs->lower_namelen, &sb->s_stack_depth);
+                                   &ufs->lower_namelen, &sb->s_stack_depth,
+                                   &remote);
                if (err)
                        goto out_put_lowerpath;
 
@@ -958,7 +1022,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
        if (!ufs->upper_mnt)
                sb->s_flags |= MS_RDONLY;
 
-       sb->s_d_op = &ovl_dentry_operations;
+       if (remote)
+               sb->s_d_op = &ovl_reval_dentry_operations;
+       else
+               sb->s_d_op = &ovl_dentry_operations;
 
        err = -ENOMEM;
        oe = ovl_alloc_entry(numlower);
index 273de709495c1db522d46dad7f339677fc8df756..b52c0dc4b4925a1d29c6c8b2d55a1ed0c40b2b75 100644 (file)
@@ -51,6 +51,7 @@
 #define METHOD_NAME__BBN        "_BBN"
 #define METHOD_NAME__CBA        "_CBA"
 #define METHOD_NAME__CID        "_CID"
+#define METHOD_NAME__CLS        "_CLS"
 #define METHOD_NAME__CRS        "_CRS"
 #define METHOD_NAME__DDN        "_DDN"
 #define METHOD_NAME__HID        "_HID"
index a8f344363e7737d056bebc7ec7f4d45fa2e170f0..f56de8c5d844de3019d2e8fb8b04f5fedab2d58f 100644 (file)
 
 /* DEBUG_PRINT functions */
 
-#define ACPI_DEBUG_PRINT(plist)         ACPI_ACTUAL_DEBUG plist
-#define ACPI_DEBUG_PRINT_RAW(plist)     ACPI_ACTUAL_DEBUG_RAW plist
+#ifndef COMPILER_VA_MACRO
+
+#define ACPI_DEBUG_PRINT(plist)         acpi_debug_print plist
+#define ACPI_DEBUG_PRINT_RAW(plist)     acpi_debug_print_raw plist
+
+#else
 
 /* Helper macros for DEBUG_PRINT */
 
        ACPI_DO_DEBUG_PRINT (acpi_debug_print_raw, level, line, \
                filename, modulename, component, __VA_ARGS__)
 
+#define ACPI_DEBUG_PRINT(plist)         ACPI_ACTUAL_DEBUG plist
+#define ACPI_DEBUG_PRINT_RAW(plist)     ACPI_ACTUAL_DEBUG_RAW plist
+
+#endif
+
 /*
  * Function entry tracing
  *
index d68f1cd39c495f732b66c048d64cbe384162cd6d..e8ec18a4a634d8d5679a49757fcb616233ec5a9d 100644 (file)
@@ -46,7 +46,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20150515
+#define ACPI_CA_VERSION                 0x20150619
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
@@ -195,9 +195,18 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_do_not_use_xsdt, FALSE);
  * address. Although ACPICA adheres to the ACPI specification which
  * requires the use of the corresponding 64-bit address if it is non-zero,
  * some machines have been found to have a corrupted non-zero 64-bit
- * address. Default is TRUE, favor the 32-bit addresses.
+ * address. Default is FALSE, do not favor the 32-bit addresses.
  */
-ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, TRUE);
+ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, FALSE);
+
+/*
+ * Optionally use 32-bit FACS table addresses.
+ * It is reported that some platforms fail to resume from system suspending
+ * if 64-bit FACS table address is selected:
+ * https://bugzilla.kernel.org/show_bug.cgi?id=74021
+ * Default is TRUE, favor the 32-bit addresses.
+ */
+ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_facs_addresses, TRUE);
 
 /*
  * Optionally truncate I/O addresses to 16 bits. Provides compatibility
@@ -219,6 +228,11 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_disable_auto_repair, FALSE);
  */
 ACPI_INIT_GLOBAL(u8, acpi_gbl_disable_ssdt_table_install, FALSE);
 
+/*
+ * Optionally enable runtime namespace override.
+ */
+ACPI_INIT_GLOBAL(u8, acpi_gbl_runtime_namespace_override, TRUE);
+
 /*
  * We keep track of the latest version of Windows that has been requested by
  * the BIOS. ACPI 5.0.
@@ -814,8 +828,12 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_leave_sleep_state(u8 sleep_state))
 
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
-                               acpi_set_firmware_waking_vector(u32
-                                                               physical_address))
+                               acpi_set_firmware_waking_vectors
+                               (acpi_physical_address physical_address,
+                                acpi_physical_address physical_address64))
+ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
+                                acpi_set_firmware_waking_vector(u32
+                                                                physical_address))
 #if ACPI_MACHINE_WIDTH == 64
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
                                acpi_set_firmware_waking_vector64(u64
index cb8a6b97cedae07b239fbc5eb671e1ddfe6d38ea..2d5faf508cadfd159a5b1213bc2bc16d66ae5535 100644 (file)
@@ -65,6 +65,7 @@
 #define ACPI_SIG_DSDT           "DSDT" /* Differentiated System Description Table */
 #define ACPI_SIG_FADT           "FACP" /* Fixed ACPI Description Table */
 #define ACPI_SIG_FACS           "FACS" /* Firmware ACPI Control Structure */
+#define ACPI_SIG_OSDT           "OSDT" /* Override System Description Table */
 #define ACPI_SIG_PSDT           "PSDT" /* Persistent System Description Table */
 #define ACPI_SIG_RSDP           "RSD PTR "     /* Root System Description Pointer */
 #define ACPI_SIG_RSDT           "RSDT" /* Root System Description Table */
index 06b61f01ea599177c0cba9b3172c89672d7aa63b..fcd570999f354247c9dbba73163797ff38cbd1a6 100644 (file)
@@ -835,6 +835,17 @@ struct acpi_madt_generic_distributor {
        u8 reserved2[3];        /* reserved - must be zero */
 };
 
+/* Values for Version field above */
+
+enum acpi_madt_gic_version {
+       ACPI_MADT_GIC_VERSION_NONE = 0,
+       ACPI_MADT_GIC_VERSION_V1 = 1,
+       ACPI_MADT_GIC_VERSION_V2 = 2,
+       ACPI_MADT_GIC_VERSION_V3 = 3,
+       ACPI_MADT_GIC_VERSION_V4 = 4,
+       ACPI_MADT_GIC_VERSION_RESERVED = 5      /* 5 and greater are reserved */
+};
+
 /* 13: Generic MSI Frame (ACPI 5.1) */
 
 struct acpi_madt_generic_msi_frame {
index 370d69d871a0d19054c6fd81bf33789670493f08..a948fc586b9b8406d44b5e35f4e50c1ed436830e 100644 (file)
@@ -51,8 +51,8 @@
  * These tables are not consumed directly by the ACPICA subsystem, but are
  * included here to support device drivers and the AML disassembler.
  *
- * The tables in this file are defined by third-party specifications, and are
- * not defined directly by the ACPI specification itself.
+ * Generally, the tables in this file are defined by third-party specifications,
+ * and are not defined directly by the ACPI specification itself.
  *
  ******************************************************************************/
 
@@ -80,6 +80,7 @@
 #define ACPI_SIG_SPCR           "SPCR" /* Serial Port Console Redirection table */
 #define ACPI_SIG_SPMI           "SPMI" /* Server Platform Management Interface table */
 #define ACPI_SIG_TCPA           "TCPA" /* Trusted Computing Platform Alliance table */
+#define ACPI_SIG_TPM2           "TPM2" /* Trusted Platform Module 2.0 H/W interface table */
 #define ACPI_SIG_UEFI           "UEFI" /* Uefi Boot Optimization Table */
 #define ACPI_SIG_VRTC           "VRTC" /* Virtual Real Time Clock Table */
 #define ACPI_SIG_WAET           "WAET" /* Windows ACPI Emulated devices Table */
@@ -1179,20 +1180,85 @@ enum acpi_spmi_interface_types {
 /*******************************************************************************
  *
  * TCPA - Trusted Computing Platform Alliance table
- *        Version 1
+ *        Version 2
+ *
+ * Conforms to "TCG ACPI Specification, Family 1.2 and 2.0",
+ * December 19, 2014
  *
- * Conforms to "TCG PC Specific Implementation Specification",
- * Version 1.1, August 18, 2003
+ * NOTE: There are two versions of the table with the same signature --
+ * the client version and the server version.
  *
  ******************************************************************************/
 
-struct acpi_table_tcpa {
+struct acpi_table_tcpa_client {
        struct acpi_table_header header;        /* Common ACPI table header */
+       u16 platform_class;
+       u32 minimum_log_length; /* Minimum length for the event log area */
+       u64 log_address;        /* Address of the event log area */
+};
+
+struct acpi_table_tcpa_server {
+       struct acpi_table_header header;        /* Common ACPI table header */
+       u16 platform_class;
        u16 reserved;
-       u32 max_log_length;     /* Maximum length for the event log area */
+       u64 minimum_log_length; /* Minimum length for the event log area */
        u64 log_address;        /* Address of the event log area */
+       u16 spec_revision;
+       u8 device_flags;
+       u8 interrupt_flags;
+       u8 gpe_number;
+       u8 reserved2[3];
+       u32 global_interrupt;
+       struct acpi_generic_address address;
+       u32 reserved3;
+       struct acpi_generic_address config_address;
+       u8 group;
+       u8 bus;                 /* PCI Bus/Segment/Function numbers */
+       u8 device;
+       u8 function;
+};
+
+/* Values for device_flags above */
+
+#define ACPI_TCPA_PCI_DEVICE            (1)
+#define ACPI_TCPA_BUS_PNP               (1<<1)
+#define ACPI_TCPA_ADDRESS_VALID         (1<<2)
+
+/* Values for interrupt_flags above */
+
+#define ACPI_TCPA_INTERRUPT_MODE        (1)
+#define ACPI_TCPA_INTERRUPT_POLARITY    (1<<1)
+#define ACPI_TCPA_SCI_VIA_GPE           (1<<2)
+#define ACPI_TCPA_GLOBAL_INTERRUPT      (1<<3)
+
+/*******************************************************************************
+ *
+ * TPM2 - Trusted Platform Module (TPM) 2.0 Hardware Interface Table
+ *        Version 4
+ *
+ * Conforms to "TCG ACPI Specification, Family 1.2 and 2.0",
+ * December 19, 2014
+ *
+ ******************************************************************************/
+
+struct acpi_table_tpm2 {
+       struct acpi_table_header header;        /* Common ACPI table header */
+       u16 platform_class;
+       u16 reserved;
+       u64 control_address;
+       u32 start_method;
+
+       /* Platform-specific data follows */
 };
 
+/* Values for start_method above */
+
+#define ACPI_TPM2_NOT_ALLOWED                       0
+#define ACPI_TPM2_START_METHOD                      2
+#define ACPI_TPM2_MEMORY_MAPPED                     6
+#define ACPI_TPM2_COMMAND_BUFFER                    7
+#define ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD  8
+
 /*******************************************************************************
  *
  * UEFI - UEFI Boot optimization Table
index 4018986d2a2e2b96f5bf2f493078561666b59e41..1df891660f4394e5f805e57455cbb662b9fac920 100644 (file)
@@ -51,7 +51,8 @@
  * These tables are not consumed directly by the ACPICA subsystem, but are
  * included here to support device drivers and the AML disassembler.
  *
- * The tables in this file are fully defined within the ACPI specification.
+ * In general, the tables in this file are fully defined within the ACPI
+ * specification.
  *
  ******************************************************************************/
 
@@ -69,7 +70,6 @@
 #define ACPI_SIG_PMTT           "PMTT" /* Platform Memory Topology Table */
 #define ACPI_SIG_RASF           "RASF" /* RAS Feature table */
 #define ACPI_SIG_STAO           "STAO" /* Status Override table */
-#define ACPI_SIG_TPM2           "TPM2" /* Trusted Platform Module 2.0 H/W interface table */
 #define ACPI_SIG_WPBT           "WPBT" /* Windows Platform Binary Table */
 #define ACPI_SIG_XENV           "XENV" /* Xen Environment table */
 
@@ -720,36 +720,6 @@ struct acpi_table_stao {
        u8 ignore_uart;
 };
 
-/*******************************************************************************
- *
- * TPM2 - Trusted Platform Module (TPM) 2.0 Hardware Interface Table
- *        Version 3
- *
- * Conforms to "TPM 2.0 Hardware Interface Table (TPM2)" 29 November 2011
- *
- ******************************************************************************/
-
-struct acpi_table_tpm2 {
-       struct acpi_table_header header;        /* Common ACPI table header */
-       u32 flags;
-       u64 control_address;
-       u32 start_method;
-};
-
-/* Control area structure (not part of table, pointed to by control_address) */
-
-struct acpi_tpm2_control {
-       u32 reserved;
-       u32 error;
-       u32 cancel;
-       u32 start;
-       u64 interrupt_control;
-       u32 command_size;
-       u64 command_address;
-       u32 response_size;
-       u64 response_address;
-};
-
 /*******************************************************************************
  *
  * WPBT - Windows Platform Environment Table (ACPI 6.0)
index 63fd7f5e9fb3495198e659c8b04a16db2e489877..c2a41d223162a3ef3936d40c111e89fd5a4c6600 100644 (file)
@@ -542,14 +542,14 @@ typedef u64 acpi_integer;
 #define ACPI_COMPARE_NAME(a,b)          (*ACPI_CAST_PTR (u32, (a)) == *ACPI_CAST_PTR (u32, (b)))
 #define ACPI_MOVE_NAME(dest,src)        (*ACPI_CAST_PTR (u32, (dest)) = *ACPI_CAST_PTR (u32, (src)))
 #else
-#define ACPI_COMPARE_NAME(a,b)          (!ACPI_STRNCMP (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE))
-#define ACPI_MOVE_NAME(dest,src)        (ACPI_STRNCPY (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAME_SIZE))
+#define ACPI_COMPARE_NAME(a,b)          (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE))
+#define ACPI_MOVE_NAME(dest,src)        (strncpy (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAME_SIZE))
 #endif
 
 /* Support for the special RSDP signature (8 characters) */
 
-#define ACPI_VALIDATE_RSDP_SIG(a)       (!ACPI_STRNCMP (ACPI_CAST_PTR (char, (a)), ACPI_SIG_RSDP, 8))
-#define ACPI_MAKE_RSDP_SIG(dest)        (ACPI_MEMCPY (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8))
+#define ACPI_VALIDATE_RSDP_SIG(a)       (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_SIG_RSDP, 8))
+#define ACPI_MAKE_RSDP_SIG(dest)        (memcpy (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8))
 
 /*******************************************************************************
  *
@@ -568,6 +568,7 @@ typedef u64 acpi_integer;
 #define ACPI_NO_ACPI_ENABLE             0x10
 #define ACPI_NO_DEVICE_INIT             0x20
 #define ACPI_NO_OBJECT_INIT             0x40
+#define ACPI_NO_FACS_INIT               0x80
 
 /*
  * Initialization state
@@ -1140,6 +1141,10 @@ u32 (*acpi_interface_handler) (acpi_string interface_name, u32 supported);
 
 #define ACPI_UUID_LENGTH                16
 
+/* Length of 3-byte PCI class code values when converted back to a string */
+
+#define ACPI_PCICLS_STRING_SIZE         7      /* Includes null terminator */
+
 /* Structures used for device/processor HID, UID, CID, and SUB */
 
 struct acpi_pnp_device_id {
@@ -1162,7 +1167,7 @@ struct acpi_device_info {
        u32 name;               /* ACPI object Name */
        acpi_object_type type;  /* ACPI object Type */
        u8 param_count;         /* If a method, required parameter count */
-       u8 valid;               /* Indicates which optional fields are valid */
+       u16 valid;              /* Indicates which optional fields are valid */
        u8 flags;               /* Miscellaneous info */
        u8 highest_dstates[4];  /* _sx_d values: 0xFF indicates not valid */
        u8 lowest_dstates[5];   /* _sx_w values: 0xFF indicates not valid */
@@ -1171,6 +1176,7 @@ struct acpi_device_info {
        struct acpi_pnp_device_id hardware_id;  /* _HID value */
        struct acpi_pnp_device_id unique_id;    /* _UID value */
        struct acpi_pnp_device_id subsystem_id; /* _SUB value */
+       struct acpi_pnp_device_id class_code;   /* _CLS value */
        struct acpi_pnp_device_id_list compatible_id_list;      /* _CID list <must be last> */
 };
 
@@ -1180,14 +1186,15 @@ struct acpi_device_info {
 
 /* Flags for Valid field above (acpi_get_object_info) */
 
-#define ACPI_VALID_STA                  0x01
-#define ACPI_VALID_ADR                  0x02
-#define ACPI_VALID_HID                  0x04
-#define ACPI_VALID_UID                  0x08
-#define ACPI_VALID_SUB                  0x10
-#define ACPI_VALID_CID                  0x20
-#define ACPI_VALID_SXDS                 0x40
-#define ACPI_VALID_SXWS                 0x80
+#define ACPI_VALID_STA                  0x0001
+#define ACPI_VALID_ADR                  0x0002
+#define ACPI_VALID_HID                  0x0004
+#define ACPI_VALID_UID                  0x0008
+#define ACPI_VALID_SUB                  0x0010
+#define ACPI_VALID_CID                  0x0020
+#define ACPI_VALID_CLS                  0x0040
+#define ACPI_VALID_SXDS                 0x0100
+#define ACPI_VALID_SXWS                 0x0200
 
 /* Flags for _STA return value (current_status above) */
 
index 073997d729e9c9710c10656ff3f34c6e30527c34..3cedd43943f42a8465772def8b68f3f93f322676 100644 (file)
 
 /* We will be linking to the standard Clib functions */
 
-#define ACPI_STRSTR(s1,s2)      strstr((s1), (s2))
-#define ACPI_STRCHR(s1,c)       strchr((s1), (c))
-#define ACPI_STRLEN(s)          (acpi_size) strlen((s))
-#define ACPI_STRCPY(d,s)        (void) strcpy((d), (s))
-#define ACPI_STRNCPY(d,s,n)     (void) strncpy((d), (s), (acpi_size)(n))
-#define ACPI_STRNCMP(d,s,n)     strncmp((d), (s), (acpi_size)(n))
-#define ACPI_STRCMP(d,s)        strcmp((d), (s))
-#define ACPI_STRCAT(d,s)        (void) strcat((d), (s))
-#define ACPI_STRNCAT(d,s,n)     strncat((d), (s), (acpi_size)(n))
-#define ACPI_STRTOUL(d,s,n)     strtoul((d), (s), (acpi_size)(n))
-#define ACPI_MEMCMP(s1,s2,n)    memcmp((const char *)(s1), (const char *)(s2), (acpi_size)(n))
-#define ACPI_MEMCPY(d,s,n)      (void) memcpy((d), (s), (acpi_size)(n))
-#define ACPI_MEMSET(d,s,n)      (void) memset((d), (s), (acpi_size)(n))
-
-#define ACPI_TOUPPER(i)         toupper((int) (i))
-#define ACPI_TOLOWER(i)         tolower((int) (i))
-#define ACPI_IS_XDIGIT(i)       isxdigit((int) (i))
-#define ACPI_IS_DIGIT(i)        isdigit((int) (i))
-#define ACPI_IS_SPACE(i)        isspace((int) (i))
-#define ACPI_IS_UPPER(i)        isupper((int) (i))
-#define ACPI_IS_PRINT(i)        isprint((int) (i))
-#define ACPI_IS_ALPHA(i)        isalpha((int) (i))
-
 #else
 
 /******************************************************************************
@@ -406,22 +383,6 @@ typedef char *va_list;
 
 /* Use the local (ACPICA) definitions of the clib functions */
 
-#define ACPI_STRSTR(s1,s2)      acpi_ut_strstr ((s1), (s2))
-#define ACPI_STRCHR(s1,c)       acpi_ut_strchr ((s1), (c))
-#define ACPI_STRLEN(s)          (acpi_size) acpi_ut_strlen ((s))
-#define ACPI_STRCPY(d,s)        (void) acpi_ut_strcpy ((d), (s))
-#define ACPI_STRNCPY(d,s,n)     (void) acpi_ut_strncpy ((d), (s), (acpi_size)(n))
-#define ACPI_STRNCMP(d,s,n)     acpi_ut_strncmp ((d), (s), (acpi_size)(n))
-#define ACPI_STRCMP(d,s)        acpi_ut_strcmp ((d), (s))
-#define ACPI_STRCAT(d,s)        (void) acpi_ut_strcat ((d), (s))
-#define ACPI_STRNCAT(d,s,n)     acpi_ut_strncat ((d), (s), (acpi_size)(n))
-#define ACPI_STRTOUL(d,s,n)     acpi_ut_strtoul ((d), (s), (acpi_size)(n))
-#define ACPI_MEMCMP(s1,s2,n)    acpi_ut_memcmp((const char *)(s1), (const char *)(s2), (acpi_size)(n))
-#define ACPI_MEMCPY(d,s,n)      (void) acpi_ut_memcpy ((d), (s), (acpi_size)(n))
-#define ACPI_MEMSET(d,v,n)      (void) acpi_ut_memset ((d), (v), (acpi_size)(n))
-#define ACPI_TOUPPER(c)         acpi_ut_to_upper ((int) (c))
-#define ACPI_TOLOWER(c)         acpi_ut_to_lower ((int) (c))
-
 #endif                         /* ACPI_USE_SYSTEM_CLIBRARY */
 
 #ifndef ACPI_FILE
index 14dc6f68ca1854de0095e1d572d5e6de36abf7b5..0a7dc8e583b1c2d742e691cf6e4c00ccc0154f5a 100644 (file)
 #if defined(_LINUX) || defined(__linux__)
 #include <acpi/platform/aclinuxex.h>
 
+#elif defined(_AED_EFI)
+#include "acefiex.h"
+
+#elif defined(_GNU_EFI)
+#include "acefiex.h"
+
 #elif defined(__DragonFly__)
 #include "acdragonflyex.h"
 
index f54de0a635582d45b3aca6bb1977313d431e0549..5457a06cb52879f2775ed64630f63025ca3fbb9c 100644 (file)
@@ -75,4 +75,8 @@
 #undef strchr
 #endif
 
+/* GCC supports __VA_ARGS__ in macros */
+
+#define COMPILER_VA_MACRO               1
+
 #endif                         /* __ACGCC_H__ */
index 30f92cefaa721d040d5f1e2d92ec16429f8c00e3..9ebee53d3bf586ef80690fa778c0a3e030e410f1 100644 (file)
@@ -43,9 +43,9 @@ struct ceph_options {
        int flags;
        struct ceph_fsid fsid;
        struct ceph_entity_addr my_addr;
-       int mount_timeout;
-       int osd_idle_ttl;
-       int osd_keepalive_timeout;
+       unsigned long mount_timeout;            /* jiffies */
+       unsigned long osd_idle_ttl;             /* jiffies */
+       unsigned long osd_keepalive_timeout;    /* jiffies */
 
        /*
         * any type that can't be simply compared or doesn't need need
@@ -63,9 +63,9 @@ struct ceph_options {
 /*
  * defaults
  */
-#define CEPH_MOUNT_TIMEOUT_DEFAULT  60
-#define CEPH_OSD_KEEPALIVE_DEFAULT  5
-#define CEPH_OSD_IDLE_TTL_DEFAULT    60
+#define CEPH_MOUNT_TIMEOUT_DEFAULT     msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_KEEPALIVE_DEFAULT     msecs_to_jiffies(5 * 1000)
+#define CEPH_OSD_IDLE_TTL_DEFAULT      msecs_to_jiffies(60 * 1000)
 
 #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
 #define CEPH_MSG_MAX_MIDDLE_LEN        (16*1024*1024)
@@ -93,13 +93,9 @@ enum {
        CEPH_MOUNT_SHUTDOWN,
 };
 
-/*
- * subtract jiffies
- */
-static inline unsigned long time_sub(unsigned long a, unsigned long b)
+static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
 {
-       BUG_ON(time_after(b, a));
-       return (long)a - (long)b;
+       return timeout ?: MAX_SCHEDULE_TIMEOUT;
 }
 
 struct ceph_mds_client;
@@ -178,6 +174,7 @@ static inline int calc_pages_for(u64 off, u64 len)
 
 extern struct kmem_cache *ceph_inode_cachep;
 extern struct kmem_cache *ceph_cap_cachep;
+extern struct kmem_cache *ceph_cap_flush_cachep;
 extern struct kmem_cache *ceph_dentry_cachep;
 extern struct kmem_cache *ceph_file_cachep;
 
index 61b19c46bdb33d5fc2f4752df0c345350fa739e8..7506b485bb6d1d4cee0aff00cbe1132d55396071 100644 (file)
@@ -249,7 +249,7 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
                                 struct ceph_msg *msg);
 
 extern void osd_req_op_init(struct ceph_osd_request *osd_req,
-                                       unsigned int which, u16 opcode);
+                           unsigned int which, u16 opcode, u32 flags);
 
 extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
                                        unsigned int which,
index 48a1a7d100f190efae067afa7866b75e10eff9fe..48b49305716bd728e69477db6b430c34e7781746 100644 (file)
@@ -1,7 +1,11 @@
 #ifndef CEPH_CRUSH_CRUSH_H
 #define CEPH_CRUSH_CRUSH_H
 
-#include <linux/types.h>
+#ifdef __KERNEL__
+# include <linux/types.h>
+#else
+# include "crush_compat.h"
+#endif
 
 /*
  * CRUSH is a pseudo-random data distribution algorithm that
 #define CRUSH_MAGIC 0x00010000ul   /* for detecting algorithm revisions */
 
 #define CRUSH_MAX_DEPTH 10  /* max crush hierarchy depth */
+#define CRUSH_MAX_RULESET (1<<8)  /* max crush ruleset number */
+#define CRUSH_MAX_RULES CRUSH_MAX_RULESET  /* should be the same as max rulesets */
 
+#define CRUSH_MAX_DEVICE_WEIGHT (100u * 0x10000u)
+#define CRUSH_MAX_BUCKET_WEIGHT (65535u * 0x10000u)
 
 #define CRUSH_ITEM_UNDEF  0x7ffffffe  /* undefined result (internal use only) */
 #define CRUSH_ITEM_NONE   0x7fffffff  /* no result */
@@ -108,6 +116,15 @@ enum {
 };
 extern const char *crush_bucket_alg_name(int alg);
 
+/*
+ * although tree was a legacy algorithm, it has been buggy, so
+ * exclude it.
+ */
+#define CRUSH_LEGACY_ALLOWED_BUCKET_ALGS (     \
+               (1 << CRUSH_BUCKET_UNIFORM) |   \
+               (1 << CRUSH_BUCKET_LIST) |      \
+               (1 << CRUSH_BUCKET_STRAW))
+
 struct crush_bucket {
        __s32 id;        /* this'll be negative */
        __u16 type;      /* non-zero; type=0 is reserved for devices */
@@ -174,7 +191,7 @@ struct crush_map {
        /* choose local attempts using a fallback permutation before
         * re-descent */
        __u32 choose_local_fallback_tries;
-       /* choose attempts before giving up */ 
+       /* choose attempts before giving up */
        __u32 choose_total_tries;
        /* attempt chooseleaf inner descent once for firstn mode; on
         * reject retry outer descent.  Note that this does *not*
@@ -187,6 +204,25 @@ struct crush_map {
         * that want to limit reshuffling, a value of 3 or 4 will make the
         * mappings line up a bit better with previous mappings. */
        __u8 chooseleaf_vary_r;
+
+#ifndef __KERNEL__
+       /*
+        * version 0 (original) of straw_calc has various flaws.  version 1
+        * fixes a few of them.
+        */
+       __u8 straw_calc_version;
+
+       /*
+        * allowed bucket algs is a bitmask, here the bit positions
+        * are CRUSH_BUCKET_*.  note that these are *bits* and
+        * CRUSH_BUCKET_* values are not, so we need to or together (1
+        * << CRUSH_BUCKET_WHATEVER).  The 0th bit is not used to
+        * minimize confusion (bucket type values start at 1).
+        */
+       __u32 allowed_bucket_algs;
+
+       __u32 *choose_tries;
+#endif
 };
 
 
index 91e884230d5db99cbfacc478bf8acbe2d4533908..d1d90258242eef2965eb3e1e1e399132e85c8c34 100644 (file)
@@ -1,6 +1,12 @@
 #ifndef CEPH_CRUSH_HASH_H
 #define CEPH_CRUSH_HASH_H
 
+#ifdef __KERNEL__
+# include <linux/types.h>
+#else
+# include "crush_compat.h"
+#endif
+
 #define CRUSH_HASH_RJENKINS1   0
 
 #define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1
index eab367446eea7fa683cb4fd15e74ad3822bb35c8..5dfd5b1125d2b257a4a00d1e77661613ca2227ec 100644 (file)
@@ -8,7 +8,7 @@
  * LGPL2
  */
 
-#include <linux/crush/crush.h>
+#include "crush.h"
 
 extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
 extern int crush_do_rule(const struct crush_map *map,
index 00ac57c26615103b0983040981cac8012c9e2f99..5a31bf3a40243e1faff078fdc509361568143f01 100644 (file)
@@ -1300,4 +1300,26 @@ static void __exit __driver##_exit(void) \
 } \
 module_exit(__driver##_exit);
 
+/**
+ * builtin_driver() - Helper macro for drivers that don't do anything
+ * special in init and have no exit. This eliminates some boilerplate.
+ * Each driver may only use this macro once, and calling it replaces
+ * device_initcall (or in some cases, the legacy __initcall).  This is
+ * meant to be a direct parallel of module_driver() above but without
+ * the __exit stuff that is not used for builtin cases.
+ *
+ * @__driver: driver name
+ * @__register: register function for this driver type
+ * @...: Additional arguments to be passed to __register
+ *
+ * Use this macro to construct bus specific macros for registering
+ * drivers, and do not use it on its own.
+ */
+#define builtin_driver(__driver, __register, ...) \
+static int __init __driver##_init(void) \
+{ \
+       return __register(&(__driver) , ##__VA_ARGS__); \
+} \
+device_initcall(__driver##_init);
+
 #endif /* _DEVICE_H_ */
index 21b6d768edd7a4e0f1aee98ed9f437000fa1b996..7c68c36d3fd88788f043447c5bed889cdd408bb6 100644 (file)
 
 #define __exit          __section(.exit.text) __exitused __cold notrace
 
-/* temporary, until all users are removed */
-#define __cpuinit
-#define __cpuinitdata
-#define __cpuinitconst
-#define __cpuexit
-#define __cpuexitdata
-#define __cpuexitconst
-
 /* Used for MEMORY_HOTPLUG */
 #define __meminit        __section(.meminit.text) __cold notrace
 #define __meminitdata    __section(.meminit.data)
 #define __INITRODATA   .section        ".init.rodata","a",%progbits
 #define __FINITDATA    .previous
 
-/* temporary, until all users are removed */
-#define __CPUINIT
-
 #define __MEMINIT        .section      ".meminit.text", "ax"
 #define __MEMINITDATA    .section      ".meminit.data", "aw"
 #define __MEMINITRODATA  .section      ".meminit.rodata", "a"
index 32201c269890433817f1fac83026d95fa760b996..b8e72aad919cfc72ea6710ac3786dc35bbe47201 100644 (file)
@@ -500,6 +500,7 @@ enum {
        NFSPROC4_CLNT_SEEK,
        NFSPROC4_CLNT_ALLOCATE,
        NFSPROC4_CLNT_DEALLOCATE,
+       NFSPROC4_CLNT_LAYOUTSTATS,
 };
 
 /* nfs41 types */
index b95f914ce083891325b6b8defa3f9995851cf76b..f91b5ade30c98fe8b03d06bc40deb0c434edb633 100644 (file)
@@ -219,6 +219,7 @@ struct nfs_inode {
 #define NFS_INO_COMMIT         (7)             /* inode is committing unstable writes */
 #define NFS_INO_LAYOUTCOMMIT   (9)             /* layoutcommit required */
 #define NFS_INO_LAYOUTCOMMITTING (10)          /* layoutcommit inflight */
+#define NFS_INO_LAYOUTSTATS    (11)            /* layoutstats inflight */
 
 static inline struct nfs_inode *NFS_I(const struct inode *inode)
 {
index 5e1273d4de14064198489a7aaccb73a88f36e7b8..a2ea1491d3dfc487611445490fb10adf9972d777 100644 (file)
@@ -237,5 +237,6 @@ struct nfs_server {
 #define NFS_CAP_SEEK           (1U << 19)
 #define NFS_CAP_ALLOCATE       (1U << 20)
 #define NFS_CAP_DEALLOCATE     (1U << 21)
+#define NFS_CAP_LAYOUTSTATS    (1U << 22)
 
 #endif
index 3eb072dbce833dd268b0189b9ff98a2aa85cc7bc..f2f650f136ee6fe181dfa27b71d7b944fc1f9357 100644 (file)
@@ -67,7 +67,6 @@ struct nfs_rw_ops {
        const fmode_t rw_mode;
        struct nfs_pgio_header *(*rw_alloc_header)(void);
        void (*rw_free_header)(struct nfs_pgio_header *);
-       void (*rw_release)(struct nfs_pgio_header *);
        int  (*rw_done)(struct rpc_task *, struct nfs_pgio_header *,
                        struct inode *);
        void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *);
index 93ab6071bbe967b56ea44c56111157f8a9135e3f..7bbe50504211d65cc096baa8bc6d45e2e9449125 100644 (file)
@@ -316,6 +316,49 @@ struct nfs4_layoutreturn {
        int rpc_status;
 };
 
+#define PNFS_LAYOUTSTATS_MAXSIZE 256
+
+struct nfs42_layoutstat_args;
+struct nfs42_layoutstat_devinfo;
+typedef        void (*layoutstats_encode_t)(struct xdr_stream *,
+               struct nfs42_layoutstat_args *,
+               struct nfs42_layoutstat_devinfo *);
+
+/* Per file per deviceid layoutstats */
+struct nfs42_layoutstat_devinfo {
+       struct nfs4_deviceid dev_id;
+       __u64 offset;
+       __u64 length;
+       __u64 read_count;
+       __u64 read_bytes;
+       __u64 write_count;
+       __u64 write_bytes;
+       __u32 layout_type;
+       layoutstats_encode_t layoutstats_encode;
+       void *layout_private;
+};
+
+struct nfs42_layoutstat_args {
+       struct nfs4_sequence_args seq_args;
+       struct nfs_fh *fh;
+       struct inode *inode;
+       nfs4_stateid stateid;
+       int num_dev;
+       struct nfs42_layoutstat_devinfo *devinfo;
+};
+
+struct nfs42_layoutstat_res {
+       struct nfs4_sequence_res seq_res;
+       int num_dev;
+       int rpc_status;
+};
+
+struct nfs42_layoutstat_data {
+       struct inode *inode;
+       struct nfs42_layoutstat_args args;
+       struct nfs42_layoutstat_res res;
+};
+
 struct stateowner_id {
        __u64   create_time;
        __u32   uniquifier;
@@ -984,17 +1027,14 @@ struct nfs4_readlink_res {
        struct nfs4_sequence_res        seq_res;
 };
 
-#define NFS4_SETCLIENTID_NAMELEN       (127)
 struct nfs4_setclientid {
        const nfs4_verifier *           sc_verifier;
-       unsigned int                    sc_name_len;
-       char                            sc_name[NFS4_SETCLIENTID_NAMELEN + 1];
        u32                             sc_prog;
        unsigned int                    sc_netid_len;
        char                            sc_netid[RPCBIND_MAXNETIDLEN + 1];
        unsigned int                    sc_uaddr_len;
        char                            sc_uaddr[RPCBIND_MAXUADDRLEN + 1];
-       u32                             sc_cb_ident;
+       struct nfs_client               *sc_clnt;
        struct rpc_cred                 *sc_cred;
 };
 
@@ -1142,12 +1182,9 @@ struct nfs41_state_protection {
        struct nfs4_op_map allow;
 };
 
-#define NFS4_EXCHANGE_ID_LEN   (48)
 struct nfs41_exchange_id_args {
        struct nfs_client               *client;
        nfs4_verifier                   *verifier;
-       unsigned int                    id_len;
-       char                            id[NFS4_EXCHANGE_ID_LEN];
        u32                             flags;
        struct nfs41_state_protection   state_protect;
 };
index 58f1e75ba105ca9096f8fbf1d60b808e08087c03..bba08f44cc97da8f881fba53a9d20c4e80ee2969 100644 (file)
@@ -222,6 +222,15 @@ static inline void platform_set_drvdata(struct platform_device *pdev,
        module_driver(__platform_driver, platform_driver_register, \
                        platform_driver_unregister)
 
+/* builtin_platform_driver() - Helper macro for builtin drivers that
+ * don't do anything special in driver init.  This eliminates some
+ * boilerplate.  Each driver may only use this macro once, and
+ * calling it replaces device_initcall().  Note this is meant to be
+ * a parallel of module_platform_driver() above, but w/o _exit stuff.
+ */
+#define builtin_platform_driver(__platform_driver) \
+       builtin_driver(__platform_driver, platform_driver_register)
+
 /* module_platform_driver_probe() - Helper macro for drivers that don't do
  * anything special in module init/exit.  This eliminates a lot of
  * boilerplate.  Each module may only use this macro once, and
@@ -240,6 +249,20 @@ static void __exit __platform_driver##_exit(void) \
 } \
 module_exit(__platform_driver##_exit);
 
+/* builtin_platform_driver_probe() - Helper macro for drivers that don't do
+ * anything special in device init.  It defines __platform_driver##_init()
+ * around platform_driver_probe() and registers it with device_initcall(), so
+ * a driver may use it only once.  This parallels
+ * module_platform_driver_probe() above, but without the __exit parts.
+ */
+#define builtin_platform_driver_probe(__platform_driver, __platform_probe) \
+static int __init __platform_driver##_init(void) \
+{ \
+       return platform_driver_probe(&(__platform_driver), \
+                                    __platform_probe);    \
+} \
+device_initcall(__platform_driver##_init);
+
 #define platform_create_bundle(driver, probe, res, n_res, data, size) \
        __platform_create_bundle(driver, probe, res, n_res, data, size, THIS_MODULE)
 extern struct platform_device *__platform_create_bundle(
index 2ca67b55e0fe2f0abf7e432ee8402608b6d57201..8df43c9f11dc295889639364101f324150d769ff 100644 (file)
@@ -37,7 +37,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied);
 void xprt_free_bc_request(struct rpc_rqst *req);
 int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
 void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs);
-int bc_send(struct rpc_rqst *req);
 
 /*
  * Determine if a shared backchannel is in use
index 598ba80ec30c974f02477a216a21077213a25aa5..131032f15cc187e0c7cbd0df80a08f4c44220800 100644 (file)
@@ -56,6 +56,7 @@ struct rpc_clnt {
        struct rpc_rtt *        cl_rtt;         /* RTO estimator data */
        const struct rpc_timeout *cl_timeout;   /* Timeout strategy */
 
+       atomic_t                cl_swapper;     /* swapfile count */
        int                     cl_nodelen;     /* nodename length */
        char                    cl_nodename[UNX_MAXNODENAME+1];
        struct rpc_pipe_dir_head cl_pipedir_objects;
index 5f1e6bd4c316d143751d19aac15af77aa7c1ac21..d703f0ef37d8f87436310247c19ca37f60ea692b 100644 (file)
@@ -205,8 +205,7 @@ struct rpc_wait_queue {
  */
 struct rpc_task *rpc_new_task(const struct rpc_task_setup *);
 struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
-struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
-                               const struct rpc_call_ops *ops);
+struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req);
 void           rpc_put_task(struct rpc_task *);
 void           rpc_put_task_async(struct rpc_task *);
 void           rpc_exit_task(struct rpc_task *);
@@ -269,4 +268,20 @@ static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q,
 }
 #endif
 
+#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
+int rpc_clnt_swap_activate(struct rpc_clnt *clnt);
+void rpc_clnt_swap_deactivate(struct rpc_clnt *clnt);
+#else /* CONFIG_SUNRPC_SWAP disabled: inline stubs replace the real functions */
+static inline int
+rpc_clnt_swap_activate(struct rpc_clnt *clnt)
+{
+       return -EINVAL; /* activation always fails when swap support is compiled out */
+}
+
+static inline void
+rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) /* nothing to undo in the stub */
+{
+}
+#endif /* CONFIG_SUNRPC_SWAP */
+
 #endif /* _LINUX_SUNRPC_SCHED_H_ */
index 8b93ef53df3c95df08625bef6ba58341d583e1e0..0fb9acbb478095b576445d74d264ad5e5193a55e 100644 (file)
@@ -133,6 +133,9 @@ struct rpc_xprt_ops {
        void            (*close)(struct rpc_xprt *xprt);
        void            (*destroy)(struct rpc_xprt *xprt);
        void            (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq);
+       int             (*enable_swap)(struct rpc_xprt *xprt);
+       void            (*disable_swap)(struct rpc_xprt *xprt);
+       void            (*inject_disconnect)(struct rpc_xprt *xprt);
 };
 
 /*
@@ -180,7 +183,7 @@ struct rpc_xprt {
        atomic_t                num_reqs;       /* total slots */
        unsigned long           state;          /* transport state */
        unsigned char           resvport   : 1; /* use a reserved port */
-       unsigned int            swapper;        /* we're swapping over this
+       atomic_t                swapper;        /* we're swapping over this
                                                   transport */
        unsigned int            bind_index;     /* bind function index */
 
@@ -212,7 +215,8 @@ struct rpc_xprt {
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
        struct svc_serv         *bc_serv;       /* The RPC service which will */
                                                /* process the callback */
-       unsigned int            bc_alloc_count; /* Total number of preallocs */
+       int                     bc_alloc_count; /* Total number of preallocs */
+       atomic_t                bc_free_slots;
        spinlock_t              bc_pa_lock;     /* Protects the preallocated
                                                 * items */
        struct list_head        bc_pa_list;     /* List of preallocated
@@ -241,6 +245,7 @@ struct rpc_xprt {
        const char              *address_strings[RPC_DISPLAY_MAX];
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
        struct dentry           *debugfs;               /* debugfs directory */
+       atomic_t                inject_disconnect;
 #endif
 };
 
@@ -327,6 +332,18 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *
        return p + xprt->tsh_size;
 }
 
+static inline int
+xprt_enable_swap(struct rpc_xprt *xprt) /* forwards to the transport's enable_swap op */
+{
+       return xprt->ops->enable_swap(xprt); /* op result is returned unchanged; the op pointer is not NULL-checked here */
+}
+
+static inline void
+xprt_disable_swap(struct rpc_xprt *xprt) /* forwards to the transport's disable_swap op */
+{
+       xprt->ops->disable_swap(xprt); /* the op pointer is not NULL-checked here */
+}
+
 /*
  * Transport switch helper functions
  */
@@ -345,7 +362,6 @@ void                        xprt_release_rqst_cong(struct rpc_task *task);
 void                   xprt_disconnect_done(struct rpc_xprt *xprt);
 void                   xprt_force_disconnect(struct rpc_xprt *xprt);
 void                   xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
-int                    xs_swapper(struct rpc_xprt *xprt, int enable);
 
 bool                   xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *);
 void                   xprt_unlock_connect(struct rpc_xprt *, void *);
@@ -431,6 +447,23 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt)
        return test_and_set_bit(XPRT_BINDING, &xprt->state);
 }
 
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+extern unsigned int rpc_inject_disconnect; /* countdown reload value; 0 turns injection off */
+static inline void xprt_inject_disconnect(struct rpc_xprt *xprt)
+{
+       if (!rpc_inject_disconnect) /* injection disabled */
+               return;
+       if (atomic_dec_return(&xprt->inject_disconnect)) /* per-transport countdown is not at zero after this decrement */
+               return;
+       atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); /* re-arm the countdown from the global value */
+       xprt->ops->inject_disconnect(xprt); /* hand off to the transport's inject_disconnect op */
+}
+#else
+static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) /* empty stub when CONFIG_SUNRPC_DEBUG is not enabled */
+{
+}
+#endif
+
 #endif /* __KERNEL__*/
 
 #endif /* _LINUX_SUNRPC_XPRT_H */
index c984c85981eae2881ebdac43ffdbfba9b55b70b4..b17613052cc3fd9d8827ede1944d3489bdd2285d 100644 (file)
@@ -56,7 +56,8 @@
 
 #define RPCRDMA_INLINE_PAD_THRESH  (512)/* payload threshold to pad (bytes) */
 
-/* memory registration strategies */
+/* Memory registration strategies, by number.
+ * This is part of a kernel / user space API. Do not remove. */
 enum rpcrdma_memreg {
        RPCRDMA_BOUNCEBUFFERS = 0,
        RPCRDMA_REGISTER,
index d3f4832db289b5da2787427360e7b933677d9fd5..b6fce900a8334613f45f169a0c28802327222f56 100644 (file)
@@ -313,6 +313,9 @@ struct drm_amdgpu_gem_op {
 #define AMDGPU_VA_OP_MAP                       1
 #define AMDGPU_VA_OP_UNMAP                     2
 
+/* Delay the page table update till the next CS */
+#define AMDGPU_VM_DELAY_UPDATE         (1 << 0)
+
 /* Mapping flags */
 /* readable mapping */
 #define AMDGPU_VM_PAGE_READABLE                (1 << 1)
@@ -348,6 +351,7 @@ struct drm_amdgpu_gem_va {
 
 #define AMDGPU_CHUNK_ID_IB             0x01
 #define AMDGPU_CHUNK_ID_FENCE          0x02
+#define AMDGPU_CHUNK_ID_DEPENDENCIES   0x03
 
 struct drm_amdgpu_cs_chunk {
        uint32_t                chunk_id;
@@ -399,6 +403,14 @@ struct drm_amdgpu_cs_chunk_ib {
        uint32_t ring;
 };
 
+struct drm_amdgpu_cs_chunk_dep {
+       uint32_t ip_type;
+       uint32_t ip_instance;
+       uint32_t ring;
+       uint32_t ctx_id;
+       uint64_t handle;
+};
+
 struct drm_amdgpu_cs_chunk_fence {
        uint32_t handle;
        uint32_t offset;
index 25084a052a1eff964d19a9683d5d6470590e4c7e..c9aca042e61d197927409531af4f8cdd88218adc 100644 (file)
@@ -755,4 +755,7 @@ struct fuse_notify_retrieve_in {
        uint64_t        dummy4;
 };
 
+/* Device ioctls: */
+#define FUSE_DEV_IOC_CLONE     _IOR(229, 0, uint32_t)
+
 #endif /* _LINUX_FUSE_H */
index 60c302cfb4d3cbb976f0b2d69f28a96899b1d4ce..43c4c920f30a92ca73e16d8ac3c9249b9a4ae4ca 100644 (file)
@@ -137,7 +137,7 @@ endif
 
 ifneq ($(wildcard $(obj)/.x509.list),)
 ifneq ($(shell cat $(obj)/.x509.list),$(X509_CERTIFICATES))
-$(info X.509 certificate list changed)
+$(warning X.509 certificate list changed to "$(X509_CERTIFICATES)" from "$(shell cat $(obj)/.x509.list)")
 $(shell rm $(obj)/.x509.list)
 endif
 endif
index ff37c8c2f7b24fbaa0bb6e04faa8aef5edd3dbbc..6897b527581a8d9fcb65b4a2e01815d3d34498ed 100644 (file)
@@ -45,6 +45,9 @@ CFLAGS_kobject.o += -DDEBUG
 CFLAGS_kobject_uevent.o += -DDEBUG
 endif
 
+obj-$(CONFIG_DEBUG_INFO_REDUCED) += debug_info.o
+CFLAGS_debug_info.o += $(call cc-option, -femit-struct-debug-detailed=any)
+
 obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
 obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o
 obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
diff --git a/lib/debug_info.c b/lib/debug_info.c
new file mode 100644 (file)
index 0000000..2edbe27
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * This file exists solely to ensure debug information for some core
+ * data structures is included in the final image even for
+ * CONFIG_DEBUG_INFO_REDUCED. Please do not add actual code. However,
+ * adding appropriate #includes is fine.
+ */
+#include <stdarg.h>
+
+#include <linux/cred.h>
+#include <linux/crypto.h>
+#include <linux/dcache.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/fscache-cache.h>
+#include <linux/io.h>
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <net/addrconf.h>
+#include <net/sock.h>
+#include <net/tcp.h>
index b29015102698b393e0fdb84e2f7e6170d1e2a0a9..3fe401067e20ba81c762fb00ae00730de6b9502f 100644 (file)
@@ -289,5 +289,5 @@ exit:
        kfree(elts);
        return err;
 }
-module_init(list_sort_test);
+late_initcall(list_sort_test);
 #endif /* CONFIG_TEST_LIST_SORT */
index 05e7447d960b0628d9dea7ea22a02a0f15488b9d..58ea3643b9e9968a723f498d7df55b34c51a179e 100644 (file)
@@ -2085,7 +2085,7 @@ static int __meminit init_user_reserve(void)
        sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
        return 0;
 }
-module_init(init_user_reserve)
+subsys_initcall(init_user_reserve);
 
 /*
  * Initialise sysctl_admin_reserve_kbytes.
@@ -2106,4 +2106,4 @@ static int __meminit init_admin_reserve(void)
        sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
        return 0;
 }
-module_init(init_admin_reserve)
+subsys_initcall(init_admin_reserve);
index 0993f5f36b011ec567e3b559f50bb4dafde90479..bd5f842b56d26aca3cf24225884d5a2eacd1580c 100644 (file)
@@ -310,4 +310,4 @@ static int __init pageowner_init(void)
 
        return 0;
 }
-module_init(pageowner_init)
+late_initcall(pageowner_init)
index 79e8f71aef5be312ab7aa547293fd047cf42e97c..cb7db320dd276aa0107d8e484270e3e8fb61dd6a 100644 (file)
@@ -352,8 +352,8 @@ ceph_parse_options(char *options, const char *dev_name,
        /* start with defaults */
        opt->flags = CEPH_OPT_DEFAULT;
        opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
-       opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
-       opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;   /* seconds */
+       opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
+       opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
 
        /* get mon ip(s) */
        /* ip1[:port1][,ip2[:port2]...] */
@@ -439,13 +439,32 @@ ceph_parse_options(char *options, const char *dev_name,
                        pr_warn("ignoring deprecated osdtimeout option\n");
                        break;
                case Opt_osdkeepalivetimeout:
-                       opt->osd_keepalive_timeout = intval;
+                       /* 0 isn't well defined right now, reject it */
+                       if (intval < 1 || intval > INT_MAX / 1000) {
+                               pr_err("osdkeepalive out of range\n");
+                               err = -EINVAL;
+                               goto out;
+                       }
+                       opt->osd_keepalive_timeout =
+                                       msecs_to_jiffies(intval * 1000);
                        break;
                case Opt_osd_idle_ttl:
-                       opt->osd_idle_ttl = intval;
+                       /* 0 isn't well defined right now, reject it */
+                       if (intval < 1 || intval > INT_MAX / 1000) {
+                               pr_err("osd_idle_ttl out of range\n");
+                               err = -EINVAL;
+                               goto out;
+                       }
+                       opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000);
                        break;
                case Opt_mount_timeout:
-                       opt->mount_timeout = intval;
+                       /* 0 is "wait forever" (i.e. infinite timeout) */
+                       if (intval < 0 || intval > INT_MAX / 1000) {
+                               pr_err("mount_timeout out of range\n");
+                               err = -EINVAL;
+                               goto out;
+                       }
+                       opt->mount_timeout = msecs_to_jiffies(intval * 1000);
                        break;
 
                case Opt_share:
@@ -512,12 +531,14 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
                seq_puts(m, "notcp_nodelay,");
 
        if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
-               seq_printf(m, "mount_timeout=%d,", opt->mount_timeout);
+               seq_printf(m, "mount_timeout=%d,",
+                          jiffies_to_msecs(opt->mount_timeout) / 1000);
        if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
-               seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl);
+               seq_printf(m, "osd_idle_ttl=%d,",
+                          jiffies_to_msecs(opt->osd_idle_ttl) / 1000);
        if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
                seq_printf(m, "osdkeepalivetimeout=%d,",
-                          opt->osd_keepalive_timeout);
+                   jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
 
        /* drop redundant comma */
        if (m->count != pos)
@@ -626,8 +647,8 @@ static int have_mon_and_osd_map(struct ceph_client *client)
  */
 int __ceph_open_session(struct ceph_client *client, unsigned long started)
 {
-       int err;
-       unsigned long timeout = client->options->mount_timeout * HZ;
+       unsigned long timeout = client->options->mount_timeout;
+       long err;
 
        /* open session, and wait for mon and osd maps */
        err = ceph_monc_open_session(&client->monc);
@@ -635,16 +656,15 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
                return err;
 
        while (!have_mon_and_osd_map(client)) {
-               err = -EIO;
                if (timeout && time_after_eq(jiffies, started + timeout))
-                       return err;
+                       return -ETIMEDOUT;
 
                /* wait */
                dout("mount waiting for mon_map\n");
                err = wait_event_interruptible_timeout(client->auth_wq,
                        have_mon_and_osd_map(client) || (client->auth_err < 0),
-                       timeout);
-               if (err == -EINTR || err == -ERESTARTSYS)
+                       ceph_timeout_jiffies(timeout));
+               if (err < 0)
                        return err;
                if (client->auth_err < 0)
                        return client->auth_err;
@@ -721,5 +741,5 @@ module_exit(exit_ceph_lib);
 MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
-MODULE_DESCRIPTION("Ceph filesystem for Linux");
+MODULE_DESCRIPTION("Ceph core library");
 MODULE_LICENSE("GPL");
index 9d84ce4ea0dfa8928cc498b9f0515735666b7857..80d7c3a97cb84355e82e9d8f4c83fbf5b0d82893 100644 (file)
@@ -1,15 +1,11 @@
-
 #ifdef __KERNEL__
 # include <linux/slab.h>
+# include <linux/crush/crush.h>
 #else
-# include <stdlib.h>
-# include <assert.h>
-# define kfree(x) do { if (x) free(x); } while (0)
-# define BUG_ON(x) assert(!(x))
+# include "crush_compat.h"
+# include "crush.h"
 #endif
 
-#include <linux/crush/crush.h>
-
 const char *crush_bucket_alg_name(int alg)
 {
        switch (alg) {
@@ -134,6 +130,9 @@ void crush_destroy(struct crush_map *map)
                kfree(map->rules);
        }
 
+#ifndef __KERNEL__
+       kfree(map->choose_tries);
+#endif
        kfree(map);
 }
 
index 6192c7fc958ce84f6dc19b4005cf3580add5f88a..aae534c901a43169d73a8dc8c043dab47cc69941 100644 (file)
  *
  */
 
-#if defined(__linux__)
-#include <linux/types.h>
-#elif defined(__FreeBSD__)
-#include <sys/types.h>
-#endif
-
 #ifndef CEPH_CRUSH_LN_H
 #define CEPH_CRUSH_LN_H
 
+#ifdef __KERNEL__
+# include <linux/types.h>
+#else
+# include "crush_compat.h"
+#endif
 
-// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0)
-// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
-
-static int64_t __RH_LH_tbl[128*2+2] = {
+/*
+ * RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0)
+ * RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
+ */
+static __s64 __RH_LH_tbl[128*2+2] = {
   0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll,
   0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all,
   0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll,
@@ -89,11 +89,12 @@ static int64_t __RH_LH_tbl[128*2+2] = {
   0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll,
   0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll,
   0x0000800000000000ll, 0x0000ffff00000000ll,
-  };
-
+};
 
-    // LL_tbl[k] = 2^48*log2(1.0+k/2^15);
-static int64_t __LL_tbl[256] = {
+/*
+ * LL_tbl[k] = 2^48*log2(1.0+k/2^15)
+ */
+static __s64 __LL_tbl[256] = {
   0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull,
   0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull,
   0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull,
@@ -160,7 +161,4 @@ static int64_t __LL_tbl[256] = {
   0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull,
 };
 
-
-
-
 #endif
index 5bb63e37a8a10f3419a399b32339cf5baf279762..ed123af49eba563a004d5b69e8b08c648480cb59 100644 (file)
@@ -1,6 +1,8 @@
-
-#include <linux/types.h>
-#include <linux/crush/hash.h>
+#ifdef __KERNEL__
+# include <linux/crush/hash.h>
+#else
+# include "hash.h"
+#endif
 
 /*
  * Robert Jenkins' function for mixing 32-bit values
index 5b47736d27d94584bf33b2e60af56b5e76fc141c..393bfb22d5bbafd83c20f5953b98c6709b637452 100644 (file)
@@ -1,27 +1,31 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Intel Corporation All Rights Reserved
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
 
 #ifdef __KERNEL__
 # include <linux/string.h>
 # include <linux/slab.h>
 # include <linux/bug.h>
 # include <linux/kernel.h>
-# ifndef dprintk
-#  define dprintk(args...)
-# endif
+# include <linux/crush/crush.h>
+# include <linux/crush/hash.h>
 #else
-# include <string.h>
-# include <stdio.h>
-# include <stdlib.h>
-# include <assert.h>
-# define BUG_ON(x) assert(!(x))
-# define dprintk(args...) /* printf(args) */
-# define kmalloc(x, f) malloc(x)
-# define kfree(x) free(x)
+# include "crush_compat.h"
+# include "crush.h"
+# include "hash.h"
 #endif
-
-#include <linux/crush/crush.h>
-#include <linux/crush/hash.h>
 #include "crush_ln_table.h"
 
+#define dprintk(args...) /* printf(args) */
+
 /*
  * Implement the core CRUSH mapping algorithm.
  */
@@ -139,7 +143,7 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
        int i;
 
        for (i = bucket->h.size-1; i >= 0; i--) {
-               __u64 w = crush_hash32_4(bucket->h.hash,x, bucket->h.items[i],
+               __u64 w = crush_hash32_4(bucket->h.hash, x, bucket->h.items[i],
                                         r, bucket->h.id);
                w &= 0xffff;
                dprintk("list_choose i=%d x=%d r=%d item %d weight %x "
@@ -238,43 +242,46 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
        return bucket->h.items[high];
 }
 
-// compute 2^44*log2(input+1)
-uint64_t crush_ln(unsigned xin)
+/* compute 2^44*log2(input+1) */
+static __u64 crush_ln(unsigned int xin)
 {
-    unsigned x=xin, x1;
-    int iexpon, index1, index2;
-    uint64_t RH, LH, LL, xl64, result;
+       unsigned int x = xin, x1;
+       int iexpon, index1, index2;
+       __u64 RH, LH, LL, xl64, result;
 
-    x++;
+       x++;
 
-    // normalize input
-    iexpon = 15;
-    while(!(x&0x18000)) { x<<=1; iexpon--; }
+       /* normalize input */
+       iexpon = 15;
+       while (!(x & 0x18000)) {
+               x <<= 1;
+               iexpon--;
+       }
 
-    index1 = (x>>8)<<1;
-    // RH ~ 2^56/index1
-    RH = __RH_LH_tbl[index1 - 256];
-    // LH ~ 2^48 * log2(index1/256)
-    LH = __RH_LH_tbl[index1 + 1 - 256];
+       index1 = (x >> 8) << 1;
+       /* RH ~ 2^56/index1 */
+       RH = __RH_LH_tbl[index1 - 256];
+       /* LH ~ 2^48 * log2(index1/256) */
+       LH = __RH_LH_tbl[index1 + 1 - 256];
 
-    // RH*x ~ 2^48 * (2^15 + xf), xf<2^8
-    xl64 = (int64_t)x * RH;
-    xl64 >>= 48;
-    x1 = xl64;
+       /* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */
+       xl64 = (__s64)x * RH;
+       xl64 >>= 48;
+       x1 = xl64;
 
-    result = iexpon;
-    result <<= (12 + 32);
+       result = iexpon;
+       result <<= (12 + 32);
 
-    index2 = x1 & 0xff;
-    // LL ~ 2^48*log2(1.0+index2/2^15)
-    LL = __LL_tbl[index2];
+       index2 = x1 & 0xff;
+       /* LL ~ 2^48*log2(1.0+index2/2^15) */
+       LL = __LL_tbl[index2];
 
-    LH = LH + LL;
+       LH = LH + LL;
 
-    LH >>= (48-12 - 32);
-    result += LH;
+       LH >>= (48 - 12 - 32);
+       result += LH;
 
-    return result;
+       return result;
 }
 
 
@@ -290,9 +297,9 @@ uint64_t crush_ln(unsigned xin)
 static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
                                int x, int r)
 {
-       unsigned i, high = 0;
-       unsigned u;
-       unsigned w;
+       unsigned int i, high = 0;
+       unsigned int u;
+       unsigned int w;
        __s64 ln, draw, high_draw = 0;
 
        for (i = 0; i < bucket->h.size; i++) {
@@ -567,6 +574,10 @@ reject:
                out[outpos] = item;
                outpos++;
                count--;
+#ifndef __KERNEL__
+               if (map->choose_tries && ftotal <= map->choose_total_tries)
+                       map->choose_tries[ftotal]++;
+#endif
        }
 
        dprintk("CHOOSE returns %d\n", outpos);
@@ -610,6 +621,20 @@ static void crush_choose_indep(const struct crush_map *map,
        }
 
        for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) {
+#ifdef DEBUG_INDEP
+               if (out2 && ftotal) {
+                       dprintk("%u %d a: ", ftotal, left);
+                       for (rep = outpos; rep < endpos; rep++) {
+                               dprintk(" %d", out[rep]);
+                       }
+                       dprintk("\n");
+                       dprintk("%u %d b: ", ftotal, left);
+                       for (rep = outpos; rep < endpos; rep++) {
+                               dprintk(" %d", out2[rep]);
+                       }
+                       dprintk("\n");
+               }
+#endif
                for (rep = outpos; rep < endpos; rep++) {
                        if (out[rep] != CRUSH_ITEM_UNDEF)
                                continue;
@@ -726,6 +751,24 @@ static void crush_choose_indep(const struct crush_map *map,
                        out2[rep] = CRUSH_ITEM_NONE;
                }
        }
+#ifndef __KERNEL__
+       if (map->choose_tries && ftotal <= map->choose_total_tries)
+               map->choose_tries[ftotal]++;
+#endif
+#ifdef DEBUG_INDEP
+       if (out2) {
+               dprintk("%u %d a: ", ftotal, left);
+               for (rep = outpos; rep < endpos; rep++) {
+                       dprintk(" %d", out[rep]);
+               }
+               dprintk("\n");
+               dprintk("%u %d b: ", ftotal, left);
+               for (rep = outpos; rep < endpos; rep++) {
+                       dprintk(" %d", out2[rep]);
+               }
+               dprintk("\n");
+       }
+#endif
 }
 
 /**
@@ -790,8 +833,15 @@ int crush_do_rule(const struct crush_map *map,
 
                switch (curstep->op) {
                case CRUSH_RULE_TAKE:
-                       w[0] = curstep->arg1;
-                       wsize = 1;
+                       if ((curstep->arg1 >= 0 &&
+                            curstep->arg1 < map->max_devices) ||
+                           (-1-curstep->arg1 < map->max_buckets &&
+                            map->buckets[-1-curstep->arg1])) {
+                               w[0] = curstep->arg1;
+                               wsize = 1;
+                       } else {
+                               dprintk(" bad take value %d\n", curstep->arg1);
+                       }
                        break;
 
                case CRUSH_RULE_SET_CHOOSE_TRIES:
@@ -877,7 +927,7 @@ int crush_do_rule(const struct crush_map *map,
                                                0);
                                } else {
                                        out_size = ((numrep < (result_max-osize)) ?
-                                                    numrep : (result_max-osize));
+                                                   numrep : (result_max-osize));
                                        crush_choose_indep(
                                                map,
                                                map->buckets[-1-w[i]],
@@ -923,5 +973,3 @@ int crush_do_rule(const struct crush_map *map,
        }
        return result_len;
 }
-
-
index 073262fea6ddab4acb4cd128ee402ab54b870552..1679f47280e2678202238e667c1b617c7665d5fa 100644 (file)
@@ -278,7 +278,6 @@ static void _ceph_msgr_exit(void)
        ceph_msgr_slab_exit();
 
        BUG_ON(zero_page == NULL);
-       kunmap(zero_page);
        page_cache_release(zero_page);
        zero_page = NULL;
 }
@@ -1545,7 +1544,7 @@ static int write_partial_message_data(struct ceph_connection *con)
                page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
                                                        &last_piece);
                ret = ceph_tcp_sendpage(con->sock, page, page_offset,
-                                     length, last_piece);
+                                       length, !last_piece);
                if (ret <= 0) {
                        if (do_datacrc)
                                msg->footer.data_crc = cpu_to_le32(crc);
index 2b3cf05e87b0fc44a150f1c314f2db98f1ab7dfc..9d6ff1215928cb69787a85421fdbab9e2a1c8bf5 100644 (file)
@@ -298,21 +298,28 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
 }
 EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
 
+/*
+ * Wait for an osdmap with a given epoch.
+ *
+ * @epoch: epoch to wait for
+ * @timeout: in jiffies, 0 means "wait forever"
+ */
 int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
                          unsigned long timeout)
 {
        unsigned long started = jiffies;
-       int ret;
+       long ret;
 
        mutex_lock(&monc->mutex);
        while (monc->have_osdmap < epoch) {
                mutex_unlock(&monc->mutex);
 
-               if (timeout != 0 && time_after_eq(jiffies, started + timeout))
+               if (timeout && time_after_eq(jiffies, started + timeout))
                        return -ETIMEDOUT;
 
                ret = wait_event_interruptible_timeout(monc->client->auth_wq,
-                                        monc->have_osdmap >= epoch, timeout);
+                                               monc->have_osdmap >= epoch,
+                                               ceph_timeout_jiffies(timeout));
                if (ret < 0)
                        return ret;
 
index c4ec9239249ae6541a8ee378f95230a42c2f3a3d..50033677c0fa5134d540fba82cc8e298ce1367a0 100644 (file)
@@ -296,6 +296,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
        case CEPH_OSD_OP_CMPXATTR:
                ceph_osd_data_release(&op->xattr.osd_data);
                break;
+       case CEPH_OSD_OP_STAT:
+               ceph_osd_data_release(&op->raw_data_in);
+               break;
        default:
                break;
        }
@@ -450,7 +453,7 @@ __CEPH_FORALL_OSD_OPS(GENERATE_CASE)
  */
 static struct ceph_osd_req_op *
 _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
-                               u16 opcode)
+                u16 opcode, u32 flags)
 {
        struct ceph_osd_req_op *op;
 
@@ -460,14 +463,15 @@ _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
        op = &osd_req->r_ops[which];
        memset(op, 0, sizeof (*op));
        op->op = opcode;
+       op->flags = flags;
 
        return op;
 }
 
 void osd_req_op_init(struct ceph_osd_request *osd_req,
-                               unsigned int which, u16 opcode)
+                    unsigned int which, u16 opcode, u32 flags)
 {
-       (void)_osd_req_op_init(osd_req, which, opcode);
+       (void)_osd_req_op_init(osd_req, which, opcode, flags);
 }
 EXPORT_SYMBOL(osd_req_op_init);
 
@@ -476,7 +480,8 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
                                u64 offset, u64 length,
                                u64 truncate_size, u32 truncate_seq)
 {
-       struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+       struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
+                                                     opcode, 0);
        size_t payload_len = 0;
 
        BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
@@ -515,7 +520,8 @@ EXPORT_SYMBOL(osd_req_op_extent_update);
 void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
                        u16 opcode, const char *class, const char *method)
 {
-       struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+       struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
+                                                     opcode, 0);
        struct ceph_pagelist *pagelist;
        size_t payload_len = 0;
        size_t size;
@@ -552,7 +558,8 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
                          u16 opcode, const char *name, const void *value,
                          size_t size, u8 cmp_op, u8 cmp_mode)
 {
-       struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+       struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
+                                                     opcode, 0);
        struct ceph_pagelist *pagelist;
        size_t payload_len;
 
@@ -585,7 +592,8 @@ void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
                                unsigned int which, u16 opcode,
                                u64 cookie, u64 version, int flag)
 {
-       struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+       struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
+                                                     opcode, 0);
 
        BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH);
 
@@ -602,7 +610,8 @@ void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
                                u64 expected_write_size)
 {
        struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
-                                                     CEPH_OSD_OP_SETALLOCHINT);
+                                                     CEPH_OSD_OP_SETALLOCHINT,
+                                                     0);
 
        op->alloc_hint.expected_object_size = expected_object_size;
        op->alloc_hint.expected_write_size = expected_write_size;
@@ -786,7 +795,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
        }
 
        if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) {
-               osd_req_op_init(req, which, opcode);
+               osd_req_op_init(req, which, opcode, 0);
        } else {
                u32 object_size = le32_to_cpu(layout->fl_object_size);
                u32 object_base = off - objoff;
@@ -1088,7 +1097,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc,
        BUG_ON(!list_empty(&osd->o_osd_lru));
 
        list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
-       osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
+       osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl;
 }
 
 static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc,
@@ -1199,7 +1208,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
 static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
 {
        schedule_delayed_work(&osdc->timeout_work,
-                       osdc->client->options->osd_keepalive_timeout * HZ);
+                             osdc->client->options->osd_keepalive_timeout);
 }
 
 static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
@@ -1567,10 +1576,9 @@ static void handle_timeout(struct work_struct *work)
 {
        struct ceph_osd_client *osdc =
                container_of(work, struct ceph_osd_client, timeout_work.work);
+       struct ceph_options *opts = osdc->client->options;
        struct ceph_osd_request *req;
        struct ceph_osd *osd;
-       unsigned long keepalive =
-               osdc->client->options->osd_keepalive_timeout * HZ;
        struct list_head slow_osds;
        dout("timeout\n");
        down_read(&osdc->map_sem);
@@ -1586,7 +1594,8 @@ static void handle_timeout(struct work_struct *work)
         */
        INIT_LIST_HEAD(&slow_osds);
        list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
-               if (time_before(jiffies, req->r_stamp + keepalive))
+               if (time_before(jiffies,
+                               req->r_stamp + opts->osd_keepalive_timeout))
                        break;
 
                osd = req->r_osd;
@@ -1613,8 +1622,7 @@ static void handle_osds_timeout(struct work_struct *work)
        struct ceph_osd_client *osdc =
                container_of(work, struct ceph_osd_client,
                             osds_timeout_work.work);
-       unsigned long delay =
-               osdc->client->options->osd_idle_ttl * HZ >> 2;
+       unsigned long delay = osdc->client->options->osd_idle_ttl / 4;
 
        dout("osds timeout\n");
        down_read(&osdc->map_sem);
@@ -2619,7 +2627,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
        osdc->event_count = 0;
 
        schedule_delayed_work(&osdc->osds_timeout_work,
-          round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
+           round_jiffies_relative(osdc->client->options->osd_idle_ttl));
 
        err = -ENOMEM;
        osdc->req_mempool = mempool_create_kmalloc_pool(10,
index 15796696d64ede6b6a3118f5077972bed0c64350..4a3125836b64a0e5264e005badb7d108ddf9c47b 100644 (file)
@@ -89,7 +89,7 @@ static int crush_decode_tree_bucket(void **p, void *end,
 {
        int j;
        dout("crush_decode_tree_bucket %p to %p\n", *p, end);
-       ceph_decode_32_safe(p, end, b->num_nodes, bad);
+       ceph_decode_8_safe(p, end, b->num_nodes, bad);
        b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS);
        if (b->node_weights == NULL)
                return -ENOMEM;
index 096d91447e06e8a759ab8daf5283ddd4d27922b9..d4f5f220a8e55e063db6d3f3923a32be1ac33223 100644 (file)
@@ -51,10 +51,7 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
                        set_page_dirty_lock(pages[i]);
                put_page(pages[i]);
        }
-       if (is_vmalloc_addr(pages))
-               vfree(pages);
-       else
-               kfree(pages);
+       kvfree(pages);
 }
 EXPORT_SYMBOL(ceph_put_page_vector);
 
index 65de0684e22a17862663096da407eee16bc33a31..61eafc9b4545d5a9e1ea405e196753b3608c5fa0 100644 (file)
@@ -197,11 +197,4 @@ static int __init ipv4_netfilter_init(void)
 {
        return nf_register_afinfo(&nf_ip_afinfo);
 }
-
-static void __exit ipv4_netfilter_fini(void)
-{
-       nf_unregister_afinfo(&nf_ip_afinfo);
-}
-
-module_init(ipv4_netfilter_init);
-module_exit(ipv4_netfilter_fini);
+subsys_initcall(ipv4_netfilter_init);
index 936ad0a15371ac4f1e49b8785ef645fa65583e83..b512fbd9d79a403ee980d40c1a21fc3fde47f215 100644 (file)
@@ -14,6 +14,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
            sunrpc_syms.o cache.o rpc_pipe.o \
            svc_xprt.o
 sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o
-sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o
+sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o
 sunrpc-$(CONFIG_PROC_FS) += stats.o
 sunrpc-$(CONFIG_SYSCTL) += sysctl.o
index 9dd0ea8db463acc9daba0c51be89b1f17ec8f17d..9825ff0f91d6c0bde819105f639cae21883bbfad 100644 (file)
@@ -37,16 +37,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 static inline int xprt_need_to_requeue(struct rpc_xprt *xprt)
 {
-       return xprt->bc_alloc_count > 0;
+       return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots);
 }
 
 static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n)
 {
+       atomic_add(n, &xprt->bc_free_slots);
        xprt->bc_alloc_count += n;
 }
 
 static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n)
 {
+       atomic_sub(n, &xprt->bc_free_slots);
        return xprt->bc_alloc_count -= n;
 }
 
@@ -60,13 +62,62 @@ static void xprt_free_allocation(struct rpc_rqst *req)
 
        dprintk("RPC:        free allocations for req= %p\n", req);
        WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
-       xbufp = &req->rq_private_buf;
+       xbufp = &req->rq_rcv_buf;
        free_page((unsigned long)xbufp->head[0].iov_base);
        xbufp = &req->rq_snd_buf;
        free_page((unsigned long)xbufp->head[0].iov_base);
        kfree(req);
 }
 
+static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
+{
+       struct page *page;
+       /* Allocate one page to back this XDR buffer (receive or send) */
+       page = alloc_page(gfp_flags);
+       if (page == NULL)
+               return -ENOMEM;
+       buf->head[0].iov_base = page_address(page);
+       buf->head[0].iov_len = PAGE_SIZE;
+       buf->tail[0].iov_base = NULL;
+       buf->tail[0].iov_len = 0;
+       buf->page_len = 0;
+       buf->len = 0;
+       buf->buflen = PAGE_SIZE;
+       return 0;
+}
+
+static
+struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags)
+{
+       struct rpc_rqst *req;
+
+       /* Pre-allocate one backchannel rpc_rqst */
+       req = kzalloc(sizeof(*req), gfp_flags);
+       if (req == NULL)
+               return NULL;
+
+       req->rq_xprt = xprt;
+       INIT_LIST_HEAD(&req->rq_list);
+       INIT_LIST_HEAD(&req->rq_bc_list);
+
+       /* Preallocate one XDR receive buffer */
+       if (xprt_alloc_xdr_buf(&req->rq_rcv_buf, gfp_flags) < 0) {
+               printk(KERN_ERR "Failed to create bc receive xbuf\n");
+               goto out_free;
+       }
+       req->rq_rcv_buf.len = PAGE_SIZE;
+
+       /* Preallocate one XDR send buffer */
+       if (xprt_alloc_xdr_buf(&req->rq_snd_buf, gfp_flags) < 0) {
+               printk(KERN_ERR "Failed to create bc snd xbuf\n");
+               goto out_free;
+       }
+       return req;
+out_free:
+       xprt_free_allocation(req);
+       return NULL;
+}
+
 /*
  * Preallocate up to min_reqs structures and related buffers for use
  * by the backchannel.  This function can be called multiple times
@@ -87,9 +138,7 @@ static void xprt_free_allocation(struct rpc_rqst *req)
  */
 int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
 {
-       struct page *page_rcv = NULL, *page_snd = NULL;
-       struct xdr_buf *xbufp = NULL;
-       struct rpc_rqst *req, *tmp;
+       struct rpc_rqst *req;
        struct list_head tmp_list;
        int i;
 
@@ -106,7 +155,7 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
        INIT_LIST_HEAD(&tmp_list);
        for (i = 0; i < min_reqs; i++) {
                /* Pre-allocate one backchannel rpc_rqst */
-               req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
+               req = xprt_alloc_bc_req(xprt, GFP_KERNEL);
                if (req == NULL) {
                        printk(KERN_ERR "Failed to create bc rpc_rqst\n");
                        goto out_free;
@@ -115,41 +164,6 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
                /* Add the allocated buffer to the tmp list */
                dprintk("RPC:       adding req= %p\n", req);
                list_add(&req->rq_bc_pa_list, &tmp_list);
-
-               req->rq_xprt = xprt;
-               INIT_LIST_HEAD(&req->rq_list);
-               INIT_LIST_HEAD(&req->rq_bc_list);
-
-               /* Preallocate one XDR receive buffer */
-               page_rcv = alloc_page(GFP_KERNEL);
-               if (page_rcv == NULL) {
-                       printk(KERN_ERR "Failed to create bc receive xbuf\n");
-                       goto out_free;
-               }
-               xbufp = &req->rq_rcv_buf;
-               xbufp->head[0].iov_base = page_address(page_rcv);
-               xbufp->head[0].iov_len = PAGE_SIZE;
-               xbufp->tail[0].iov_base = NULL;
-               xbufp->tail[0].iov_len = 0;
-               xbufp->page_len = 0;
-               xbufp->len = PAGE_SIZE;
-               xbufp->buflen = PAGE_SIZE;
-
-               /* Preallocate one XDR send buffer */
-               page_snd = alloc_page(GFP_KERNEL);
-               if (page_snd == NULL) {
-                       printk(KERN_ERR "Failed to create bc snd xbuf\n");
-                       goto out_free;
-               }
-
-               xbufp = &req->rq_snd_buf;
-               xbufp->head[0].iov_base = page_address(page_snd);
-               xbufp->head[0].iov_len = 0;
-               xbufp->tail[0].iov_base = NULL;
-               xbufp->tail[0].iov_len = 0;
-               xbufp->page_len = 0;
-               xbufp->len = 0;
-               xbufp->buflen = PAGE_SIZE;
        }
 
        /*
@@ -167,7 +181,10 @@ out_free:
        /*
         * Memory allocation failed, free the temporary list
         */
-       list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) {
+       while (!list_empty(&tmp_list)) {
+               req = list_first_entry(&tmp_list,
+                               struct rpc_rqst,
+                               rq_bc_pa_list);
                list_del(&req->rq_bc_pa_list);
                xprt_free_allocation(req);
        }
@@ -217,9 +234,15 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
        struct rpc_rqst *req = NULL;
 
        dprintk("RPC:       allocate a backchannel request\n");
-       if (list_empty(&xprt->bc_pa_list))
+       if (atomic_read(&xprt->bc_free_slots) <= 0)
                goto not_found;
-
+       if (list_empty(&xprt->bc_pa_list)) {
+               req = xprt_alloc_bc_req(xprt, GFP_ATOMIC);
+               if (!req)
+                       goto not_found;
+               /* Note: this 'free' request adds it to xprt->bc_pa_list */
+               xprt_free_bc_request(req);
+       }
        req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
                                rq_bc_pa_list);
        req->rq_reply_bytes_recvd = 0;
@@ -245,11 +268,21 @@ void xprt_free_bc_request(struct rpc_rqst *req)
 
        req->rq_connect_cookie = xprt->connect_cookie - 1;
        smp_mb__before_atomic();
-       WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
        clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
        smp_mb__after_atomic();
 
-       if (!xprt_need_to_requeue(xprt)) {
+       /*
+        * Return it to the list of preallocations so that it
+        * may be reused by a new callback request.
+        */
+       spin_lock_bh(&xprt->bc_pa_lock);
+       if (xprt_need_to_requeue(xprt)) {
+               list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
+               xprt->bc_alloc_count++;
+               req = NULL;
+       }
+       spin_unlock_bh(&xprt->bc_pa_lock);
+       if (req != NULL) {
                /*
                 * The last remaining session was destroyed while this
                 * entry was in use.  Free the entry and don't attempt
@@ -260,14 +293,6 @@ void xprt_free_bc_request(struct rpc_rqst *req)
                xprt_free_allocation(req);
                return;
        }
-
-       /*
-        * Return it to the list of preallocations so that it
-        * may be reused by a new callback request.
-        */
-       spin_lock_bh(&xprt->bc_pa_lock);
-       list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
-       spin_unlock_bh(&xprt->bc_pa_lock);
 }
 
 /*
@@ -311,6 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
 
        spin_lock(&xprt->bc_pa_lock);
        list_del(&req->rq_bc_pa_list);
+       xprt->bc_alloc_count--;
        spin_unlock(&xprt->bc_pa_lock);
 
        req->rq_private_buf.len = copied;
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
deleted file mode 100644 (file)
index 15c7a8a..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/******************************************************************************
-
-(c) 2007 Network Appliance, Inc.  All Rights Reserved.
-(c) 2009 NetApp.  All Rights Reserved.
-
-NetApp provides this source code under the GPL v2 License.
-The GPL v2 license is available at
-http://opensource.org/licenses/gpl-license.php.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-******************************************************************************/
-
-/*
- * The NFSv4.1 callback service helper routines.
- * They implement the transport level processing required to send the
- * reply over an existing open connection previously established by the client.
- */
-
-#include <linux/module.h>
-
-#include <linux/sunrpc/xprt.h>
-#include <linux/sunrpc/sched.h>
-#include <linux/sunrpc/bc_xprt.h>
-
-#define RPCDBG_FACILITY        RPCDBG_SVCDSP
-
-/* Empty callback ops */
-static const struct rpc_call_ops nfs41_callback_ops = {
-};
-
-
-/*
- * Send the callback reply
- */
-int bc_send(struct rpc_rqst *req)
-{
-       struct rpc_task *task;
-       int ret;
-
-       dprintk("RPC:       bc_send req= %p\n", req);
-       task = rpc_run_bc_task(req, &nfs41_callback_ops);
-       if (IS_ERR(task))
-               ret = PTR_ERR(task);
-       else {
-               WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
-               ret = task->tk_status;
-               rpc_put_task(task);
-       }
-       dprintk("RPC:       bc_send ret= %d\n", ret);
-       return ret;
-}
-
index e6ce1517367f884608640b2532080ab6566b9379..cbc6af923dd1cb0baabc95161989133150269d4f 100644 (file)
@@ -891,15 +891,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
                        task->tk_flags |= RPC_TASK_SOFT;
                if (clnt->cl_noretranstimeo)
                        task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
-               if (sk_memalloc_socks()) {
-                       struct rpc_xprt *xprt;
-
-                       rcu_read_lock();
-                       xprt = rcu_dereference(clnt->cl_xprt);
-                       if (xprt->swapper)
-                               task->tk_flags |= RPC_TASK_SWAPPER;
-                       rcu_read_unlock();
-               }
+               if (atomic_read(&clnt->cl_swapper))
+                       task->tk_flags |= RPC_TASK_SWAPPER;
                /* Add to the client's list of all tasks */
                spin_lock(&clnt->cl_lock);
                list_add_tail(&task->tk_task, &clnt->cl_tasks);
@@ -1031,15 +1024,14 @@ EXPORT_SYMBOL_GPL(rpc_call_async);
  * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
  * rpc_execute against it
  * @req: RPC request
- * @tk_ops: RPC call ops
  */
-struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
-                               const struct rpc_call_ops *tk_ops)
+struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 {
        struct rpc_task *task;
        struct xdr_buf *xbufp = &req->rq_snd_buf;
        struct rpc_task_setup task_setup_data = {
-               .callback_ops = tk_ops,
+               .callback_ops = &rpc_default_ops,
+               .flags = RPC_TASK_SOFTCONN,
        };
 
        dprintk("RPC: rpc_run_bc_task req= %p\n", req);
@@ -1614,6 +1606,7 @@ call_allocate(struct rpc_task *task)
                                        req->rq_callsize + req->rq_rcvsize);
        if (req->rq_buffer != NULL)
                return;
+       xprt_inject_disconnect(xprt);
 
        dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
 
@@ -1951,33 +1944,36 @@ call_bc_transmit(struct rpc_task *task)
 {
        struct rpc_rqst *req = task->tk_rqstp;
 
-       if (!xprt_prepare_transmit(task)) {
-               /*
-                * Could not reserve the transport. Try again after the
-                * transport is released.
-                */
-               task->tk_status = 0;
-               task->tk_action = call_bc_transmit;
-               return;
-       }
+       if (!xprt_prepare_transmit(task))
+               goto out_retry;
 
-       task->tk_action = rpc_exit_task;
        if (task->tk_status < 0) {
                printk(KERN_NOTICE "RPC: Could not send backchannel reply "
                        "error: %d\n", task->tk_status);
-               return;
+               goto out_done;
        }
+       if (req->rq_connect_cookie != req->rq_xprt->connect_cookie)
+               req->rq_bytes_sent = 0;
 
        xprt_transmit(task);
+
+       if (task->tk_status == -EAGAIN)
+               goto out_nospace;
+
        xprt_end_transmit(task);
        dprint_status(task);
        switch (task->tk_status) {
        case 0:
                /* Success */
-               break;
        case -EHOSTDOWN:
        case -EHOSTUNREACH:
        case -ENETUNREACH:
+       case -ECONNRESET:
+       case -ECONNREFUSED:
+       case -EADDRINUSE:
+       case -ENOTCONN:
+       case -EPIPE:
+               break;
        case -ETIMEDOUT:
                /*
                 * Problem reaching the server.  Disconnect and let the
@@ -2002,6 +1998,13 @@ call_bc_transmit(struct rpc_task *task)
                break;
        }
        rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
+out_done:
+       task->tk_action = rpc_exit_task;
+       return;
+out_nospace:
+       req->rq_connect_cookie = req->rq_xprt->connect_cookie;
+out_retry:
+       task->tk_status = 0;
 }
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
@@ -2476,3 +2479,59 @@ void rpc_show_tasks(struct net *net)
        spin_unlock(&sn->rpc_client_lock);
 }
 #endif
+
+#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
+int
+rpc_clnt_swap_activate(struct rpc_clnt *clnt)
+{
+       int ret = 0;
+       struct rpc_xprt *xprt;
+
+       if (atomic_inc_return(&clnt->cl_swapper) == 1) {
+retry:
+               rcu_read_lock();
+               xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+               rcu_read_unlock();
+               if (!xprt) {
+                       /*
+                        * If we didn't get a reference, then we likely are
+                        * racing with a migration event. Wait for a grace
+                        * period and try again.
+                        */
+                       synchronize_rcu();
+                       goto retry;
+               }
+
+               ret = xprt_enable_swap(xprt);
+               xprt_put(xprt);
+       }
+       return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate);
+
+void
+rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
+{
+       struct rpc_xprt *xprt;
+
+       if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) {
+retry:
+               rcu_read_lock();
+               xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+               rcu_read_unlock();
+               if (!xprt) {
+                       /*
+                        * If we didn't get a reference, then we likely are
+                        * racing with a migration event. Wait for a grace
+                        * period and try again.
+                        */
+                       synchronize_rcu();
+                       goto retry;
+               }
+
+               xprt_disable_swap(xprt);
+               xprt_put(xprt);
+       }
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate);
+#endif /* CONFIG_SUNRPC_SWAP */
index 82962f7e6e888f619ad79754f038732d5d5b6333..e7b4d93566df42dfa5ecf985152235e539ed9933 100644 (file)
 #include "netns.h"
 
 static struct dentry *topdir;
+static struct dentry *rpc_fault_dir;
 static struct dentry *rpc_clnt_dir;
 static struct dentry *rpc_xprt_dir;
 
+unsigned int rpc_inject_disconnect;
+
 struct rpc_clnt_iter {
        struct rpc_clnt *clnt;
        loff_t          pos;
@@ -257,6 +260,8 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
                debugfs_remove_recursive(xprt->debugfs);
                xprt->debugfs = NULL;
        }
+
+       atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
 }
 
 void
@@ -266,11 +271,79 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt)
        xprt->debugfs = NULL;
 }
 
+static int
+fault_open(struct inode *inode, struct file *filp)
+{
+       filp->private_data = kmalloc(128, GFP_KERNEL);
+       if (!filp->private_data)
+               return -ENOMEM;
+       return 0;
+}
+
+static int
+fault_release(struct inode *inode, struct file *filp)
+{
+       kfree(filp->private_data);
+       return 0;
+}
+
+static ssize_t
+fault_disconnect_read(struct file *filp, char __user *user_buf,
+                     size_t len, loff_t *offset)
+{
+       char *buffer = (char *)filp->private_data;
+       size_t size;
+
+       size = sprintf(buffer, "%u\n", rpc_inject_disconnect);
+       return simple_read_from_buffer(user_buf, len, offset, buffer, size);
+}
+
+static ssize_t
+fault_disconnect_write(struct file *filp, const char __user *user_buf,
+                      size_t len, loff_t *offset)
+{
+       char buffer[16];
+
+       if (len >= sizeof(buffer))
+               len = sizeof(buffer) - 1;
+       if (copy_from_user(buffer, user_buf, len))
+               return -EFAULT;
+       buffer[len] = '\0';
+       if (kstrtouint(buffer, 10, &rpc_inject_disconnect))
+               return -EINVAL;
+       return len;
+}
+
+static const struct file_operations fault_disconnect_fops = {
+       .owner          = THIS_MODULE,
+       .open           = fault_open,
+       .read           = fault_disconnect_read,
+       .write          = fault_disconnect_write,
+       .release        = fault_release,
+};
+
+static struct dentry *
+inject_fault_dir(struct dentry *topdir)
+{
+       struct dentry *faultdir;
+
+       faultdir = debugfs_create_dir("inject_fault", topdir);
+       if (!faultdir)
+               return NULL;
+
+       if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir,
+                                NULL, &fault_disconnect_fops))
+               return NULL;
+
+       return faultdir;
+}
+
 void __exit
 sunrpc_debugfs_exit(void)
 {
        debugfs_remove_recursive(topdir);
        topdir = NULL;
+       rpc_fault_dir = NULL;
        rpc_clnt_dir = NULL;
        rpc_xprt_dir = NULL;
 }
@@ -282,6 +355,10 @@ sunrpc_debugfs_init(void)
        if (!topdir)
                return;
 
+       rpc_fault_dir = inject_fault_dir(topdir);
+       if (!rpc_fault_dir)
+               goto out_remove;
+
        rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
        if (!rpc_clnt_dir)
                goto out_remove;
@@ -294,5 +371,6 @@ sunrpc_debugfs_init(void)
 out_remove:
        debugfs_remove_recursive(topdir);
        topdir = NULL;
+       rpc_fault_dir = NULL;
        rpc_clnt_dir = NULL;
 }
index 852ae606b02a37760a5a4dc1fd1860b8a45b89a6..5a16d8d8c831c4ad2805f5958b9ccef63449af82 100644 (file)
@@ -1350,6 +1350,11 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
 {
        struct kvec     *argv = &rqstp->rq_arg.head[0];
        struct kvec     *resv = &rqstp->rq_res.head[0];
+       struct rpc_task *task;
+       int proc_error;
+       int error;
+
+       dprintk("svc: %s(%p)\n", __func__, req);
 
        /* Build the svc_rqst used by the common processing routine */
        rqstp->rq_xprt = serv->sv_bc_xprt;
@@ -1372,21 +1377,36 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
 
        /*
         * Skip the next two words because they've already been
-        * processed in the trasport
+        * processed in the transport
         */
        svc_getu32(argv);       /* XID */
        svc_getnl(argv);        /* CALLDIR */
 
-       /* Returns 1 for send, 0 for drop */
-       if (svc_process_common(rqstp, argv, resv)) {
-               memcpy(&req->rq_snd_buf, &rqstp->rq_res,
-                                               sizeof(req->rq_snd_buf));
-               return bc_send(req);
-       } else {
-               /* drop request */
+       /* Parse and execute the bc call */
+       proc_error = svc_process_common(rqstp, argv, resv);
+
+       atomic_inc(&req->rq_xprt->bc_free_slots);
+       if (!proc_error) {
+               /* Processing error: drop the request */
                xprt_free_bc_request(req);
                return 0;
        }
+
+       /* Finally, send the reply synchronously */
+       memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
+       task = rpc_run_bc_task(req);
+       if (IS_ERR(task)) {
+               error = PTR_ERR(task);
+               goto out;
+       }
+
+       WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
+       error = task->tk_status;
+       rpc_put_task(task);
+
+out:
+       dprintk("svc: %s(), error=%d\n", __func__, error);
+       return error;
 }
 EXPORT_SYMBOL_GPL(bc_svc_process);
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
index 1d4fe24af06a1115bd80538c5346ae2f843f1eb8..ab5dd621ae0c0795a0d86e1a9fb83c5cc2812486 100644 (file)
@@ -68,6 +68,7 @@ static void    xprt_init(struct rpc_xprt *xprt, struct net *net);
 static void    xprt_request_init(struct rpc_task *, struct rpc_xprt *);
 static void    xprt_connect_status(struct rpc_task *task);
 static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
+static void     __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
 static void     xprt_destroy(struct rpc_xprt *xprt);
 
 static DEFINE_SPINLOCK(xprt_list_lock);
@@ -250,6 +251,8 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
        }
        xprt_clear_locked(xprt);
 out_sleep:
+       if (req)
+               __xprt_put_cong(xprt, req);
        dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
        task->tk_timeout = 0;
        task->tk_status = -EAGAIN;
@@ -608,8 +611,8 @@ static void xprt_autoclose(struct work_struct *work)
        struct rpc_xprt *xprt =
                container_of(work, struct rpc_xprt, task_cleanup);
 
-       xprt->ops->close(xprt);
        clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+       xprt->ops->close(xprt);
        xprt_release_write(xprt, NULL);
 }
 
@@ -967,6 +970,7 @@ void xprt_transmit(struct rpc_task *task)
                task->tk_status = status;
                return;
        }
+       xprt_inject_disconnect(xprt);
 
        dprintk("RPC: %5u xmit complete\n", task->tk_pid);
        task->tk_flags |= RPC_TASK_SENT;
@@ -1285,6 +1289,7 @@ void xprt_release(struct rpc_task *task)
        spin_unlock_bh(&xprt->transport_lock);
        if (req->rq_buffer)
                xprt->ops->buf_free(req->rq_buffer);
+       xprt_inject_disconnect(xprt);
        if (req->rq_cred != NULL)
                put_rpccred(req->rq_cred);
        task->tk_rqstp = NULL;
index 302d4ebf6fbfb2a2c15780ef966b52bb473b91e4..f1e8dafbd5079b3406a769ba4854ecba229edca6 100644 (file)
  * can take tens of usecs to complete.
  */
 
+/* Normal operation
+ *
+ * A Memory Region is prepared for RDMA READ or WRITE using the
+ * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
+ * finished, the Memory Region is unmapped using the ib_unmap_fmr
+ * verb (fmr_op_unmap).
+ */
+
+/* Transport recovery
+ *
+ * After a transport reconnect, fmr_op_map re-uses the MR already
+ * allocated for the RPC, but generates a fresh rkey then maps the
+ * MR again. This process is synchronous.
+ */
+
 #include "xprt_rdma.h"
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -50,19 +65,28 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
        struct rpcrdma_mw *r;
        int i, rc;
 
+       spin_lock_init(&buf->rb_mwlock);
        INIT_LIST_HEAD(&buf->rb_mws);
        INIT_LIST_HEAD(&buf->rb_all);
 
-       i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
-       dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i);
+       i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1);
+       i += 2;                         /* head + tail */
+       i *= buf->rb_max_requests;      /* one set for each RPC slot */
+       dprintk("RPC:       %s: initalizing %d FMRs\n", __func__, i);
 
+       rc = -ENOMEM;
        while (i--) {
                r = kzalloc(sizeof(*r), GFP_KERNEL);
                if (!r)
-                       return -ENOMEM;
+                       goto out;
 
-               r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
-               if (IS_ERR(r->r.fmr))
+               r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
+                                            sizeof(u64), GFP_KERNEL);
+               if (!r->r.fmr.physaddrs)
+                       goto out_free;
+
+               r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
+               if (IS_ERR(r->r.fmr.fmr))
                        goto out_fmr_err;
 
                list_add(&r->mw_list, &buf->rb_mws);
@@ -71,12 +95,24 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
        return 0;
 
 out_fmr_err:
-       rc = PTR_ERR(r->r.fmr);
+       rc = PTR_ERR(r->r.fmr.fmr);
        dprintk("RPC:       %s: ib_alloc_fmr status %i\n", __func__, rc);
+       kfree(r->r.fmr.physaddrs);
+out_free:
        kfree(r);
+out:
        return rc;
 }
 
+static int
+__fmr_unmap(struct rpcrdma_mw *r)
+{
+       LIST_HEAD(l);
+
+       list_add(&r->r.fmr.fmr->list, &l);
+       return ib_unmap_fmr(&l);
+}
+
 /* Use the ib_map_phys_fmr() verb to register a memory region
  * for remote access via RDMA READ or RDMA WRITE.
  */
@@ -85,12 +121,24 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
           int nsegs, bool writing)
 {
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-       struct ib_device *device = ia->ri_id->device;
+       struct ib_device *device = ia->ri_device;
        enum dma_data_direction direction = rpcrdma_data_dir(writing);
        struct rpcrdma_mr_seg *seg1 = seg;
-       struct rpcrdma_mw *mw = seg1->rl_mw;
-       u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
        int len, pageoff, i, rc;
+       struct rpcrdma_mw *mw;
+
+       mw = seg1->rl_mw;
+       seg1->rl_mw = NULL;
+       if (!mw) {
+               mw = rpcrdma_get_mw(r_xprt);
+               if (!mw)
+                       return -ENOMEM;
+       } else {
+               /* this is a retransmit; generate a fresh rkey */
+               rc = __fmr_unmap(mw);
+               if (rc)
+                       return rc;
+       }
 
        pageoff = offset_in_page(seg1->mr_offset);
        seg1->mr_offset -= pageoff;     /* start of page */
@@ -100,7 +148,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
                nsegs = RPCRDMA_MAX_FMR_SGES;
        for (i = 0; i < nsegs;) {
                rpcrdma_map_one(device, seg, direction);
-               physaddrs[i] = seg->mr_dma;
+               mw->r.fmr.physaddrs[i] = seg->mr_dma;
                len += seg->mr_len;
                ++seg;
                ++i;
@@ -110,11 +158,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
                        break;
        }
 
-       rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma);
+       rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs,
+                            i, seg1->mr_dma);
        if (rc)
                goto out_maperr;
 
-       seg1->mr_rkey = mw->r.fmr->rkey;
+       seg1->rl_mw = mw;
+       seg1->mr_rkey = mw->r.fmr.fmr->rkey;
        seg1->mr_base = seg1->mr_dma + pageoff;
        seg1->mr_nsegs = i;
        seg1->mr_len = len;
@@ -137,48 +187,28 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 {
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        struct rpcrdma_mr_seg *seg1 = seg;
-       struct ib_device *device;
+       struct rpcrdma_mw *mw = seg1->rl_mw;
        int rc, nsegs = seg->mr_nsegs;
-       LIST_HEAD(l);
 
-       list_add(&seg1->rl_mw->r.fmr->list, &l);
-       rc = ib_unmap_fmr(&l);
-       read_lock(&ia->ri_qplock);
-       device = ia->ri_id->device;
+       dprintk("RPC:       %s: FMR %p\n", __func__, mw);
+
+       seg1->rl_mw = NULL;
        while (seg1->mr_nsegs--)
-               rpcrdma_unmap_one(device, seg++);
-       read_unlock(&ia->ri_qplock);
+               rpcrdma_unmap_one(ia->ri_device, seg++);
+       rc = __fmr_unmap(mw);
        if (rc)
                goto out_err;
+       rpcrdma_put_mw(r_xprt, mw);
        return nsegs;
 
 out_err:
+       /* The FMR is abandoned, but remains in rb_all. fmr_op_destroy
+        * will attempt to release it when the transport is destroyed.
+        */
        dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
        return nsegs;
 }
 
-/* After a disconnect, unmap all FMRs.
- *
- * This is invoked only in the transport connect worker in order
- * to serialize with rpcrdma_register_fmr_external().
- */
-static void
-fmr_op_reset(struct rpcrdma_xprt *r_xprt)
-{
-       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-       struct rpcrdma_mw *r;
-       LIST_HEAD(list);
-       int rc;
-
-       list_for_each_entry(r, &buf->rb_all, mw_all)
-               list_add(&r->r.fmr->list, &list);
-
-       rc = ib_unmap_fmr(&list);
-       if (rc)
-               dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
-                       __func__, rc);
-}
-
 static void
 fmr_op_destroy(struct rpcrdma_buffer *buf)
 {
@@ -188,10 +218,13 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
        while (!list_empty(&buf->rb_all)) {
                r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
                list_del(&r->mw_all);
-               rc = ib_dealloc_fmr(r->r.fmr);
+               kfree(r->r.fmr.physaddrs);
+
+               rc = ib_dealloc_fmr(r->r.fmr.fmr);
                if (rc)
                        dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
                                __func__, rc);
+
                kfree(r);
        }
 }
@@ -202,7 +235,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
        .ro_open                        = fmr_op_open,
        .ro_maxpages                    = fmr_op_maxpages,
        .ro_init                        = fmr_op_init,
-       .ro_reset                       = fmr_op_reset,
        .ro_destroy                     = fmr_op_destroy,
        .ro_displayname                 = "fmr",
 };
index d234521320a4bb45a9bbecd40e5e58bbb42d77d8..04ea914201b237cc6f42ce68caa6b5dbc7b29d59 100644 (file)
  * but most complex memory registration mode.
  */
 
+/* Normal operation
+ *
+ * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
+ * Work Request (frwr_op_map). When the RDMA operation is finished, this
+ * Memory Region is invalidated using a LOCAL_INV Work Request
+ * (frwr_op_unmap).
+ *
+ * Typically these Work Requests are not signaled, and neither are RDMA
+ * SEND Work Requests (with the exception of signaling occasionally to
+ * prevent provider work queue overflows). This greatly reduces HCA
+ * interrupt workload.
+ *
+ * As an optimization, frwr_op_unmap marks MRs INVALID before the
+ * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
+ * rb_mws immediately so that no work (like managing a linked list
+ * under a spinlock) is needed in the completion upcall.
+ *
+ * But this means that frwr_op_map() can occasionally encounter an MR
+ * that is INVALID but the LOCAL_INV WR has not completed. Work Queue
+ * ordering prevents a subsequent FAST_REG WR from executing against
+ * that MR while it is still being invalidated.
+ */
+
+/* Transport recovery
+ *
+ * ->op_map and the transport connect worker cannot run at the same
+ * time, but ->op_unmap can fire while the transport connect worker
+ * is running. Thus MR recovery is handled in ->op_map, to guarantee
+ * that recovered MRs are owned by a sending RPC, and not one where
+ * ->op_unmap could fire at the same time transport reconnect is
+ * being done.
+ *
+ * When the underlying transport disconnects, MRs are left in one of
+ * three states:
+ *
+ * INVALID:    The MR was not in use before the QP entered ERROR state.
+ *             (Or, the LOCAL_INV WR has not completed or flushed yet).
+ *
+ * STALE:      The MR was being registered or unregistered when the QP
+ *             entered ERROR state, and the pending WR was flushed.
+ *
+ * VALID:      The MR was registered before the QP entered ERROR state.
+ *
+ * When frwr_op_map encounters STALE and VALID MRs, they are recovered
+ * with ib_dereg_mr and then are re-initialized. Because MR recovery
+ * allocates fresh resources, it is deferred to a workqueue, and the
+ * recovered MRs are placed back on the rb_mws list when recovery is
+ * complete. frwr_op_map allocates another MR for the current RPC while
+ * the broken MR is reset.
+ *
+ * To ensure that frwr_op_map doesn't encounter an MR that is marked
+ * INVALID but that is about to be flushed due to a previous transport
+ * disconnect, the transport connect worker attempts to drain all
+ * pending send queue WRs before the transport is reconnected.
+ */
+
 #include "xprt_rdma.h"
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 # define RPCDBG_FACILITY       RPCDBG_TRANS
 #endif
 
+static struct workqueue_struct *frwr_recovery_wq;
+
+#define FRWR_RECOVERY_WQ_FLAGS         (WQ_UNBOUND | WQ_MEM_RECLAIM)
+
+int
+frwr_alloc_recovery_wq(void)
+{
+       frwr_recovery_wq = alloc_workqueue("frwr_recovery",
+                                          FRWR_RECOVERY_WQ_FLAGS, 0);
+       return !frwr_recovery_wq ? -ENOMEM : 0;
+}
+
+void
+frwr_destroy_recovery_wq(void)
+{
+       struct workqueue_struct *wq;
+
+       if (!frwr_recovery_wq)
+               return;
+
+       wq = frwr_recovery_wq;
+       frwr_recovery_wq = NULL;
+       destroy_workqueue(wq);
+}
+
+/* Deferred reset of a single FRMR. Generate a fresh rkey by
+ * replacing the MR.
+ *
+ * There's no recovery if this fails. The FRMR is abandoned, but
+ * remains in rb_all. It will be cleaned up when the transport is
+ * destroyed.
+ */
+static void
+__frwr_recovery_worker(struct work_struct *work)
+{
+       struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw,
+                                           r.frmr.fr_work);
+       struct rpcrdma_xprt *r_xprt = r->r.frmr.fr_xprt;
+       unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
+       struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+
+       if (ib_dereg_mr(r->r.frmr.fr_mr))
+               goto out_fail;
+
+       r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(pd, depth);
+       if (IS_ERR(r->r.frmr.fr_mr))
+               goto out_fail;
+
+       dprintk("RPC:       %s: recovered FRMR %p\n", __func__, r);
+       r->r.frmr.fr_state = FRMR_IS_INVALID;
+       rpcrdma_put_mw(r_xprt, r);
+       return;
+
+out_fail:
+       pr_warn("RPC:       %s: FRMR %p unrecovered\n",
+               __func__, r);
+}
+
+/* A broken MR was discovered in a context that can't sleep.
+ * Defer recovery to the recovery worker.
+ */
+static void
+__frwr_queue_recovery(struct rpcrdma_mw *r)
+{
+       INIT_WORK(&r->r.frmr.fr_work, __frwr_recovery_worker);
+       queue_work(frwr_recovery_wq, &r->r.frmr.fr_work);
+}
+
 static int
 __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
            unsigned int depth)
@@ -128,7 +252,7 @@ frwr_sendcompletion(struct ib_wc *wc)
 
        /* WARNING: Only wr_id and status are reliable at this point */
        r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
-       dprintk("RPC:       %s: frmr %p (stale), status %s (%d)\n",
+       pr_warn("RPC:       %s: frmr %p flushed, status %s (%d)\n",
                __func__, r, ib_wc_status_msg(wc->status), wc->status);
        r->r.frmr.fr_state = FRMR_IS_STALE;
 }
@@ -137,16 +261,19 @@ static int
 frwr_op_init(struct rpcrdma_xprt *r_xprt)
 {
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-       struct ib_device *device = r_xprt->rx_ia.ri_id->device;
+       struct ib_device *device = r_xprt->rx_ia.ri_device;
        unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
        struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
        int i;
 
+       spin_lock_init(&buf->rb_mwlock);
        INIT_LIST_HEAD(&buf->rb_mws);
        INIT_LIST_HEAD(&buf->rb_all);
 
-       i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
-       dprintk("RPC:       %s: initializing %d FRMRs\n", __func__, i);
+       i = max_t(int, RPCRDMA_MAX_DATA_SEGS / depth, 1);
+       i += 2;                         /* head + tail */
+       i *= buf->rb_max_requests;      /* one set for each RPC slot */
+       dprintk("RPC:       %s: initalizing %d FRMRs\n", __func__, i);
 
        while (i--) {
                struct rpcrdma_mw *r;
@@ -165,6 +292,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
                list_add(&r->mw_list, &buf->rb_mws);
                list_add(&r->mw_all, &buf->rb_all);
                r->mw_sendcompletion = frwr_sendcompletion;
+               r->r.frmr.fr_xprt = r_xprt;
        }
 
        return 0;
@@ -178,12 +306,12 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
            int nsegs, bool writing)
 {
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-       struct ib_device *device = ia->ri_id->device;
+       struct ib_device *device = ia->ri_device;
        enum dma_data_direction direction = rpcrdma_data_dir(writing);
        struct rpcrdma_mr_seg *seg1 = seg;
-       struct rpcrdma_mw *mw = seg1->rl_mw;
-       struct rpcrdma_frmr *frmr = &mw->r.frmr;
-       struct ib_mr *mr = frmr->fr_mr;
+       struct rpcrdma_mw *mw;
+       struct rpcrdma_frmr *frmr;
+       struct ib_mr *mr;
        struct ib_send_wr fastreg_wr, *bad_wr;
        u8 key;
        int len, pageoff;
@@ -192,12 +320,25 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
        u64 pa;
        int page_no;
 
+       mw = seg1->rl_mw;
+       seg1->rl_mw = NULL;
+       do {
+               if (mw)
+                       __frwr_queue_recovery(mw);
+               mw = rpcrdma_get_mw(r_xprt);
+               if (!mw)
+                       return -ENOMEM;
+       } while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
+       frmr = &mw->r.frmr;
+       frmr->fr_state = FRMR_IS_VALID;
+
        pageoff = offset_in_page(seg1->mr_offset);
        seg1->mr_offset -= pageoff;     /* start of page */
        seg1->mr_len += pageoff;
        len = -pageoff;
        if (nsegs > ia->ri_max_frmr_depth)
                nsegs = ia->ri_max_frmr_depth;
+
        for (page_no = i = 0; i < nsegs;) {
                rpcrdma_map_one(device, seg, direction);
                pa = seg->mr_dma;
@@ -216,8 +357,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
        dprintk("RPC:       %s: Using frmr %p to map %d segments (%d bytes)\n",
                __func__, mw, i, len);
 
-       frmr->fr_state = FRMR_IS_VALID;
-
        memset(&fastreg_wr, 0, sizeof(fastreg_wr));
        fastreg_wr.wr_id = (unsigned long)(void *)mw;
        fastreg_wr.opcode = IB_WR_FAST_REG_MR;
@@ -229,6 +368,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
        fastreg_wr.wr.fast_reg.access_flags = writing ?
                                IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
                                IB_ACCESS_REMOTE_READ;
+       mr = frmr->fr_mr;
        key = (u8)(mr->rkey & 0x000000FF);
        ib_update_fast_reg_key(mr, ++key);
        fastreg_wr.wr.fast_reg.rkey = mr->rkey;
@@ -238,6 +378,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
        if (rc)
                goto out_senderr;
 
+       seg1->rl_mw = mw;
        seg1->mr_rkey = mr->rkey;
        seg1->mr_base = seg1->mr_dma + pageoff;
        seg1->mr_nsegs = i;
@@ -246,10 +387,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 
 out_senderr:
        dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
-       ib_update_fast_reg_key(mr, --key);
-       frmr->fr_state = FRMR_IS_INVALID;
        while (i--)
                rpcrdma_unmap_one(device, --seg);
+       __frwr_queue_recovery(mw);
        return rc;
 }
 
@@ -261,78 +401,46 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 {
        struct rpcrdma_mr_seg *seg1 = seg;
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+       struct rpcrdma_mw *mw = seg1->rl_mw;
        struct ib_send_wr invalidate_wr, *bad_wr;
        int rc, nsegs = seg->mr_nsegs;
-       struct ib_device *device;
 
-       seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
+       dprintk("RPC:       %s: FRMR %p\n", __func__, mw);
+
+       seg1->rl_mw = NULL;
+       mw->r.frmr.fr_state = FRMR_IS_INVALID;
 
        memset(&invalidate_wr, 0, sizeof(invalidate_wr));
-       invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
+       invalidate_wr.wr_id = (unsigned long)(void *)mw;
        invalidate_wr.opcode = IB_WR_LOCAL_INV;
-       invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
+       invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey;
        DECR_CQCOUNT(&r_xprt->rx_ep);
 
-       read_lock(&ia->ri_qplock);
-       device = ia->ri_id->device;
        while (seg1->mr_nsegs--)
-               rpcrdma_unmap_one(device, seg++);
+               rpcrdma_unmap_one(ia->ri_device, seg++);
+       read_lock(&ia->ri_qplock);
        rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
        read_unlock(&ia->ri_qplock);
        if (rc)
                goto out_err;
+
+       rpcrdma_put_mw(r_xprt, mw);
        return nsegs;
 
 out_err:
-       /* Force rpcrdma_buffer_get() to retry */
-       seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
        dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
+       __frwr_queue_recovery(mw);
        return nsegs;
 }
 
-/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
- * an unusable state. Find FRMRs in this state and dereg / reg
- * each.  FRMRs that are VALID and attached to an rpcrdma_req are
- * also torn down.
- *
- * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
- *
- * This is invoked only in the transport connect worker in order
- * to serialize with rpcrdma_register_frmr_external().
- */
-static void
-frwr_op_reset(struct rpcrdma_xprt *r_xprt)
-{
-       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-       struct ib_device *device = r_xprt->rx_ia.ri_id->device;
-       unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
-       struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
-       struct rpcrdma_mw *r;
-       int rc;
-
-       list_for_each_entry(r, &buf->rb_all, mw_all) {
-               if (r->r.frmr.fr_state == FRMR_IS_INVALID)
-                       continue;
-
-               __frwr_release(r);
-               rc = __frwr_init(r, pd, device, depth);
-               if (rc) {
-                       dprintk("RPC:       %s: mw %p left %s\n",
-                               __func__, r,
-                               (r->r.frmr.fr_state == FRMR_IS_STALE ?
-                                       "stale" : "valid"));
-                       continue;
-               }
-
-               r->r.frmr.fr_state = FRMR_IS_INVALID;
-       }
-}
-
 static void
 frwr_op_destroy(struct rpcrdma_buffer *buf)
 {
        struct rpcrdma_mw *r;
 
+       /* Ensure stale MWs for "buf" are no longer in flight */
+       flush_workqueue(frwr_recovery_wq);
+
        while (!list_empty(&buf->rb_all)) {
                r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
                list_del(&r->mw_all);
@@ -347,7 +455,6 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
        .ro_open                        = frwr_op_open,
        .ro_maxpages                    = frwr_op_maxpages,
        .ro_init                        = frwr_op_init,
-       .ro_reset                       = frwr_op_reset,
        .ro_destroy                     = frwr_op_destroy,
        .ro_displayname                 = "frwr",
 };
index ba518af167873dfe2e9c1f5f6723665bda2bd2e7..41985d07fdb744b5d9523b7c34af93c30f70522d 100644 (file)
@@ -50,8 +50,7 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 {
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
-       rpcrdma_map_one(ia->ri_id->device, seg,
-                       rpcrdma_data_dir(writing));
+       rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing));
        seg->mr_rkey = ia->ri_bind_mem->rkey;
        seg->mr_base = seg->mr_dma;
        seg->mr_nsegs = 1;
@@ -65,18 +64,10 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 {
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
-       read_lock(&ia->ri_qplock);
-       rpcrdma_unmap_one(ia->ri_id->device, seg);
-       read_unlock(&ia->ri_qplock);
-
+       rpcrdma_unmap_one(ia->ri_device, seg);
        return 1;
 }
 
-static void
-physical_op_reset(struct rpcrdma_xprt *r_xprt)
-{
-}
-
 static void
 physical_op_destroy(struct rpcrdma_buffer *buf)
 {
@@ -88,7 +79,6 @@ const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
        .ro_open                        = physical_op_open,
        .ro_maxpages                    = physical_op_maxpages,
        .ro_init                        = physical_op_init,
-       .ro_reset                       = physical_op_reset,
        .ro_destroy                     = physical_op_destroy,
        .ro_displayname                 = "physical",
 };
index 2c53ea9e1b83dae01ebdd1aa22d256174dfbae08..84ea37daef36b0aa885c27e5eda950dda818949a 100644 (file)
@@ -284,9 +284,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
        return (unsigned char *)iptr - (unsigned char *)headerp;
 
 out:
-       if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
-               return n;
-
        for (pos = 0; nchunks--;)
                pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
                                                      &req->rl_segments[pos]);
@@ -732,8 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
        struct rpcrdma_msg *headerp;
        struct rpcrdma_req *req;
        struct rpc_rqst *rqst;
-       struct rpc_xprt *xprt = rep->rr_xprt;
-       struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+       struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
+       struct rpc_xprt *xprt = &r_xprt->rx_xprt;
        __be32 *iptr;
        int rdmalen, status;
        unsigned long cwnd;
@@ -770,7 +767,6 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
                        rep->rr_len);
 repost:
                r_xprt->rx_stats.bad_reply_count++;
-               rep->rr_func = rpcrdma_reply_handler;
                if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
                        rpcrdma_recv_buffer_put(rep);
 
index 436da2caec955ded2b2f4f6a2056cb1191594373..680f888a9ddd045314b305ef772385c7c6d5624e 100644 (file)
@@ -240,6 +240,16 @@ xprt_rdma_connect_worker(struct work_struct *work)
        xprt_clear_connecting(xprt);
 }
 
+static void
+xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
+{
+       struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
+                                                  rx_xprt);
+
+       pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt);
+       rdma_disconnect(r_xprt->rx_ia.ri_id);
+}
+
 /*
  * xprt_rdma_destroy
  *
@@ -612,12 +622,6 @@ xprt_rdma_send_request(struct rpc_task *task)
        if (req->rl_reply == NULL)              /* e.g. reconnection */
                rpcrdma_recv_buffer_get(req);
 
-       if (req->rl_reply) {
-               req->rl_reply->rr_func = rpcrdma_reply_handler;
-               /* this need only be done once, but... */
-               req->rl_reply->rr_xprt = xprt;
-       }
-
        /* Must suppress retransmit to maintain credits */
        if (req->rl_connect_cookie == xprt->connect_cookie)
                goto drop_connection;
@@ -676,6 +680,17 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
           r_xprt->rx_stats.bad_reply_count);
 }
 
+static int
+xprt_rdma_enable_swap(struct rpc_xprt *xprt)
+{
+       return -EINVAL;
+}
+
+static void
+xprt_rdma_disable_swap(struct rpc_xprt *xprt)
+{
+}
+
 /*
  * Plumbing for rpc transport switch and kernel module
  */
@@ -694,7 +709,10 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
        .send_request           = xprt_rdma_send_request,
        .close                  = xprt_rdma_close,
        .destroy                = xprt_rdma_destroy,
-       .print_stats            = xprt_rdma_print_stats
+       .print_stats            = xprt_rdma_print_stats,
+       .enable_swap            = xprt_rdma_enable_swap,
+       .disable_swap           = xprt_rdma_disable_swap,
+       .inject_disconnect      = xprt_rdma_inject_disconnect
 };
 
 static struct xprt_class xprt_rdma = {
@@ -720,17 +738,24 @@ void xprt_rdma_cleanup(void)
        if (rc)
                dprintk("RPC:       %s: xprt_unregister returned %i\n",
                        __func__, rc);
+
+       frwr_destroy_recovery_wq();
 }
 
 int xprt_rdma_init(void)
 {
        int rc;
 
-       rc = xprt_register_transport(&xprt_rdma);
-
+       rc = frwr_alloc_recovery_wq();
        if (rc)
                return rc;
 
+       rc = xprt_register_transport(&xprt_rdma);
+       if (rc) {
+               frwr_destroy_recovery_wq();
+               return rc;
+       }
+
        dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
 
        dprintk("Defaults:\n");
index 52df265b472a9b2b79574c7d9363acba26ea5d8b..891c4ede2c20ea8d8c6bc79ee080f353d4df13d7 100644 (file)
@@ -80,7 +80,6 @@ static void
 rpcrdma_run_tasklet(unsigned long data)
 {
        struct rpcrdma_rep *rep;
-       void (*func)(struct rpcrdma_rep *);
        unsigned long flags;
 
        data = data;
@@ -89,14 +88,9 @@ rpcrdma_run_tasklet(unsigned long data)
                rep = list_entry(rpcrdma_tasklets_g.next,
                                 struct rpcrdma_rep, rr_list);
                list_del(&rep->rr_list);
-               func = rep->rr_func;
-               rep->rr_func = NULL;
                spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
 
-               if (func)
-                       func(rep);
-               else
-                       rpcrdma_recv_buffer_put(rep);
+               rpcrdma_reply_handler(rep);
 
                spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
        }
@@ -236,7 +230,7 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
                __func__, rep, wc->byte_len);
 
        rep->rr_len = wc->byte_len;
-       ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
+       ib_dma_sync_single_for_cpu(rep->rr_device,
                                   rdmab_addr(rep->rr_rdmabuf),
                                   rep->rr_len, DMA_FROM_DEVICE);
        prefetch(rdmab_to_msg(rep->rr_rdmabuf));
@@ -407,7 +401,7 @@ connected:
 
                pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
                        sap, rpc_get_port(sap),
-                       ia->ri_id->device->name,
+                       ia->ri_device->name,
                        ia->ri_ops->ro_displayname,
                        xprt->rx_buf.rb_max_requests,
                        ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
@@ -508,8 +502,9 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                rc = PTR_ERR(ia->ri_id);
                goto out1;
        }
+       ia->ri_device = ia->ri_id->device;
 
-       ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
+       ia->ri_pd = ib_alloc_pd(ia->ri_device);
        if (IS_ERR(ia->ri_pd)) {
                rc = PTR_ERR(ia->ri_pd);
                dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
@@ -517,7 +512,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                goto out2;
        }
 
-       rc = ib_query_device(ia->ri_id->device, devattr);
+       rc = ib_query_device(ia->ri_device, devattr);
        if (rc) {
                dprintk("RPC:       %s: ib_query_device failed %d\n",
                        __func__, rc);
@@ -526,7 +521,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 
        if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
                ia->ri_have_dma_lkey = 1;
-               ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
+               ia->ri_dma_lkey = ia->ri_device->local_dma_lkey;
        }
 
        if (memreg == RPCRDMA_FRMR) {
@@ -541,7 +536,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                }
        }
        if (memreg == RPCRDMA_MTHCAFMR) {
-               if (!ia->ri_id->device->alloc_fmr) {
+               if (!ia->ri_device->alloc_fmr) {
                        dprintk("RPC:       %s: MTHCAFMR registration "
                                "not supported by HCA\n", __func__);
                        memreg = RPCRDMA_ALLPHYSICAL;
@@ -590,9 +585,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
        dprintk("RPC:       %s: memory registration strategy is '%s'\n",
                __func__, ia->ri_ops->ro_displayname);
 
-       /* Else will do memory reg/dereg for each chunk */
-       ia->ri_memreg_strategy = memreg;
-
        rwlock_init(&ia->ri_qplock);
        return 0;
 
@@ -622,17 +614,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
                dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
                        __func__, rc);
        }
+
        if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
                if (ia->ri_id->qp)
                        rdma_destroy_qp(ia->ri_id);
                rdma_destroy_id(ia->ri_id);
                ia->ri_id = NULL;
        }
-       if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
-               rc = ib_dealloc_pd(ia->ri_pd);
-               dprintk("RPC:       %s: ib_dealloc_pd returned %i\n",
-                       __func__, rc);
-       }
+
+       /* If the pd is still busy, xprtrdma missed freeing a resource */
+       if (ia->ri_pd && !IS_ERR(ia->ri_pd))
+               WARN_ON(ib_dealloc_pd(ia->ri_pd));
 }
 
 /*
@@ -693,8 +685,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
        INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
 
        cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1;
-       sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
-                                 rpcrdma_cq_async_error_upcall, ep, &cq_attr);
+       sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall,
+                             rpcrdma_cq_async_error_upcall, ep, &cq_attr);
        if (IS_ERR(sendcq)) {
                rc = PTR_ERR(sendcq);
                dprintk("RPC:       %s: failed to create send CQ: %i\n",
@@ -710,8 +702,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
        }
 
        cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1;
-       recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
-                                 rpcrdma_cq_async_error_upcall, ep, &cq_attr);
+       recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall,
+                             rpcrdma_cq_async_error_upcall, ep, &cq_attr);
        if (IS_ERR(recvcq)) {
                rc = PTR_ERR(recvcq);
                dprintk("RPC:       %s: failed to create recv CQ: %i\n",
@@ -817,8 +809,6 @@ retry:
                rpcrdma_flush_cqs(ep);
 
                xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
-               ia->ri_ops->ro_reset(xprt);
-
                id = rpcrdma_create_id(xprt, ia,
                                (struct sockaddr *)&xprt->rx_data.addr);
                if (IS_ERR(id)) {
@@ -832,7 +822,7 @@ retry:
                 * More stuff I haven't thought of!
                 * Rrrgh!
                 */
-               if (ia->ri_id->device != id->device) {
+               if (ia->ri_device != id->device) {
                        printk("RPC:       %s: can't reconnect on "
                                "different device!\n", __func__);
                        rdma_destroy_id(id);
@@ -974,7 +964,8 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
                goto out_free;
        }
 
-       rep->rr_buffer = &r_xprt->rx_buf;
+       rep->rr_device = ia->ri_device;
+       rep->rr_rxprt = r_xprt;
        return rep;
 
 out_free:
@@ -1098,31 +1089,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
        kfree(buf->rb_pool);
 }
 
-/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
- * some req segments uninitialized.
- */
-static void
-rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
+struct rpcrdma_mw *
+rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
 {
-       if (*mw) {
-               list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
-               *mw = NULL;
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+       struct rpcrdma_mw *mw = NULL;
+
+       spin_lock(&buf->rb_mwlock);
+       if (!list_empty(&buf->rb_mws)) {
+               mw = list_first_entry(&buf->rb_mws,
+                                     struct rpcrdma_mw, mw_list);
+               list_del_init(&mw->mw_list);
        }
+       spin_unlock(&buf->rb_mwlock);
+
+       if (!mw)
+               pr_err("RPC:       %s: no MWs available\n", __func__);
+       return mw;
 }
 
-/* Cycle mw's back in reverse order, and "spin" them.
- * This delays and scrambles reuse as much as possible.
- */
-static void
-rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
+void
+rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
 {
-       struct rpcrdma_mr_seg *seg = req->rl_segments;
-       struct rpcrdma_mr_seg *seg1 = seg;
-       int i;
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 
-       for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
-               rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
-       rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
+       spin_lock(&buf->rb_mwlock);
+       list_add_tail(&mw->mw_list, &buf->rb_mws);
+       spin_unlock(&buf->rb_mwlock);
 }
 
 static void
@@ -1132,115 +1125,10 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
        req->rl_niovs = 0;
        if (req->rl_reply) {
                buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
-               req->rl_reply->rr_func = NULL;
                req->rl_reply = NULL;
        }
 }
 
-/* rpcrdma_unmap_one() was already done during deregistration.
- * Redo only the ib_post_send().
- */
-static void
-rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
-{
-       struct rpcrdma_xprt *r_xprt =
-                               container_of(ia, struct rpcrdma_xprt, rx_ia);
-       struct ib_send_wr invalidate_wr, *bad_wr;
-       int rc;
-
-       dprintk("RPC:       %s: FRMR %p is stale\n", __func__, r);
-
-       /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
-       r->r.frmr.fr_state = FRMR_IS_INVALID;
-
-       memset(&invalidate_wr, 0, sizeof(invalidate_wr));
-       invalidate_wr.wr_id = (unsigned long)(void *)r;
-       invalidate_wr.opcode = IB_WR_LOCAL_INV;
-       invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
-       DECR_CQCOUNT(&r_xprt->rx_ep);
-
-       dprintk("RPC:       %s: frmr %p invalidating rkey %08x\n",
-               __func__, r, r->r.frmr.fr_mr->rkey);
-
-       read_lock(&ia->ri_qplock);
-       rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
-       read_unlock(&ia->ri_qplock);
-       if (rc) {
-               /* Force rpcrdma_buffer_get() to retry */
-               r->r.frmr.fr_state = FRMR_IS_STALE;
-               dprintk("RPC:       %s: ib_post_send failed, %i\n",
-                       __func__, rc);
-       }
-}
-
-static void
-rpcrdma_retry_flushed_linv(struct list_head *stale,
-                          struct rpcrdma_buffer *buf)
-{
-       struct rpcrdma_ia *ia = rdmab_to_ia(buf);
-       struct list_head *pos;
-       struct rpcrdma_mw *r;
-       unsigned long flags;
-
-       list_for_each(pos, stale) {
-               r = list_entry(pos, struct rpcrdma_mw, mw_list);
-               rpcrdma_retry_local_inv(r, ia);
-       }
-
-       spin_lock_irqsave(&buf->rb_lock, flags);
-       list_splice_tail(stale, &buf->rb_mws);
-       spin_unlock_irqrestore(&buf->rb_lock, flags);
-}
-
-static struct rpcrdma_req *
-rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
-                        struct list_head *stale)
-{
-       struct rpcrdma_mw *r;
-       int i;
-
-       i = RPCRDMA_MAX_SEGS - 1;
-       while (!list_empty(&buf->rb_mws)) {
-               r = list_entry(buf->rb_mws.next,
-                              struct rpcrdma_mw, mw_list);
-               list_del(&r->mw_list);
-               if (r->r.frmr.fr_state == FRMR_IS_STALE) {
-                       list_add(&r->mw_list, stale);
-                       continue;
-               }
-               req->rl_segments[i].rl_mw = r;
-               if (unlikely(i-- == 0))
-                       return req;     /* Success */
-       }
-
-       /* Not enough entries on rb_mws for this req */
-       rpcrdma_buffer_put_sendbuf(req, buf);
-       rpcrdma_buffer_put_mrs(req, buf);
-       return NULL;
-}
-
-static struct rpcrdma_req *
-rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
-{
-       struct rpcrdma_mw *r;
-       int i;
-
-       i = RPCRDMA_MAX_SEGS - 1;
-       while (!list_empty(&buf->rb_mws)) {
-               r = list_entry(buf->rb_mws.next,
-                              struct rpcrdma_mw, mw_list);
-               list_del(&r->mw_list);
-               req->rl_segments[i].rl_mw = r;
-               if (unlikely(i-- == 0))
-                       return req;     /* Success */
-       }
-
-       /* Not enough entries on rb_mws for this req */
-       rpcrdma_buffer_put_sendbuf(req, buf);
-       rpcrdma_buffer_put_mrs(req, buf);
-       return NULL;
-}
-
 /*
  * Get a set of request/reply buffers.
  *
@@ -1253,12 +1141,11 @@ rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
 struct rpcrdma_req *
 rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
 {
-       struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
-       struct list_head stale;
        struct rpcrdma_req *req;
        unsigned long flags;
 
        spin_lock_irqsave(&buffers->rb_lock, flags);
+
        if (buffers->rb_send_index == buffers->rb_max_requests) {
                spin_unlock_irqrestore(&buffers->rb_lock, flags);
                dprintk("RPC:       %s: out of request buffers\n", __func__);
@@ -1277,20 +1164,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
        }
        buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
 
-       INIT_LIST_HEAD(&stale);
-       switch (ia->ri_memreg_strategy) {
-       case RPCRDMA_FRMR:
-               req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
-               break;
-       case RPCRDMA_MTHCAFMR:
-               req = rpcrdma_buffer_get_fmrs(req, buffers);
-               break;
-       default:
-               break;
-       }
        spin_unlock_irqrestore(&buffers->rb_lock, flags);
-       if (!list_empty(&stale))
-               rpcrdma_retry_flushed_linv(&stale, buffers);
        return req;
 }
 
@@ -1302,19 +1176,10 @@ void
 rpcrdma_buffer_put(struct rpcrdma_req *req)
 {
        struct rpcrdma_buffer *buffers = req->rl_buffer;
-       struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
        unsigned long flags;
 
        spin_lock_irqsave(&buffers->rb_lock, flags);
        rpcrdma_buffer_put_sendbuf(req, buffers);
-       switch (ia->ri_memreg_strategy) {
-       case RPCRDMA_FRMR:
-       case RPCRDMA_MTHCAFMR:
-               rpcrdma_buffer_put_mrs(req, buffers);
-               break;
-       default:
-               break;
-       }
        spin_unlock_irqrestore(&buffers->rb_lock, flags);
 }
 
@@ -1344,10 +1209,9 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
 void
 rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
 {
-       struct rpcrdma_buffer *buffers = rep->rr_buffer;
+       struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
        unsigned long flags;
 
-       rep->rr_func = NULL;
        spin_lock_irqsave(&buffers->rb_lock, flags);
        buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
        spin_unlock_irqrestore(&buffers->rb_lock, flags);
@@ -1376,9 +1240,9 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
        /*
         * All memory passed here was kmalloc'ed, therefore phys-contiguous.
         */
-       iov->addr = ib_dma_map_single(ia->ri_id->device,
+       iov->addr = ib_dma_map_single(ia->ri_device,
                        va, len, DMA_BIDIRECTIONAL);
-       if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
+       if (ib_dma_mapping_error(ia->ri_device, iov->addr))
                return -ENOMEM;
 
        iov->length = len;
@@ -1422,8 +1286,8 @@ rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
 {
        int rc;
 
-       ib_dma_unmap_single(ia->ri_id->device,
-                       iov->addr, iov->length, DMA_BIDIRECTIONAL);
+       ib_dma_unmap_single(ia->ri_device,
+                           iov->addr, iov->length, DMA_BIDIRECTIONAL);
 
        if (NULL == mr)
                return 0;
@@ -1516,15 +1380,18 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
        send_wr.num_sge = req->rl_niovs;
        send_wr.opcode = IB_WR_SEND;
        if (send_wr.num_sge == 4)       /* no need to sync any pad (constant) */
-               ib_dma_sync_single_for_device(ia->ri_id->device,
-                       req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
-                       DMA_TO_DEVICE);
-       ib_dma_sync_single_for_device(ia->ri_id->device,
-               req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
-               DMA_TO_DEVICE);
-       ib_dma_sync_single_for_device(ia->ri_id->device,
-               req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
-               DMA_TO_DEVICE);
+               ib_dma_sync_single_for_device(ia->ri_device,
+                                             req->rl_send_iov[3].addr,
+                                             req->rl_send_iov[3].length,
+                                             DMA_TO_DEVICE);
+       ib_dma_sync_single_for_device(ia->ri_device,
+                                     req->rl_send_iov[1].addr,
+                                     req->rl_send_iov[1].length,
+                                     DMA_TO_DEVICE);
+       ib_dma_sync_single_for_device(ia->ri_device,
+                                     req->rl_send_iov[0].addr,
+                                     req->rl_send_iov[0].length,
+                                     DMA_TO_DEVICE);
 
        if (DECR_CQCOUNT(ep) > 0)
                send_wr.send_flags = 0;
@@ -1557,7 +1424,7 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
        recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
        recv_wr.num_sge = 1;
 
-       ib_dma_sync_single_for_cpu(ia->ri_id->device,
+       ib_dma_sync_single_for_cpu(ia->ri_device,
                                   rdmab_addr(rep->rr_rdmabuf),
                                   rdmab_length(rep->rr_rdmabuf),
                                   DMA_BIDIRECTIONAL);
index 58163b88738c2363c559be28d3f3807043065826..f49dd8b381221dceaef4847e4ae28d397ebcdf27 100644 (file)
@@ -62,6 +62,7 @@
 struct rpcrdma_ia {
        const struct rpcrdma_memreg_ops *ri_ops;
        rwlock_t                ri_qplock;
+       struct ib_device        *ri_device;
        struct rdma_cm_id       *ri_id;
        struct ib_pd            *ri_pd;
        struct ib_mr            *ri_bind_mem;
@@ -69,7 +70,6 @@ struct rpcrdma_ia {
        int                     ri_have_dma_lkey;
        struct completion       ri_done;
        int                     ri_async_rc;
-       enum rpcrdma_memreg     ri_memreg_strategy;
        unsigned int            ri_max_frmr_depth;
        struct ib_device_attr   ri_devattr;
        struct ib_qp_attr       ri_qp_attr;
@@ -173,9 +173,8 @@ struct rpcrdma_buffer;
 
 struct rpcrdma_rep {
        unsigned int            rr_len;
-       struct rpcrdma_buffer   *rr_buffer;
-       struct rpc_xprt         *rr_xprt;
-       void                    (*rr_func)(struct rpcrdma_rep *);
+       struct ib_device        *rr_device;
+       struct rpcrdma_xprt     *rr_rxprt;
        struct list_head        rr_list;
        struct rpcrdma_regbuf   *rr_rdmabuf;
 };
@@ -203,11 +202,18 @@ struct rpcrdma_frmr {
        struct ib_fast_reg_page_list    *fr_pgl;
        struct ib_mr                    *fr_mr;
        enum rpcrdma_frmr_state         fr_state;
+       struct work_struct              fr_work;
+       struct rpcrdma_xprt             *fr_xprt;
+};
+
+struct rpcrdma_fmr {
+       struct ib_fmr           *fmr;
+       u64                     *physaddrs;
 };
 
 struct rpcrdma_mw {
        union {
-               struct ib_fmr           *fmr;
+               struct rpcrdma_fmr      fmr;
                struct rpcrdma_frmr     frmr;
        } r;
        void                    (*mw_sendcompletion)(struct ib_wc *);
@@ -281,15 +287,17 @@ rpcr_to_rdmar(struct rpc_rqst *rqst)
  * One of these is associated with a transport instance
  */
 struct rpcrdma_buffer {
-       spinlock_t      rb_lock;        /* protects indexes */
-       u32             rb_max_requests;/* client max requests */
-       struct list_head rb_mws;        /* optional memory windows/fmrs/frmrs */
-       struct list_head rb_all;
-       int             rb_send_index;
+       spinlock_t              rb_mwlock;      /* protect rb_mws list */
+       struct list_head        rb_mws;
+       struct list_head        rb_all;
+       char                    *rb_pool;
+
+       spinlock_t              rb_lock;        /* protect buf arrays */
+       u32                     rb_max_requests;
+       int                     rb_send_index;
+       int                     rb_recv_index;
        struct rpcrdma_req      **rb_send_bufs;
-       int             rb_recv_index;
        struct rpcrdma_rep      **rb_recv_bufs;
-       char            *rb_pool;
 };
 #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
 
@@ -350,7 +358,6 @@ struct rpcrdma_memreg_ops {
                                   struct rpcrdma_create_data_internal *);
        size_t          (*ro_maxpages)(struct rpcrdma_xprt *);
        int             (*ro_init)(struct rpcrdma_xprt *);
-       void            (*ro_reset)(struct rpcrdma_xprt *);
        void            (*ro_destroy)(struct rpcrdma_buffer *);
        const char      *ro_displayname;
 };
@@ -413,6 +420,8 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
 int rpcrdma_buffer_create(struct rpcrdma_xprt *);
 void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
 
+struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
+void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
 struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
 void rpcrdma_buffer_put(struct rpcrdma_req *);
 void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
@@ -425,6 +434,9 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
 
 unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
 
+int frwr_alloc_recovery_wq(void);
+void frwr_destroy_recovery_wq(void);
+
 /*
  * Wrappers for chunk registration, shared by read/write chunk code.
  */
index b0517287075b2753101bbde70262c99c2a58f35b..e193c2b5476b3a83973e9799e2e826fdcd2b842c 100644 (file)
@@ -622,24 +622,6 @@ process_status:
        return status;
 }
 
-/**
- * xs_tcp_shutdown - gracefully shut down a TCP socket
- * @xprt: transport
- *
- * Initiates a graceful shutdown of the TCP socket by calling the
- * equivalent of shutdown(SHUT_RDWR);
- */
-static void xs_tcp_shutdown(struct rpc_xprt *xprt)
-{
-       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
-       struct socket *sock = transport->sock;
-
-       if (sock != NULL) {
-               kernel_sock_shutdown(sock, SHUT_RDWR);
-               trace_rpc_socket_shutdown(xprt, sock);
-       }
-}
-
 /**
  * xs_tcp_send_request - write an RPC request to a TCP socket
  * @task: address of RPC task that manages the state of an RPC request
@@ -786,6 +768,7 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt)
        xs_sock_reset_connection_flags(xprt);
        /* Mark transport as closed and wake up all pending tasks */
        xprt_disconnect_done(xprt);
+       xprt_force_disconnect(xprt);
 }
 
 /**
@@ -827,6 +810,9 @@ static void xs_reset_transport(struct sock_xprt *transport)
        if (sk == NULL)
                return;
 
+       if (atomic_read(&transport->xprt.swapper))
+               sk_clear_memalloc(sk);
+
        write_lock_bh(&sk->sk_callback_lock);
        transport->inet = NULL;
        transport->sock = NULL;
@@ -863,6 +849,13 @@ static void xs_close(struct rpc_xprt *xprt)
        xprt_disconnect_done(xprt);
 }
 
+static void xs_inject_disconnect(struct rpc_xprt *xprt)
+{
+       dprintk("RPC:       injecting transport disconnect on xprt=%p\n",
+               xprt);
+       xprt_disconnect_done(xprt);
+}
+
 static void xs_xprt_free(struct rpc_xprt *xprt)
 {
        xs_free_peer_addresses(xprt);
@@ -901,7 +894,6 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
 /**
  * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets
  * @sk: socket with data to read
- * @len: how much data to read
  *
  * Currently this assumes we can read the whole reply in a single gulp.
  */
@@ -965,7 +957,6 @@ static void xs_local_data_ready(struct sock *sk)
 /**
  * xs_udp_data_ready - "data ready" callback for UDP sockets
  * @sk: socket with data to read
- * @len: how much data to read
  *
  */
 static void xs_udp_data_ready(struct sock *sk)
@@ -1389,7 +1380,6 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
 /**
  * xs_tcp_data_ready - "data ready" callback for TCP sockets
  * @sk: socket with data to read
- * @bytes: how much data to read
  *
  */
 static void xs_tcp_data_ready(struct sock *sk)
@@ -1886,9 +1876,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
 
 /**
  * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint
- * @xprt: RPC transport to connect
  * @transport: socket transport to connect
- * @create_sock: function to create a socket of the correct type
  */
 static int xs_local_setup_socket(struct sock_xprt *transport)
 {
@@ -1960,43 +1948,84 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
                msleep_interruptible(15000);
 }
 
-#ifdef CONFIG_SUNRPC_SWAP
+#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
+/*
+ * Note that this should be called with XPRT_LOCKED held (or when we otherwise
+ * know that we have exclusive access to the socket), to guard against
+ * races with xs_reset_transport.
+ */
 static void xs_set_memalloc(struct rpc_xprt *xprt)
 {
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
                        xprt);
 
-       if (xprt->swapper)
+       /*
+        * If there's no sock, then we have nothing to set. The
+        * reconnecting process will get it for us.
+        */
+       if (!transport->inet)
+               return;
+       if (atomic_read(&xprt->swapper))
                sk_set_memalloc(transport->inet);
 }
 
 /**
- * xs_swapper - Tag this transport as being used for swap.
+ * xs_enable_swap - Tag this transport as being used for swap.
  * @xprt: transport to tag
- * @enable: enable/disable
  *
+ * Take a reference to this transport on behalf of the rpc_clnt, and
+ * optionally mark it for swapping if it wasn't already.
  */
-int xs_swapper(struct rpc_xprt *xprt, int enable)
+static int
+xs_enable_swap(struct rpc_xprt *xprt)
 {
-       struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
-                       xprt);
-       int err = 0;
+       struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
 
-       if (enable) {
-               xprt->swapper++;
-               xs_set_memalloc(xprt);
-       } else if (xprt->swapper) {
-               xprt->swapper--;
-               sk_clear_memalloc(transport->inet);
-       }
+       if (atomic_inc_return(&xprt->swapper) != 1)
+               return 0;
+       if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE))
+               return -ERESTARTSYS;
+       if (xs->inet)
+               sk_set_memalloc(xs->inet);
+       xprt_release_xprt(xprt, NULL);
+       return 0;
+}
 
-       return err;
+/**
+ * xs_disable_swap - Untag this transport as being used for swap.
+ * @xprt: transport to tag
+ *
+ * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the
+ * swapper refcount goes to 0, untag the socket as a memalloc socket.
+ */
+static void
+xs_disable_swap(struct rpc_xprt *xprt)
+{
+       struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
+
+       if (!atomic_dec_and_test(&xprt->swapper))
+               return;
+       if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE))
+               return;
+       if (xs->inet)
+               sk_clear_memalloc(xs->inet);
+       xprt_release_xprt(xprt, NULL);
 }
-EXPORT_SYMBOL_GPL(xs_swapper);
 #else
 static void xs_set_memalloc(struct rpc_xprt *xprt)
 {
 }
+
+static int
+xs_enable_swap(struct rpc_xprt *xprt)
+{
+       return -EINVAL;
+}
+
+static void
+xs_disable_swap(struct rpc_xprt *xprt)
+{
+}
 #endif
 
 static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
@@ -2057,6 +2086,27 @@ out:
        xprt_wake_pending_tasks(xprt, status);
 }
 
+/**
+ * xs_tcp_shutdown - gracefully shut down a TCP socket
+ * @xprt: transport
+ *
+ * Initiates a graceful shutdown of the TCP socket by calling the
+ * equivalent of shutdown(SHUT_RDWR);
+ */
+static void xs_tcp_shutdown(struct rpc_xprt *xprt)
+{
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       struct socket *sock = transport->sock;
+
+       if (sock == NULL)
+               return;
+       if (xprt_connected(xprt)) {
+               kernel_sock_shutdown(sock, SHUT_RDWR);
+               trace_rpc_socket_shutdown(xprt, sock);
+       } else
+               xs_reset_transport(transport);
+}
+
 static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 {
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2067,6 +2117,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                unsigned int keepidle = xprt->timeout->to_initval / HZ;
                unsigned int keepcnt = xprt->timeout->to_retries + 1;
                unsigned int opt_on = 1;
+               unsigned int timeo;
 
                /* TCP Keepalive options */
                kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
@@ -2078,6 +2129,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
                                (char *)&keepcnt, sizeof(keepcnt));
 
+               /* TCP user timeout (see RFC5482) */
+               timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
+                       (xprt->timeout->to_retries + 1);
+               kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
+                               (char *)&timeo, sizeof(timeo));
+
                write_lock_bh(&sk->sk_callback_lock);
 
                xs_save_old_callbacks(transport, sk);
@@ -2125,9 +2182,6 @@ out:
 
 /**
  * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
- * @xprt: RPC transport to connect
- * @transport: socket transport to connect
- * @create_sock: function to create a socket of the correct type
  *
  * Invoked by a work queue tasklet.
  */
@@ -2463,6 +2517,8 @@ static struct rpc_xprt_ops xs_local_ops = {
        .close                  = xs_close,
        .destroy                = xs_destroy,
        .print_stats            = xs_local_print_stats,
+       .enable_swap            = xs_enable_swap,
+       .disable_swap           = xs_disable_swap,
 };
 
 static struct rpc_xprt_ops xs_udp_ops = {
@@ -2482,6 +2538,9 @@ static struct rpc_xprt_ops xs_udp_ops = {
        .close                  = xs_close,
        .destroy                = xs_destroy,
        .print_stats            = xs_udp_print_stats,
+       .enable_swap            = xs_enable_swap,
+       .disable_swap           = xs_disable_swap,
+       .inject_disconnect      = xs_inject_disconnect,
 };
 
 static struct rpc_xprt_ops xs_tcp_ops = {
@@ -2498,6 +2557,9 @@ static struct rpc_xprt_ops xs_tcp_ops = {
        .close                  = xs_tcp_shutdown,
        .destroy                = xs_destroy,
        .print_stats            = xs_tcp_print_stats,
+       .enable_swap            = xs_enable_swap,
+       .disable_swap           = xs_disable_swap,
+       .inject_disconnect      = xs_inject_disconnect,
 };
 
 /*
@@ -2515,6 +2577,9 @@ static struct rpc_xprt_ops bc_tcp_ops = {
        .close                  = bc_close,
        .destroy                = bc_destroy,
        .print_stats            = xs_tcp_print_stats,
+       .enable_swap            = xs_enable_swap,
+       .disable_swap           = xs_disable_swap,
+       .inject_disconnect      = xs_inject_disconnect,
 };
 
 static int xs_init_anyaddr(const int family, struct sockaddr *sap)
index f52abae0ec5fe84feb006b217e495849373561d7..aceaaed098112dbc73a6f8e0c526e5e9edd55a99 100644 (file)
@@ -86,7 +86,7 @@ $(simple-targets): $(obj)/conf
 PHONY += oldnoconfig savedefconfig defconfig
 
 # oldnoconfig is an alias of olddefconfig, because people already are dependent
-# on its behavior(sets new symbols to their default value but not 'n') with the
+# on its behavior (sets new symbols to their default value but not 'n') with the
 # counter-intuitive name.
 oldnoconfig: olddefconfig
 
@@ -126,10 +126,11 @@ tinyconfig:
 # Help text used by make help
 help:
        @echo  '  config          - Update current config utilising a line-oriented program'
-       @echo  '  nconfig         - Update current config utilising a ncurses menu based program'
+       @echo  '  nconfig         - Update current config utilising a ncurses menu based'
+       @echo  '                    program'
        @echo  '  menuconfig      - Update current config utilising a menu based program'
-       @echo  '  xconfig         - Update current config utilising a QT based front-end'
-       @echo  '  gconfig         - Update current config utilising a GTK based front-end'
+       @echo  '  xconfig         - Update current config utilising a Qt based front-end'
+       @echo  '  gconfig         - Update current config utilising a GTK+ based front-end'
        @echo  '  oldconfig       - Update current config utilising a provided .config as base'
        @echo  '  localmodconfig  - Update current config disabling modules not loaded'
        @echo  '  localyesconfig  - Update current config converting local mods to core'
@@ -142,7 +143,8 @@ help:
        @echo  '  alldefconfig    - New config with all symbols set to default'
        @echo  '  randconfig      - New config with random answer to all options'
        @echo  '  listnewconfig   - List new options'
-       @echo  '  olddefconfig    - Same as silentoldconfig but sets new symbols to their default value'
+       @echo  '  olddefconfig    - Same as silentoldconfig but sets new symbols to their'
+       @echo  '                    default value'
        @echo  '  kvmconfig       - Enable additional options for kvm guest kernel support'
        @echo  '  xenconfig       - Enable additional options for xen dom0 and guest kernel support'
        @echo  '  tinyconfig      - Configure the tiniest possible kernel'
@@ -163,9 +165,9 @@ HOST_EXTRACFLAGS += $(shell $(CONFIG_SHELL) $(check-lxdialog) -ccflags) \
 # mconf:  Used for the menuconfig target
 #         Utilizes the lxdialog package
 # qconf:  Used for the xconfig target
-#         Based on QT which needs to be installed to compile it
+#         Based on Qt which needs to be installed to compile it
 # gconf:  Used for the gconfig target
-#         Based on GTK which needs to be installed to compile it
+#         Based on GTK+ which needs to be installed to compile it
 # object files used by all kconfig flavours
 
 lxdialog := lxdialog/checklist.o lxdialog/util.o lxdialog/inputbox.o
@@ -222,11 +224,11 @@ ifeq ($(MAKECMDGOALS),xconfig)
 $(obj)/.tmp_qtcheck: $(src)/Makefile
 -include $(obj)/.tmp_qtcheck
 
-# QT needs some extra effort...
+# Qt needs some extra effort...
 $(obj)/.tmp_qtcheck:
        @set -e; $(kecho) "  CHECK   qt"; dir=""; pkg=""; \
        if ! pkg-config --exists QtCore 2> /dev/null; then \
-           echo "* Unable to find the QT4 tool qmake. Trying to use QT3"; \
+           echo "* Unable to find the Qt4 tool qmake. Trying to use Qt3"; \
            pkg-config --exists qt 2> /dev/null && pkg=qt; \
            pkg-config --exists qt-mt 2> /dev/null && pkg=qt-mt; \
            if [ -n "$$pkg" ]; then \
@@ -240,8 +242,8 @@ $(obj)/.tmp_qtcheck:
              done; \
              if [ -z "$$dir" ]; then \
                echo >&2 "*"; \
-               echo >&2 "* Unable to find any QT installation. Please make sure that"; \
-               echo >&2 "* the QT4 or QT3 development package is correctly installed and"; \
+               echo >&2 "* Unable to find any Qt installation. Please make sure that"; \
+               echo >&2 "* the Qt4 or Qt3 development package is correctly installed and"; \
                echo >&2 "* either qmake can be found or install pkg-config or set"; \
                echo >&2 "* the QTDIR environment variable to the correct location."; \
                echo >&2 "*"; \
@@ -278,7 +280,7 @@ $(obj)/gconf.o: $(obj)/.tmp_gtkcheck
 ifeq ($(MAKECMDGOALS),gconfig)
 -include $(obj)/.tmp_gtkcheck
 
-# GTK needs some extra effort, too...
+# GTK+ needs some extra effort, too...
 $(obj)/.tmp_gtkcheck:
        @if `pkg-config --exists gtk+-2.0 gmodule-2.0 libglade-2.0`; then               \
                if `pkg-config --atleast-version=2.0.0 gtk+-2.0`; then                  \
@@ -309,7 +311,7 @@ quiet_cmd_moc = MOC     $@
 $(obj)/%.moc: $(src)/%.h $(obj)/.tmp_qtcheck
        $(call cmd,moc)
 
-# Extract gconf menu items for I18N support
+# Extract gconf menu items for i18n support
 $(obj)/gconf.glade.h: $(obj)/gconf.glade
        $(Q)intltool-extract --type=gettext/glade --srcdir=$(srctree) \
        $(obj)/gconf.glade
index fb0a2a286dca6a8b0cc0c74deb5d23096be096c3..667d1aa237114453c28bafac618e9552405a19c4 100644 (file)
@@ -13,9 +13,6 @@
 
 static int expr_eq(struct expr *e1, struct expr *e2);
 static struct expr *expr_eliminate_yn(struct expr *e);
-static struct expr *expr_extract_eq_and(struct expr **ep1, struct expr **ep2);
-static struct expr *expr_extract_eq_or(struct expr **ep1, struct expr **ep2);
-static void expr_extract_eq(enum expr_type type, struct expr **ep, struct expr **ep1, struct expr **ep2);
 
 struct expr *expr_alloc_symbol(struct symbol *sym)
 {
@@ -82,6 +79,10 @@ struct expr *expr_copy(const struct expr *org)
                e->left.expr = expr_copy(org->left.expr);
                break;
        case E_EQUAL:
+       case E_GEQ:
+       case E_GTH:
+       case E_LEQ:
+       case E_LTH:
        case E_UNEQUAL:
                e->left.sym = org->left.sym;
                e->right.sym = org->right.sym;
@@ -114,6 +115,10 @@ void expr_free(struct expr *e)
                expr_free(e->left.expr);
                return;
        case E_EQUAL:
+       case E_GEQ:
+       case E_GTH:
+       case E_LEQ:
+       case E_LTH:
        case E_UNEQUAL:
                break;
        case E_OR:
@@ -200,6 +205,10 @@ static int expr_eq(struct expr *e1, struct expr *e2)
                return 0;
        switch (e1->type) {
        case E_EQUAL:
+       case E_GEQ:
+       case E_GTH:
+       case E_LEQ:
+       case E_LTH:
        case E_UNEQUAL:
                return e1->left.sym == e2->left.sym && e1->right.sym == e2->right.sym;
        case E_SYMBOL:
@@ -559,62 +568,6 @@ static void expr_eliminate_dups1(enum expr_type type, struct expr **ep1, struct
 #undef e2
 }
 
-static void expr_eliminate_dups2(enum expr_type type, struct expr **ep1, struct expr **ep2)
-{
-#define e1 (*ep1)
-#define e2 (*ep2)
-       struct expr *tmp, *tmp1, *tmp2;
-
-       if (e1->type == type) {
-               expr_eliminate_dups2(type, &e1->left.expr, &e2);
-               expr_eliminate_dups2(type, &e1->right.expr, &e2);
-               return;
-       }
-       if (e2->type == type) {
-               expr_eliminate_dups2(type, &e1, &e2->left.expr);
-               expr_eliminate_dups2(type, &e1, &e2->right.expr);
-       }
-       if (e1 == e2)
-               return;
-
-       switch (e1->type) {
-       case E_OR:
-               expr_eliminate_dups2(e1->type, &e1, &e1);
-               // (FOO || BAR) && (!FOO && !BAR) -> n
-               tmp1 = expr_transform(expr_alloc_one(E_NOT, expr_copy(e1)));
-               tmp2 = expr_copy(e2);
-               tmp = expr_extract_eq_and(&tmp1, &tmp2);
-               if (expr_is_yes(tmp1)) {
-                       expr_free(e1);
-                       e1 = expr_alloc_symbol(&symbol_no);
-                       trans_count++;
-               }
-               expr_free(tmp2);
-               expr_free(tmp1);
-               expr_free(tmp);
-               break;
-       case E_AND:
-               expr_eliminate_dups2(e1->type, &e1, &e1);
-               // (FOO && BAR) || (!FOO || !BAR) -> y
-               tmp1 = expr_transform(expr_alloc_one(E_NOT, expr_copy(e1)));
-               tmp2 = expr_copy(e2);
-               tmp = expr_extract_eq_or(&tmp1, &tmp2);
-               if (expr_is_no(tmp1)) {
-                       expr_free(e1);
-                       e1 = expr_alloc_symbol(&symbol_yes);
-                       trans_count++;
-               }
-               expr_free(tmp2);
-               expr_free(tmp1);
-               expr_free(tmp);
-               break;
-       default:
-               ;
-       }
-#undef e1
-#undef e2
-}
-
 struct expr *expr_eliminate_dups(struct expr *e)
 {
        int oldcount;
@@ -627,7 +580,6 @@ struct expr *expr_eliminate_dups(struct expr *e)
                switch (e->type) {
                case E_OR: case E_AND:
                        expr_eliminate_dups1(e->type, &e, &e);
-                       expr_eliminate_dups2(e->type, &e, &e);
                default:
                        ;
                }
@@ -647,6 +599,10 @@ struct expr *expr_transform(struct expr *e)
                return NULL;
        switch (e->type) {
        case E_EQUAL:
+       case E_GEQ:
+       case E_GTH:
+       case E_LEQ:
+       case E_LTH:
        case E_UNEQUAL:
        case E_SYMBOL:
        case E_LIST:
@@ -719,6 +675,22 @@ struct expr *expr_transform(struct expr *e)
                        e = tmp;
                        e->type = e->type == E_EQUAL ? E_UNEQUAL : E_EQUAL;
                        break;
+               case E_LEQ:
+               case E_GEQ:
+                       // !a<='x' -> a>'x'
+                       tmp = e->left.expr;
+                       free(e);
+                       e = tmp;
+                       e->type = e->type == E_LEQ ? E_GTH : E_LTH;
+                       break;
+               case E_LTH:
+               case E_GTH:
+                       // !a<'x' -> a>='x'
+                       tmp = e->left.expr;
+                       free(e);
+                       e = tmp;
+                       e->type = e->type == E_LTH ? E_GEQ : E_LEQ;
+                       break;
                case E_OR:
                        // !(a || b) -> !a && !b
                        tmp = e->left.expr;
@@ -789,6 +761,10 @@ int expr_contains_symbol(struct expr *dep, struct symbol *sym)
        case E_SYMBOL:
                return dep->left.sym == sym;
        case E_EQUAL:
+       case E_GEQ:
+       case E_GTH:
+       case E_LEQ:
+       case E_LTH:
        case E_UNEQUAL:
                return dep->left.sym == sym ||
                       dep->right.sym == sym;
@@ -829,57 +805,6 @@ bool expr_depends_symbol(struct expr *dep, struct symbol *sym)
        return false;
 }
 
-static struct expr *expr_extract_eq_and(struct expr **ep1, struct expr **ep2)
-{
-       struct expr *tmp = NULL;
-       expr_extract_eq(E_AND, &tmp, ep1, ep2);
-       if (tmp) {
-               *ep1 = expr_eliminate_yn(*ep1);
-               *ep2 = expr_eliminate_yn(*ep2);
-       }
-       return tmp;
-}
-
-static struct expr *expr_extract_eq_or(struct expr **ep1, struct expr **ep2)
-{
-       struct expr *tmp = NULL;
-       expr_extract_eq(E_OR, &tmp, ep1, ep2);
-       if (tmp) {
-               *ep1 = expr_eliminate_yn(*ep1);
-               *ep2 = expr_eliminate_yn(*ep2);
-       }
-       return tmp;
-}
-
-static void expr_extract_eq(enum expr_type type, struct expr **ep, struct expr **ep1, struct expr **ep2)
-{
-#define e1 (*ep1)
-#define e2 (*ep2)
-       if (e1->type == type) {
-               expr_extract_eq(type, ep, &e1->left.expr, &e2);
-               expr_extract_eq(type, ep, &e1->right.expr, &e2);
-               return;
-       }
-       if (e2->type == type) {
-               expr_extract_eq(type, ep, ep1, &e2->left.expr);
-               expr_extract_eq(type, ep, ep1, &e2->right.expr);
-               return;
-       }
-       if (expr_eq(e1, e2)) {
-               *ep = *ep ? expr_alloc_two(type, *ep, e1) : e1;
-               expr_free(e2);
-               if (type == E_AND) {
-                       e1 = expr_alloc_symbol(&symbol_yes);
-                       e2 = expr_alloc_symbol(&symbol_yes);
-               } else if (type == E_OR) {
-                       e1 = expr_alloc_symbol(&symbol_no);
-                       e2 = expr_alloc_symbol(&symbol_no);
-               }
-       }
-#undef e1
-#undef e2
-}
-
 struct expr *expr_trans_compare(struct expr *e, enum expr_type type, struct symbol *sym)
 {
        struct expr *e1, *e2;
@@ -914,6 +839,10 @@ struct expr *expr_trans_compare(struct expr *e, enum expr_type type, struct symb
        case E_NOT:
                return expr_trans_compare(e->left.expr, type == E_EQUAL ? E_UNEQUAL : E_EQUAL, sym);
        case E_UNEQUAL:
+       case E_LTH:
+       case E_LEQ:
+       case E_GTH:
+       case E_GEQ:
        case E_EQUAL:
                if (type == E_EQUAL) {
                        if (sym == &symbol_yes)
@@ -941,10 +870,57 @@ struct expr *expr_trans_compare(struct expr *e, enum expr_type type, struct symb
        return NULL;
 }
 
+enum string_value_kind {
+       k_string,
+       k_signed,
+       k_unsigned,
+       k_invalid
+};
+
+union string_value {
+       unsigned long long u;
+       signed long long s;
+};
+
+static enum string_value_kind expr_parse_string(const char *str,
+                                               enum symbol_type type,
+                                               union string_value *val)
+{      /* parse str as a number according to type; k_string if it is not one */
+       char *tail;     /* set by strtoll()/strtoull() to the first unparsed char */
+       enum string_value_kind kind;
+
+       errno = 0;      /* cleared so the !errno test below reflects this call only */
+       switch (type) {
+       case S_BOOLEAN:
+       case S_TRISTATE:
+               return k_string;        /* never parsed numerically; *val untouched */
+       case S_INT:
+               val->s = strtoll(str, &tail, 10);
+               kind = k_signed;
+               break;
+       case S_HEX:
+               val->u = strtoull(str, &tail, 16);
+               kind = k_unsigned;
+               break;
+       case S_STRING:
+       case S_UNKNOWN:
+               val->s = strtoll(str, &tail, 0);        /* base 0: prefix picks the base */
+               kind = k_signed;
+               break;
+       default:
+               return k_invalid;
+       }
+       return !errno && !*tail && tail > str &&
+              isxdigit((unsigned char)tail[-1]) ? kind : k_string;
+}
+
 tristate expr_calc_value(struct expr *e)
 {
        tristate val1, val2;
        const char *str1, *str2;
+       enum string_value_kind k1 = k_string, k2 = k_string;
+       union string_value lval = {}, rval = {};
+       int res;
 
        if (!e)
                return yes;
@@ -965,21 +941,57 @@ tristate expr_calc_value(struct expr *e)
                val1 = expr_calc_value(e->left.expr);
                return EXPR_NOT(val1);
        case E_EQUAL:
-               sym_calc_value(e->left.sym);
-               sym_calc_value(e->right.sym);
-               str1 = sym_get_string_value(e->left.sym);
-               str2 = sym_get_string_value(e->right.sym);
-               return !strcmp(str1, str2) ? yes : no;
+       case E_GEQ:
+       case E_GTH:
+       case E_LEQ:
+       case E_LTH:
        case E_UNEQUAL:
-               sym_calc_value(e->left.sym);
-               sym_calc_value(e->right.sym);
-               str1 = sym_get_string_value(e->left.sym);
-               str2 = sym_get_string_value(e->right.sym);
-               return !strcmp(str1, str2) ? no : yes;
+               break;
        default:
                printf("expr_calc_value: %d?\n", e->type);
                return no;
        }
+
+       sym_calc_value(e->left.sym);
+       sym_calc_value(e->right.sym);
+       str1 = sym_get_string_value(e->left.sym);
+       str2 = sym_get_string_value(e->right.sym);
+
+       if (e->left.sym->type != S_STRING || e->right.sym->type != S_STRING) {
+               k1 = expr_parse_string(str1, e->left.sym->type, &lval);
+               k2 = expr_parse_string(str2, e->right.sym->type, &rval);
+       }
+
+       if (k1 == k_string || k2 == k_string)
+               res = strcmp(str1, str2);
+       else if (k1 == k_invalid || k2 == k_invalid) {
+               if (e->type != E_EQUAL && e->type != E_UNEQUAL) {
+                       printf("Cannot compare \"%s\" and \"%s\"\n", str1, str2);
+                       return no;
+               }
+               res = strcmp(str1, str2);
+       } else if (k1 == k_unsigned || k2 == k_unsigned)
+               res = (lval.u > rval.u) - (lval.u < rval.u);
+       else /* if (k1 == k_signed && k2 == k_signed) */
+               res = (lval.s > rval.s) - (lval.s < rval.s);
+
+       switch(e->type) {
+       case E_EQUAL:
+               return res ? no : yes;
+       case E_GEQ:
+               return res >= 0 ? yes : no;
+       case E_GTH:
+               return res > 0 ? yes : no;
+       case E_LEQ:
+               return res <= 0 ? yes : no;
+       case E_LTH:
+               return res < 0 ? yes : no;
+       case E_UNEQUAL:
+               return res ? yes : no;
+       default:
+               printf("expr_calc_value: relation %d?\n", e->type);
+               return no;
+       }
 }
 
 static int expr_compare_type(enum expr_type t1, enum expr_type t2)
@@ -987,6 +999,12 @@ static int expr_compare_type(enum expr_type t1, enum expr_type t2)
        if (t1 == t2)
                return 0;
        switch (t1) {
+       case E_LEQ:
+       case E_LTH:
+       case E_GEQ:
+       case E_GTH:
+               if (t2 == E_EQUAL || t2 == E_UNEQUAL)
+                       return 1;
        case E_EQUAL:
        case E_UNEQUAL:
                if (t2 == E_NOT)
@@ -1080,6 +1098,24 @@ void expr_print(struct expr *e, void (*fn)(void *, struct symbol *, const char *
                fn(data, NULL, "=");
                fn(data, e->right.sym, e->right.sym->name);
                break;
+       case E_LEQ:
+       case E_LTH:
+               if (e->left.sym->name)
+                       fn(data, e->left.sym, e->left.sym->name);
+               else
+                       fn(data, NULL, "<choice>");
+               fn(data, NULL, e->type == E_LEQ ? "<=" : "<");
+               fn(data, e->right.sym, e->right.sym->name);
+               break;
+       case E_GEQ:
+       case E_GTH:     /* prints left, then ">=" (E_GEQ) or ">" (E_GTH), then right */
+               if (e->left.sym->name)
+                       fn(data, e->left.sym, e->left.sym->name);
+               else    /* left symbol has no name: print a placeholder instead */
+                       fn(data, NULL, "<choice>");
+               fn(data, NULL, e->type == E_GEQ ? ">=" : ">");
+               fn(data, e->right.sym, e->right.sym->name);
+               break;
        case E_UNEQUAL:
                if (e->left.sym->name)
                        fn(data, e->left.sym, e->left.sym->name);
index a2fc96a2bd2cf84115b8009b097ea772ab53dcdf..973b6f73336829a6290a0d2bb15a841086d9a18c 100644 (file)
@@ -29,7 +29,9 @@ typedef enum tristate {
 } tristate;
 
 enum expr_type {
-       E_NONE, E_OR, E_AND, E_NOT, E_EQUAL, E_UNEQUAL, E_LIST, E_SYMBOL, E_RANGE
+       E_NONE, E_OR, E_AND, E_NOT,
+       E_EQUAL, E_UNEQUAL, E_LTH, E_LEQ, E_GTH, E_GEQ,
+       E_LIST, E_SYMBOL, E_RANGE
 };
 
 union expr_data {
index 6731377f9bb2546f3b303d4d89df466e8b73b045..70c5ee189dce7c7d573c044117d3f63e4450cbcb 100644 (file)
@@ -1166,6 +1166,10 @@ static struct symbol *sym_check_expr_deps(struct expr *e)
        case E_NOT:
                return sym_check_expr_deps(e->left.expr);
        case E_EQUAL:
+       case E_GEQ:
+       case E_GTH:
+       case E_LEQ:
+       case E_LTH:
        case E_UNEQUAL:
                sym = sym_check_deps(e->left.sym);
                if (sym)
index 6c62d93b4ffbd018807993788fb78848d176d67d..200a3fe3009153bba22742e862f6114f9e7f5fc8 100644 (file)
@@ -122,6 +122,10 @@ n  [A-Za-z0-9_]
        "!"     return T_NOT;
        "="     return T_EQUAL;
        "!="    return T_UNEQUAL;
+       "<="    return T_LESS_EQUAL;
+       ">="    return T_GREATER_EQUAL;
+       "<"     return T_LESS;
+       ">"     return T_GREATER;
        \"|\'   {
                str = yytext[0];
                new_string();
@@ -141,7 +145,12 @@ n  [A-Za-z0-9_]
        }
        #.*     /* comment */
        \\\n    current_file->lineno++;
-       .
+       [[:blank:]]+
+       .       {
+               fprintf(stderr,
+                       "%s:%d:warning: ignoring unsupported character '%c'\n",
+                       zconf_curname(), zconf_lineno(), *yytext);
+       }
        <<EOF>> {
                BEGIN(INITIAL);
        }
index 349a7f24315b1d1c4887962626365fd749fdae02..dd4e86c825210775cd9282eae71f489f80adec07 100644 (file)
@@ -365,323 +365,354 @@ int zconflineno = 1;
 
 extern char *zconftext;
 #define yytext_ptr zconftext
-static yyconst flex_int16_t yy_nxt[][17] =
+static yyconst flex_int16_t yy_nxt[][19] =
     {
     {
         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
-        0,    0,    0,    0,    0,    0,    0
+        0,    0,    0,    0,    0,    0,    0,    0,    0
     },
 
     {
        11,   12,   13,   14,   12,   12,   15,   12,   12,   12,
-       12,   12,   12,   12,   12,   12,   12
+       12,   12,   12,   12,   12,   12,   12,   12,   12
     },
 
     {
        11,   12,   13,   14,   12,   12,   15,   12,   12,   12,
-       12,   12,   12,   12,   12,   12,   12
+       12,   12,   12,   12,   12,   12,   12,   12,   12
     },
 
     {
        11,   16,   16,   17,   16,   16,   16,   16,   16,   16,
-       16,   16,   16,   18,   16,   16,   16
+       16,   16,   16,   18,   16,   16,   16,   16,   16
     },
 
     {
        11,   16,   16,   17,   16,   16,   16,   16,   16,   16,
-       16,   16,   16,   18,   16,   16,   16
+       16,   16,   16,   18,   16,   16,   16,   16,   16
 
     },
 
     {
        11,   19,   20,   21,   19,   19,   19,   19,   19,   19,
-       19,   19,   19,   19,   19,   19,   19
+       19,   19,   19,   19,   19,   19,   19,   19,   19
     },
 
     {
        11,   19,   20,   21,   19,   19,   19,   19,   19,   19,
-       19,   19,   19,   19,   19,   19,   19
+       19,   19,   19,   19,   19,   19,   19,   19,   19
     },
 
     {
        11,   22,   22,   23,   22,   24,   22,   22,   24,   22,
-       22,   22,   22,   22,   22,   25,   22
+       22,   22,   22,   22,   22,   22,   22,   25,   22
     },
 
     {
        11,   22,   22,   23,   22,   24,   22,   22,   24,   22,
-       22,   22,   22,   22,   22,   25,   22
+       22,   22,   22,   22,   22,   22,   22,   25,   22
     },
 
     {
-       11,   26,   26,   27,   28,   29,   30,   31,   29,   32,
-       33,   34,   35,   35,   36,   37,   38
+       11,   26,   27,   28,   29,   30,   31,   32,   30,   33,
+       34,   35,   36,   36,   37,   38,   39,   40,   41
 
     },
 
     {
-       11,   26,   26,   27,   28,   29,   30,   31,   29,   32,
-       33,   34,   35,   35,   36,   37,   38
+       11,   26,   27,   28,   29,   30,   31,   32,   30,   33,
+       34,   35,   36,   36,   37,   38,   39,   40,   41
     },
 
     {
       -11,  -11,  -11,  -11,  -11,  -11,  -11,  -11,  -11,  -11,
-      -11,  -11,  -11,  -11,  -11,  -11,  -11
+      -11,  -11,  -11,  -11,  -11,  -11,  -11,  -11,  -11
     },
 
     {
        11,  -12,  -12,  -12,  -12,  -12,  -12,  -12,  -12,  -12,
-      -12,  -12,  -12,  -12,  -12,  -12,  -12
+      -12,  -12,  -12,  -12,  -12,  -12,  -12,  -12,  -12
     },
 
     {
-       11,  -13,   39,   40,  -13,  -13,   41,  -13,  -13,  -13,
-      -13,  -13,  -13,  -13,  -13,  -13,  -13
+       11,  -13,   42,   43,  -13,  -13,   44,  -13,  -13,  -13,
+      -13,  -13,  -13,  -13,  -13,  -13,  -13,  -13,  -13
     },
 
     {
        11,  -14,  -14,  -14,  -14,  -14,  -14,  -14,  -14,  -14,
-      -14,  -14,  -14,  -14,  -14,  -14,  -14
+      -14,  -14,  -14,  -14,  -14,  -14,  -14,  -14,  -14
 
     },
 
     {
-       11,   42,   42,   43,   42,   42,   42,   42,   42,   42,
-       42,   42,   42,   42,   42,   42,   42
+       11,   45,   45,   46,   45,   45,   45,   45,   45,   45,
+       45,   45,   45,   45,   45,   45,   45,   45,   45
     },
 
     {
        11,  -16,  -16,  -16,  -16,  -16,  -16,  -16,  -16,  -16,
-      -16,  -16,  -16,  -16,  -16,  -16,  -16
+      -16,  -16,  -16,  -16,  -16,  -16,  -16,  -16,  -16
     },
 
     {
        11,  -17,  -17,  -17,  -17,  -17,  -17,  -17,  -17,  -17,
-      -17,  -17,  -17,  -17,  -17,  -17,  -17
+      -17,  -17,  -17,  -17,  -17,  -17,  -17,  -17,  -17
     },
 
     {
        11,  -18,  -18,  -18,  -18,  -18,  -18,  -18,  -18,  -18,
-      -18,  -18,  -18,   44,  -18,  -18,  -18
+      -18,  -18,  -18,   47,  -18,  -18,  -18,  -18,  -18
     },
 
     {
-       11,   45,   45,  -19,   45,   45,   45,   45,   45,   45,
-       45,   45,   45,   45,   45,   45,   45
+       11,   48,   48,  -19,   48,   48,   48,   48,   48,   48,
+       48,   48,   48,   48,   48,   48,   48,   48,   48
 
     },
 
     {
-       11,  -20,   46,   47,  -20,  -20,  -20,  -20,  -20,  -20,
-      -20,  -20,  -20,  -20,  -20,  -20,  -20
+       11,  -20,   49,   50,  -20,  -20,  -20,  -20,  -20,  -20,
+      -20,  -20,  -20,  -20,  -20,  -20,  -20,  -20,  -20
     },
 
     {
-       11,   48,  -21,  -21,   48,   48,   48,   48,   48,   48,
-       48,   48,   48,   48,   48,   48,   48
+       11,   51,  -21,  -21,   51,   51,   51,   51,   51,   51,
+       51,   51,   51,   51,   51,   51,   51,   51,   51
     },
 
     {
-       11,   49,   49,   50,   49,  -22,   49,   49,  -22,   49,
-       49,   49,   49,   49,   49,  -22,   49
+       11,   52,   52,   53,   52,  -22,   52,   52,  -22,   52,
+       52,   52,   52,   52,   52,   52,   52,  -22,   52
     },
 
     {
        11,  -23,  -23,  -23,  -23,  -23,  -23,  -23,  -23,  -23,
-      -23,  -23,  -23,  -23,  -23,  -23,  -23
+      -23,  -23,  -23,  -23,  -23,  -23,  -23,  -23,  -23
     },
 
     {
        11,  -24,  -24,  -24,  -24,  -24,  -24,  -24,  -24,  -24,
-      -24,  -24,  -24,  -24,  -24,  -24,  -24
+      -24,  -24,  -24,  -24,  -24,  -24,  -24,  -24,  -24
 
     },
 
     {
-       11,   51,   51,   52,   51,   51,   51,   51,   51,   51,
-       51,   51,   51,   51,   51,   51,   51
+       11,   54,   54,   55,   54,   54,   54,   54,   54,   54,
+       54,   54,   54,   54,   54,   54,   54,   54,   54
     },
 
     {
        11,  -26,  -26,  -26,  -26,  -26,  -26,  -26,  -26,  -26,
-      -26,  -26,  -26,  -26,  -26,  -26,  -26
+      -26,  -26,  -26,  -26,  -26,  -26,  -26,  -26,  -26
     },
 
     {
-       11,  -27,  -27,  -27,  -27,  -27,  -27,  -27,  -27,  -27,
-      -27,  -27,  -27,  -27,  -27,  -27,  -27
+       11,  -27,   56,  -27,  -27,  -27,  -27,  -27,  -27,  -27,
+      -27,  -27,  -27,  -27,  -27,  -27,  -27,  -27,  -27
     },
 
     {
        11,  -28,  -28,  -28,  -28,  -28,  -28,  -28,  -28,  -28,
-      -28,  -28,  -28,  -28,   53,  -28,  -28
+      -28,  -28,  -28,  -28,  -28,  -28,  -28,  -28,  -28
     },
 
     {
        11,  -29,  -29,  -29,  -29,  -29,  -29,  -29,  -29,  -29,
-      -29,  -29,  -29,  -29,  -29,  -29,  -29
+      -29,  -29,  -29,  -29,  -29,   57,  -29,  -29,  -29
 
     },
 
     {
-       11,   54,   54,  -30,   54,   54,   54,   54,   54,   54,
-       54,   54,   54,   54,   54,   54,   54
+       11,  -30,  -30,  -30,  -30,  -30,  -30,  -30,  -30,  -30,
+      -30,  -30,  -30,  -30,  -30,  -30,  -30,  -30,  -30
     },
 
     {
-       11,  -31,  -31,  -31,  -31,  -31,  -31,   55,  -31,  -31,
-      -31,  -31,  -31,  -31,  -31,  -31,  -31
+       11,   58,   58,  -31,   58,   58,   58,   58,   58,   58,
+       58,   58,   58,   58,   58,   58,   58,   58,   58
     },
 
     {
-       11,  -32,  -32,  -32,  -32,  -32,  -32,  -32,  -32,  -32,
-      -32,  -32,  -32,  -32,  -32,  -32,  -32
+       11,  -32,  -32,  -32,  -32,  -32,  -32,   59,  -32,  -32,
+      -32,  -32,  -32,  -32,  -32,  -32,  -32,  -32,  -32
     },
 
     {
        11,  -33,  -33,  -33,  -33,  -33,  -33,  -33,  -33,  -33,
-      -33,  -33,  -33,  -33,  -33,  -33,  -33
+      -33,  -33,  -33,  -33,  -33,  -33,  -33,  -33,  -33
     },
 
     {
        11,  -34,  -34,  -34,  -34,  -34,  -34,  -34,  -34,  -34,
-      -34,   56,   57,   57,  -34,  -34,  -34
+      -34,  -34,  -34,  -34,  -34,  -34,  -34,  -34,  -34
 
     },
 
     {
        11,  -35,  -35,  -35,  -35,  -35,  -35,  -35,  -35,  -35,
-      -35,   57,   57,   57,  -35,  -35,  -35
+      -35,   60,   61,   61,  -35,  -35,  -35,  -35,  -35
     },
 
     {
        11,  -36,  -36,  -36,  -36,  -36,  -36,  -36,  -36,  -36,
-      -36,  -36,  -36,  -36,  -36,  -36,  -36
+      -36,   61,   61,   61,  -36,  -36,  -36,  -36,  -36
     },
 
     {
-       11,  -37,  -37,   58,  -37,  -37,  -37,  -37,  -37,  -37,
-      -37,  -37,  -37,  -37,  -37,  -37,  -37
+       11,  -37,  -37,  -37,  -37,  -37,  -37,  -37,  -37,  -37,
+      -37,  -37,  -37,  -37,  -37,   62,  -37,  -37,  -37
     },
 
     {
        11,  -38,  -38,  -38,  -38,  -38,  -38,  -38,  -38,  -38,
-      -38,  -38,  -38,  -38,  -38,  -38,   59
+      -38,  -38,  -38,  -38,  -38,  -38,  -38,  -38,  -38
     },
 
     {
-       11,  -39,   39,   40,  -39,  -39,   41,  -39,  -39,  -39,
-      -39,  -39,  -39,  -39,  -39,  -39,  -39
+       11,  -39,  -39,  -39,  -39,  -39,  -39,  -39,  -39,  -39,
+      -39,  -39,  -39,  -39,  -39,   63,  -39,  -39,  -39
 
     },
 
     {
-       11,  -40,  -40,  -40,  -40,  -40,  -40,  -40,  -40,  -40,
-      -40,  -40,  -40,  -40,  -40,  -40,  -40
+       11,  -40,  -40,   64,  -40,  -40,  -40,  -40,  -40,  -40,
+      -40,  -40,  -40,  -40,  -40,  -40,  -40,  -40,  -40
     },
 
     {
-       11,   42,   42,   43,   42,   42,   42,   42,   42,   42,
-       42,   42,   42,   42,   42,   42,   42
+       11,  -41,  -41,  -41,  -41,  -41,  -41,  -41,  -41,  -41,
+      -41,  -41,  -41,  -41,  -41,  -41,  -41,  -41,   65
     },
 
     {
-       11,   42,   42,   43,   42,   42,   42,   42,   42,   42,
-       42,   42,   42,   42,   42,   42,   42
+       11,  -42,   42,   43,  -42,  -42,   44,  -42,  -42,  -42,
+      -42,  -42,  -42,  -42,  -42,  -42,  -42,  -42,  -42
     },
 
     {
        11,  -43,  -43,  -43,  -43,  -43,  -43,  -43,  -43,  -43,
-      -43,  -43,  -43,  -43,  -43,  -43,  -43
+      -43,  -43,  -43,  -43,  -43,  -43,  -43,  -43,  -43
     },
 
     {
-       11,  -44,  -44,  -44,  -44,  -44,  -44,  -44,  -44,  -44,
-      -44,  -44,  -44,   44,  -44,  -44,  -44
+       11,   45,   45,   46,   45,   45,   45,   45,   45,   45,
+       45,   45,   45,   45,   45,   45,   45,   45,   45
 
     },
 
     {
-       11,   45,   45,  -45,   45,   45,   45,   45,   45,   45,
-       45,   45,   45,   45,   45,   45,   45
+       11,   45,   45,   46,   45,   45,   45,   45,   45,   45,
+       45,   45,   45,   45,   45,   45,   45,   45,   45
     },
 
     {
-       11,  -46,   46,   47,  -46,  -46,  -46,  -46,  -46,  -46,
-      -46,  -46,  -46,  -46,  -46,  -46,  -46
+       11,  -46,  -46,  -46,  -46,  -46,  -46,  -46,  -46,  -46,
+      -46,  -46,  -46,  -46,  -46,  -46,  -46,  -46,  -46
     },
 
     {
-       11,   48,  -47,  -47,   48,   48,   48,   48,   48,   48,
-       48,   48,   48,   48,   48,   48,   48
+       11,  -47,  -47,  -47,  -47,  -47,  -47,  -47,  -47,  -47,
+      -47,  -47,  -47,   47,  -47,  -47,  -47,  -47,  -47
     },
 
     {
-       11,  -48,  -48,  -48,  -48,  -48,  -48,  -48,  -48,  -48,
-      -48,  -48,  -48,  -48,  -48,  -48,  -48
+       11,   48,   48,  -48,   48,   48,   48,   48,   48,   48,
+       48,   48,   48,   48,   48,   48,   48,   48,   48
     },
 
     {
-       11,   49,   49,   50,   49,  -49,   49,   49,  -49,   49,
-       49,   49,   49,   49,   49,  -49,   49
+       11,  -49,   49,   50,  -49,  -49,  -49,  -49,  -49,  -49,
+      -49,  -49,  -49,  -49,  -49,  -49,  -49,  -49,  -49
 
     },
 
     {
-       11,  -50,  -50,  -50,  -50,  -50,  -50,  -50,  -50,  -50,
-      -50,  -50,  -50,  -50,  -50,  -50,  -50
+       11,   51,  -50,  -50,   51,   51,   51,   51,   51,   51,
+       51,   51,   51,   51,   51,   51,   51,   51,   51
     },
 
     {
-       11,  -51,  -51,   52,  -51,  -51,  -51,  -51,  -51,  -51,
-      -51,  -51,  -51,  -51,  -51,  -51,  -51
+       11,  -51,  -51,  -51,  -51,  -51,  -51,  -51,  -51,  -51,
+      -51,  -51,  -51,  -51,  -51,  -51,  -51,  -51,  -51
     },
 
     {
-       11,  -52,  -52,  -52,  -52,  -52,  -52,  -52,  -52,  -52,
-      -52,  -52,  -52,  -52,  -52,  -52,  -52
+       11,   52,   52,   53,   52,  -52,   52,   52,  -52,   52,
+       52,   52,   52,   52,   52,   52,   52,  -52,   52
     },
 
     {
        11,  -53,  -53,  -53,  -53,  -53,  -53,  -53,  -53,  -53,
-      -53,  -53,  -53,  -53,  -53,  -53,  -53
+      -53,  -53,  -53,  -53,  -53,  -53,  -53,  -53,  -53
     },
 
     {
-       11,   54,   54,  -54,   54,   54,   54,   54,   54,   54,
-       54,   54,   54,   54,   54,   54,   54
+       11,  -54,  -54,   55,  -54,  -54,  -54,  -54,  -54,  -54,
+      -54,  -54,  -54,  -54,  -54,  -54,  -54,  -54,  -54
 
     },
 
     {
        11,  -55,  -55,  -55,  -55,  -55,  -55,  -55,  -55,  -55,
-      -55,  -55,  -55,  -55,  -55,  -55,  -55
+      -55,  -55,  -55,  -55,  -55,  -55,  -55,  -55,  -55
     },
 
     {
-       11,  -56,  -56,  -56,  -56,  -56,  -56,  -56,  -56,  -56,
-      -56,   60,   57,   57,  -56,  -56,  -56
+       11,  -56,   56,  -56,  -56,  -56,  -56,  -56,  -56,  -56,
+      -56,  -56,  -56,  -56,  -56,  -56,  -56,  -56,  -56
     },
 
     {
        11,  -57,  -57,  -57,  -57,  -57,  -57,  -57,  -57,  -57,
-      -57,   57,   57,   57,  -57,  -57,  -57
+      -57,  -57,  -57,  -57,  -57,  -57,  -57,  -57,  -57
     },
 
     {
-       11,  -58,  -58,  -58,  -58,  -58,  -58,  -58,  -58,  -58,
-      -58,  -58,  -58,  -58,  -58,  -58,  -58
+       11,   58,   58,  -58,   58,   58,   58,   58,   58,   58,
+       58,   58,   58,   58,   58,   58,   58,   58,   58
     },
 
     {
        11,  -59,  -59,  -59,  -59,  -59,  -59,  -59,  -59,  -59,
-      -59,  -59,  -59,  -59,  -59,  -59,  -59
+      -59,  -59,  -59,  -59,  -59,  -59,  -59,  -59,  -59
 
     },
 
     {
        11,  -60,  -60,  -60,  -60,  -60,  -60,  -60,  -60,  -60,
-      -60,   57,   57,   57,  -60,  -60,  -60
+      -60,   66,   61,   61,  -60,  -60,  -60,  -60,  -60
+    },
+
+    {
+       11,  -61,  -61,  -61,  -61,  -61,  -61,  -61,  -61,  -61,
+      -61,   61,   61,   61,  -61,  -61,  -61,  -61,  -61
+    },
+
+    {
+       11,  -62,  -62,  -62,  -62,  -62,  -62,  -62,  -62,  -62,
+      -62,  -62,  -62,  -62,  -62,  -62,  -62,  -62,  -62
+    },
+
+    {
+       11,  -63,  -63,  -63,  -63,  -63,  -63,  -63,  -63,  -63,
+      -63,  -63,  -63,  -63,  -63,  -63,  -63,  -63,  -63
+    },
+
+    {
+       11,  -64,  -64,  -64,  -64,  -64,  -64,  -64,  -64,  -64,
+      -64,  -64,  -64,  -64,  -64,  -64,  -64,  -64,  -64
+
+    },
+
+    {
+       11,  -65,  -65,  -65,  -65,  -65,  -65,  -65,  -65,  -65,
+      -65,  -65,  -65,  -65,  -65,  -65,  -65,  -65,  -65
+    },
+
+    {
+       11,  -66,  -66,  -66,  -66,  -66,  -66,  -66,  -66,  -66,
+      -66,   61,   61,   61,  -66,  -66,  -66,  -66,  -66
     },
 
     } ;
@@ -701,8 +732,8 @@ static void yy_fatal_error (yyconst char msg[]  );
        *yy_cp = '\0'; \
        (yy_c_buf_p) = yy_cp;
 
-#define YY_NUM_RULES 33
-#define YY_END_OF_BUFFER 34
+#define YY_NUM_RULES 38
+#define YY_END_OF_BUFFER 39
 /* This struct is not used in this scanner,
    but its presence is necessary. */
 struct yy_trans_info
@@ -710,14 +741,15 @@ struct yy_trans_info
        flex_int32_t yy_verify;
        flex_int32_t yy_nxt;
        };
-static yyconst flex_int16_t yy_accept[61] =
+static yyconst flex_int16_t yy_accept[67] =
     {   0,
         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
-       34,    5,    4,    2,    3,    7,    8,    6,   32,   29,
-       31,   24,   28,   27,   26,   22,   17,   13,   16,   20,
-       22,   11,   12,   19,   19,   14,   22,   22,    4,    2,
-        3,    3,    1,    6,   32,   29,   31,   30,   24,   23,
-       26,   25,   15,   20,    9,   19,   19,   21,   10,   18
+       39,    5,    4,    2,    3,    7,    8,    6,   37,   34,
+       36,   29,   33,   32,   31,   27,   26,   21,   13,   20,
+       24,   27,   11,   12,   23,   23,   18,   14,   19,   27,
+       27,    4,    2,    3,    3,    1,    6,   37,   34,   36,
+       35,   29,   28,   31,   30,   26,   15,   24,    9,   23,
+       23,   16,   17,   25,   10,   22
     } ;
 
 static yyconst flex_int32_t yy_ec[256] =
@@ -727,15 +759,15 @@ static yyconst flex_int32_t yy_ec[256] =
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    2,    4,    5,    6,    1,    1,    7,    8,    9,
        10,    1,    1,    1,   11,   12,   12,   13,   13,   13,
-       13,   13,   13,   13,   13,   13,   13,    1,    1,    1,
-       14,    1,    1,    1,   13,   13,   13,   13,   13,   13,
+       13,   13,   13,   13,   13,   13,   13,    1,    1,   14,
+       15,   16,    1,    1,   13,   13,   13,   13,   13,   13,
        13,   13,   13,   13,   13,   13,   13,   13,   13,   13,
        13,   13,   13,   13,   13,   13,   13,   13,   13,   13,
-        1,   15,    1,    1,   13,    1,   13,   13,   13,   13,
+        1,   17,    1,    1,   13,    1,   13,   13,   13,   13,
 
        13,   13,   13,   13,   13,   13,   13,   13,   13,   13,
        13,   13,   13,   13,   13,   13,   13,   13,   13,   13,
-       13,   13,    1,   16,    1,    1,    1,    1,    1,    1,
+       13,   13,    1,   18,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
@@ -920,7 +952,7 @@ static int input (void );
 /* This used to be an fputs(), but since the string might contain NUL's,
  * we now use fwrite().
  */
-#define ECHO do { if (fwrite( zconftext, zconfleng, 1, zconfout )) {} } while (0)
+#define ECHO fwrite( zconftext, zconfleng, 1, zconfout )
 #endif
 
 /* Gets input and stuffs it into "buf".  number of characters read, or YY_NULL,
@@ -1142,22 +1174,38 @@ return T_UNEQUAL;
        YY_BREAK
 case 16:
 YY_RULE_SETUP
+return T_LESS_EQUAL;
+       YY_BREAK
+case 17:
+YY_RULE_SETUP
+return T_GREATER_EQUAL;
+       YY_BREAK
+case 18:
+YY_RULE_SETUP
+return T_LESS;
+       YY_BREAK
+case 19:
+YY_RULE_SETUP
+return T_GREATER;
+       YY_BREAK
+case 20:
+YY_RULE_SETUP
 {
                str = zconftext[0];
                new_string();
                BEGIN(STRING);
        }
        YY_BREAK
-case 17:
-/* rule 17 can match eol */
+case 21:
+/* rule 21 can match eol */
 YY_RULE_SETUP
 BEGIN(INITIAL); current_file->lineno++; return T_EOL;
        YY_BREAK
-case 18:
+case 22:
 YY_RULE_SETUP
 /* ignore */
        YY_BREAK
-case 19:
+case 23:
 YY_RULE_SETUP
 {
                const struct kconf_id *id = kconf_id_lookup(zconftext, zconfleng);
@@ -1170,18 +1218,26 @@ YY_RULE_SETUP
                return T_WORD;
        }
        YY_BREAK
-case 20:
+case 24:
 YY_RULE_SETUP
 /* comment */
        YY_BREAK
-case 21:
-/* rule 21 can match eol */
+case 25:
+/* rule 25 can match eol */
 YY_RULE_SETUP
 current_file->lineno++;
        YY_BREAK
-case 22:
+case 26:
 YY_RULE_SETUP
 
+       YY_BREAK
+case 27:
+YY_RULE_SETUP
+{
+               fprintf(stderr,
+                       "%s:%d:warning: ignoring unsupported character '%c'\n",
+                       zconf_curname(), zconf_lineno(), *zconftext);
+       }
        YY_BREAK
 case YY_STATE_EOF(PARAM):
 {
@@ -1189,8 +1245,8 @@ case YY_STATE_EOF(PARAM):
        }
        YY_BREAK
 
-case 23:
-/* rule 23 can match eol */
+case 28:
+/* rule 28 can match eol */
 *yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up zconftext again */
@@ -1201,14 +1257,14 @@ YY_RULE_SETUP
                return T_WORD_QUOTE;
        }
        YY_BREAK
-case 24:
+case 29:
 YY_RULE_SETUP
 {
                append_string(zconftext, zconfleng);
        }
        YY_BREAK
-case 25:
-/* rule 25 can match eol */
+case 30:
+/* rule 30 can match eol */
 *yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up zconftext again */
@@ -1219,13 +1275,13 @@ YY_RULE_SETUP
                return T_WORD_QUOTE;
        }
        YY_BREAK
-case 26:
+case 31:
 YY_RULE_SETUP
 {
                append_string(zconftext + 1, zconfleng - 1);
        }
        YY_BREAK
-case 27:
+case 32:
 YY_RULE_SETUP
 {
                if (str == zconftext[0]) {
@@ -1236,8 +1292,8 @@ YY_RULE_SETUP
                        append_string(zconftext, 1);
        }
        YY_BREAK
-case 28:
-/* rule 28 can match eol */
+case 33:
+/* rule 33 can match eol */
 YY_RULE_SETUP
 {
                printf("%s:%d:warning: multi-line strings not supported\n", zconf_curname(), zconf_lineno());
@@ -1252,7 +1308,7 @@ case YY_STATE_EOF(STRING):
        }
        YY_BREAK
 
-case 29:
+case 34:
 YY_RULE_SETUP
 {
                ts = 0;
@@ -1277,8 +1333,8 @@ YY_RULE_SETUP
                }
        }
        YY_BREAK
-case 30:
-/* rule 30 can match eol */
+case 35:
+/* rule 35 can match eol */
 *yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up zconftext again */
@@ -1289,15 +1345,15 @@ YY_RULE_SETUP
                return T_HELPTEXT;
        }
        YY_BREAK
-case 31:
-/* rule 31 can match eol */
+case 36:
+/* rule 36 can match eol */
 YY_RULE_SETUP
 {
                current_file->lineno++;
                append_string("\n", 1);
        }
        YY_BREAK
-case 32:
+case 37:
 YY_RULE_SETUP
 {
                while (zconfleng) {
@@ -1328,7 +1384,7 @@ case YY_STATE_EOF(COMMAND):
        yyterminate();
 }
        YY_BREAK
-case 33:
+case 38:
 YY_RULE_SETUP
 YY_FATAL_ERROR( "flex scanner jammed" );
        YY_BREAK
index de5e84ed3f96f824b63a1e5bd2e4da597996747f..7a4d658c20667d9f9d0df5facb5d534fa2c2f67e 100644 (file)
@@ -1,8 +1,8 @@
-/* A Bison parser, made by GNU Bison 2.5.  */
+/* A Bison parser, made by GNU Bison 2.5.1.  */
 
 /* Bison implementation for Yacc-like parsers in C
    
-      Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+      Copyright (C) 1984, 1989-1990, 2000-2012 Free Software Foundation, Inc.
    
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -44,7 +44,7 @@
 #define YYBISON 1
 
 /* Bison version.  */
-#define YYBISON_VERSION "2.5"
+#define YYBISON_VERSION "2.5.1"
 
 /* Skeleton name.  */
 #define YYSKELETON_NAME "yacc.c"
@@ -108,6 +108,14 @@ static struct menu *current_menu, *current_entry;
 
 
 
+# ifndef YY_NULL
+#  if defined __cplusplus && 201103L <= __cplusplus
+#   define YY_NULL nullptr
+#  else
+#   define YY_NULL 0
+#  endif
+# endif
+
 /* Enabling traces.  */
 #ifndef YYDEBUG
 # define YYDEBUG 1
@@ -159,13 +167,17 @@ static struct menu *current_menu, *current_entry;
      T_WORD = 281,
      T_WORD_QUOTE = 282,
      T_UNEQUAL = 283,
-     T_CLOSE_PAREN = 284,
-     T_OPEN_PAREN = 285,
-     T_EOL = 286,
-     T_OR = 287,
-     T_AND = 288,
-     T_EQUAL = 289,
-     T_NOT = 290
+     T_LESS = 284,
+     T_LESS_EQUAL = 285,
+     T_GREATER = 286,
+     T_GREATER_EQUAL = 287,
+     T_CLOSE_PAREN = 288,
+     T_OPEN_PAREN = 289,
+     T_EOL = 290,
+     T_OR = 291,
+     T_AND = 292,
+     T_EQUAL = 293,
+     T_NOT = 294
    };
 #endif
 
@@ -304,6 +316,7 @@ YYID (yyi)
 #    if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
      || defined __cplusplus || defined _MSC_VER)
 #     include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+      /* Use EXIT_SUCCESS as a witness for stdlib.h.  */
 #     ifndef EXIT_SUCCESS
 #      define EXIT_SUCCESS 0
 #     endif
@@ -395,20 +408,20 @@ union yyalloc
 #endif
 
 #if defined YYCOPY_NEEDED && YYCOPY_NEEDED
-/* Copy COUNT objects from FROM to TO.  The source and destination do
+/* Copy COUNT objects from SRC to DST.  The source and destination do
    not overlap.  */
 # ifndef YYCOPY
 #  if defined __GNUC__ && 1 < __GNUC__
-#   define YYCOPY(To, From, Count) \
-      __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
+#   define YYCOPY(Dst, Src, Count) \
+      __builtin_memcpy (Dst, Src, (Count) * sizeof (*(Src)))
 #  else
-#   define YYCOPY(To, From, Count)             \
-      do                                       \
-       {                                       \
-         YYSIZE_T yyi;                         \
-         for (yyi = 0; yyi < (Count); yyi++)   \
-           (To)[yyi] = (From)[yyi];            \
-       }                                       \
+#   define YYCOPY(Dst, Src, Count)              \
+      do                                        \
+        {                                       \
+          YYSIZE_T yyi;                         \
+          for (yyi = 0; yyi < (Count); yyi++)   \
+            (Dst)[yyi] = (Src)[yyi];            \
+        }                                       \
       while (YYID (0))
 #  endif
 # endif
@@ -417,20 +430,20 @@ union yyalloc
 /* YYFINAL -- State number of the termination state.  */
 #define YYFINAL  11
 /* YYLAST -- Last index in YYTABLE.  */
-#define YYLAST   290
+#define YYLAST   298
 
 /* YYNTOKENS -- Number of terminals.  */
-#define YYNTOKENS  36
+#define YYNTOKENS  40
 /* YYNNTS -- Number of nonterminals.  */
 #define YYNNTS  50
 /* YYNRULES -- Number of rules.  */
-#define YYNRULES  118
+#define YYNRULES  122
 /* YYNRULES -- Number of states.  */
-#define YYNSTATES  191
+#define YYNSTATES  199
 
 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX.  */
 #define YYUNDEFTOK  2
-#define YYMAXUTOK   290
+#define YYMAXUTOK   294
 
 #define YYTRANSLATE(YYX)                                               \
   ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
@@ -467,7 +480,7 @@ static const yytype_uint8 yytranslate[] =
        5,     6,     7,     8,     9,    10,    11,    12,    13,    14,
       15,    16,    17,    18,    19,    20,    21,    22,    23,    24,
       25,    26,    27,    28,    29,    30,    31,    32,    33,    34,
-      35
+      35,    36,    37,    38,    39
 };
 
 #if YYDEBUG
@@ -486,64 +499,67 @@ static const yytype_uint16 yyprhs[] =
      235,   238,   241,   244,   248,   252,   255,   258,   261,   262,
      265,   268,   271,   276,   277,   280,   283,   286,   287,   290,
      292,   294,   297,   300,   303,   305,   308,   309,   312,   314,
-     318,   322,   326,   329,   333,   337,   339,   341,   342
+     318,   322,   326,   330,   334,   338,   342,   345,   349,   353,
+     355,   357,   358
 };
 
 /* YYRHS -- A `-1'-separated list of the rules' RHS.  */
 static const yytype_int8 yyrhs[] =
 {
-      37,     0,    -1,    81,    38,    -1,    38,    -1,    63,    39,
-      -1,    39,    -1,    -1,    39,    41,    -1,    39,    55,    -1,
-      39,    67,    -1,    39,    80,    -1,    39,    26,     1,    31,
-      -1,    39,    40,     1,    31,    -1,    39,     1,    31,    -1,
+      41,     0,    -1,    85,    42,    -1,    42,    -1,    67,    43,
+      -1,    43,    -1,    -1,    43,    45,    -1,    43,    59,    -1,
+      43,    71,    -1,    43,    84,    -1,    43,    26,     1,    35,
+      -1,    43,    44,     1,    35,    -1,    43,     1,    35,    -1,
       16,    -1,    18,    -1,    19,    -1,    21,    -1,    17,    -1,
-      22,    -1,    20,    -1,    23,    -1,    31,    -1,    61,    -1,
-      71,    -1,    44,    -1,    46,    -1,    69,    -1,    26,     1,
-      31,    -1,     1,    31,    -1,    10,    26,    31,    -1,    43,
-      47,    -1,    11,    26,    31,    -1,    45,    47,    -1,    -1,
-      47,    48,    -1,    47,    49,    -1,    47,    75,    -1,    47,
-      73,    -1,    47,    42,    -1,    47,    31,    -1,    19,    78,
-      31,    -1,    18,    79,    82,    31,    -1,    20,    83,    82,
-      31,    -1,    21,    26,    82,    31,    -1,    22,    84,    84,
-      82,    31,    -1,    24,    50,    31,    -1,    -1,    50,    26,
-      51,    -1,    -1,    34,    79,    -1,     7,    85,    31,    -1,
-      52,    56,    -1,    80,    -1,    53,    58,    54,    -1,    -1,
-      56,    57,    -1,    56,    75,    -1,    56,    73,    -1,    56,
-      31,    -1,    56,    42,    -1,    18,    79,    82,    31,    -1,
-      19,    78,    31,    -1,    17,    31,    -1,    20,    26,    82,
-      31,    -1,    -1,    58,    41,    -1,    14,    83,    81,    -1,
-      80,    -1,    59,    62,    60,    -1,    -1,    62,    41,    -1,
-      62,    67,    -1,    62,    55,    -1,     3,    79,    81,    -1,
-       4,    79,    31,    -1,    64,    76,    74,    -1,    80,    -1,
-      65,    68,    66,    -1,    -1,    68,    41,    -1,    68,    67,
-      -1,    68,    55,    -1,     6,    79,    31,    -1,     9,    79,
-      31,    -1,    70,    74,    -1,    12,    31,    -1,    72,    13,
-      -1,    -1,    74,    75,    -1,    74,    31,    -1,    74,    42,
-      -1,    16,    25,    83,    31,    -1,    -1,    76,    77,    -1,
-      76,    31,    -1,    23,    82,    -1,    -1,    79,    82,    -1,
-      26,    -1,    27,    -1,     5,    31,    -1,     8,    31,    -1,
-      15,    31,    -1,    31,    -1,    81,    31,    -1,    -1,    14,
-      83,    -1,    84,    -1,    84,    34,    84,    -1,    84,    28,
-      84,    -1,    30,    83,    29,    -1,    35,    83,    -1,    83,
-      32,    83,    -1,    83,    33,    83,    -1,    26,    -1,    27,
-      -1,    -1,    26,    -1
+      22,    -1,    20,    -1,    23,    -1,    35,    -1,    65,    -1,
+      75,    -1,    48,    -1,    50,    -1,    73,    -1,    26,     1,
+      35,    -1,     1,    35,    -1,    10,    26,    35,    -1,    47,
+      51,    -1,    11,    26,    35,    -1,    49,    51,    -1,    -1,
+      51,    52,    -1,    51,    53,    -1,    51,    79,    -1,    51,
+      77,    -1,    51,    46,    -1,    51,    35,    -1,    19,    82,
+      35,    -1,    18,    83,    86,    35,    -1,    20,    87,    86,
+      35,    -1,    21,    26,    86,    35,    -1,    22,    88,    88,
+      86,    35,    -1,    24,    54,    35,    -1,    -1,    54,    26,
+      55,    -1,    -1,    38,    83,    -1,     7,    89,    35,    -1,
+      56,    60,    -1,    84,    -1,    57,    62,    58,    -1,    -1,
+      60,    61,    -1,    60,    79,    -1,    60,    77,    -1,    60,
+      35,    -1,    60,    46,    -1,    18,    83,    86,    35,    -1,
+      19,    82,    35,    -1,    17,    35,    -1,    20,    26,    86,
+      35,    -1,    -1,    62,    45,    -1,    14,    87,    85,    -1,
+      84,    -1,    63,    66,    64,    -1,    -1,    66,    45,    -1,
+      66,    71,    -1,    66,    59,    -1,     3,    83,    85,    -1,
+       4,    83,    35,    -1,    68,    80,    78,    -1,    84,    -1,
+      69,    72,    70,    -1,    -1,    72,    45,    -1,    72,    71,
+      -1,    72,    59,    -1,     6,    83,    35,    -1,     9,    83,
+      35,    -1,    74,    78,    -1,    12,    35,    -1,    76,    13,
+      -1,    -1,    78,    79,    -1,    78,    35,    -1,    78,    46,
+      -1,    16,    25,    87,    35,    -1,    -1,    80,    81,    -1,
+      80,    35,    -1,    23,    86,    -1,    -1,    83,    86,    -1,
+      26,    -1,    27,    -1,     5,    35,    -1,     8,    35,    -1,
+      15,    35,    -1,    35,    -1,    85,    35,    -1,    -1,    14,
+      87,    -1,    88,    -1,    88,    29,    88,    -1,    88,    30,
+      88,    -1,    88,    31,    88,    -1,    88,    32,    88,    -1,
+      88,    38,    88,    -1,    88,    28,    88,    -1,    34,    87,
+      33,    -1,    39,    87,    -1,    87,    36,    87,    -1,    87,
+      37,    87,    -1,    26,    -1,    27,    -1,    -1,    26,    -1
 };
 
 /* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
 static const yytype_uint16 yyrline[] =
 {
-       0,   103,   103,   103,   105,   105,   107,   109,   110,   111,
-     112,   113,   114,   118,   122,   122,   122,   122,   122,   122,
-     122,   122,   126,   127,   128,   129,   130,   131,   135,   136,
-     142,   150,   156,   164,   174,   176,   177,   178,   179,   180,
-     181,   184,   192,   198,   208,   214,   220,   223,   225,   236,
-     237,   242,   251,   256,   264,   267,   269,   270,   271,   272,
-     273,   276,   282,   293,   299,   309,   311,   316,   324,   332,
-     335,   337,   338,   339,   344,   351,   358,   363,   371,   374,
-     376,   377,   378,   381,   389,   396,   403,   409,   416,   418,
-     419,   420,   423,   431,   433,   434,   437,   444,   446,   451,
-     452,   455,   456,   457,   461,   462,   465,   466,   469,   470,
-     471,   472,   473,   474,   475,   478,   479,   482,   483
+       0,   108,   108,   108,   110,   110,   112,   114,   115,   116,
+     117,   118,   119,   123,   127,   127,   127,   127,   127,   127,
+     127,   127,   131,   132,   133,   134,   135,   136,   140,   141,
+     147,   155,   161,   169,   179,   181,   182,   183,   184,   185,
+     186,   189,   197,   203,   213,   219,   225,   228,   230,   241,
+     242,   247,   256,   261,   269,   272,   274,   275,   276,   277,
+     278,   281,   287,   298,   304,   314,   316,   321,   329,   337,
+     340,   342,   343,   344,   349,   356,   363,   368,   376,   379,
+     381,   382,   383,   386,   394,   401,   408,   414,   421,   423,
+     424,   425,   428,   436,   438,   439,   442,   449,   451,   456,
+     457,   460,   461,   462,   466,   467,   470,   471,   474,   475,
+     476,   477,   478,   479,   480,   481,   482,   483,   484,   487,
+     488,   491,   492
 };
 #endif
 
@@ -557,6 +573,7 @@ static const char *const yytname[] =
   "T_MENUCONFIG", "T_HELP", "T_HELPTEXT", "T_IF", "T_ENDIF", "T_DEPENDS",
   "T_OPTIONAL", "T_PROMPT", "T_TYPE", "T_DEFAULT", "T_SELECT", "T_RANGE",
   "T_VISIBLE", "T_OPTION", "T_ON", "T_WORD", "T_WORD_QUOTE", "T_UNEQUAL",
+  "T_LESS", "T_LESS_EQUAL", "T_GREATER", "T_GREATER_EQUAL",
   "T_CLOSE_PAREN", "T_OPEN_PAREN", "T_EOL", "T_OR", "T_AND", "T_EQUAL",
   "T_NOT", "$accept", "input", "start", "stmt_list", "option_name",
   "common_stmt", "option_error", "config_entry_start", "config_stmt",
@@ -568,7 +585,7 @@ static const char *const yytname[] =
   "menu_entry", "menu_end", "menu_stmt", "menu_block", "source_stmt",
   "comment", "comment_stmt", "help_start", "help", "depends_list",
   "depends", "visibility_list", "visible", "prompt_stmt_opt", "prompt",
-  "end", "nl", "if_expr", "expr", "symbol", "word_opt", 0
+  "end", "nl", "if_expr", "expr", "symbol", "word_opt", YY_NULL
 };
 #endif
 
@@ -580,25 +597,26 @@ static const yytype_uint16 yytoknum[] =
        0,   256,   257,   258,   259,   260,   261,   262,   263,   264,
      265,   266,   267,   268,   269,   270,   271,   272,   273,   274,
      275,   276,   277,   278,   279,   280,   281,   282,   283,   284,
-     285,   286,   287,   288,   289,   290
+     285,   286,   287,   288,   289,   290,   291,   292,   293,   294
 };
 # endif
 
 /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives.  */
 static const yytype_uint8 yyr1[] =
 {
-       0,    36,    37,    37,    38,    38,    39,    39,    39,    39,
-      39,    39,    39,    39,    40,    40,    40,    40,    40,    40,
-      40,    40,    41,    41,    41,    41,    41,    41,    42,    42,
-      43,    44,    45,    46,    47,    47,    47,    47,    47,    47,
-      47,    48,    48,    48,    48,    48,    49,    50,    50,    51,
-      51,    52,    53,    54,    55,    56,    56,    56,    56,    56,
-      56,    57,    57,    57,    57,    58,    58,    59,    60,    61,
-      62,    62,    62,    62,    63,    64,    65,    66,    67,    68,
-      68,    68,    68,    69,    70,    71,    72,    73,    74,    74,
-      74,    74,    75,    76,    76,    76,    77,    78,    78,    79,
-      79,    80,    80,    80,    81,    81,    82,    82,    83,    83,
-      83,    83,    83,    83,    83,    84,    84,    85,    85
+       0,    40,    41,    41,    42,    42,    43,    43,    43,    43,
+      43,    43,    43,    43,    44,    44,    44,    44,    44,    44,
+      44,    44,    45,    45,    45,    45,    45,    45,    46,    46,
+      47,    48,    49,    50,    51,    51,    51,    51,    51,    51,
+      51,    52,    52,    52,    52,    52,    53,    54,    54,    55,
+      55,    56,    57,    58,    59,    60,    60,    60,    60,    60,
+      60,    61,    61,    61,    61,    62,    62,    63,    64,    65,
+      66,    66,    66,    66,    67,    68,    69,    70,    71,    72,
+      72,    72,    72,    73,    74,    75,    76,    77,    78,    78,
+      78,    78,    79,    80,    80,    80,    81,    82,    82,    83,
+      83,    84,    84,    84,    85,    85,    86,    86,    87,    87,
+      87,    87,    87,    87,    87,    87,    87,    87,    87,    88,
+      88,    89,    89
 };
 
 /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN.  */
@@ -615,7 +633,8 @@ static const yytype_uint8 yyr2[] =
        2,     2,     2,     3,     3,     2,     2,     2,     0,     2,
        2,     2,     4,     0,     2,     2,     2,     0,     2,     1,
        1,     2,     2,     2,     1,     2,     0,     2,     1,     3,
-       3,     3,     2,     3,     3,     1,     1,     0,     1
+       3,     3,     3,     3,     3,     3,     2,     3,     3,     1,
+       1,     0,     1
 };
 
 /* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM.
@@ -624,72 +643,72 @@ static const yytype_uint8 yyr2[] =
 static const yytype_uint8 yydefact[] =
 {
        6,     0,   104,     0,     3,     0,     6,     6,    99,   100,
-       0,     1,     0,     0,     0,     0,   117,     0,     0,     0,
+       0,     1,     0,     0,     0,     0,   121,     0,     0,     0,
        0,     0,     0,    14,    18,    15,    16,    20,    17,    19,
       21,     0,    22,     0,     7,    34,    25,    34,    26,    55,
       65,     8,    70,    23,    93,    79,     9,    27,    88,    24,
-      10,     0,   105,     2,    74,    13,     0,   101,     0,   118,
-       0,   102,     0,     0,     0,   115,   116,     0,     0,     0,
+      10,     0,   105,     2,    74,    13,     0,   101,     0,   122,
+       0,   102,     0,     0,     0,   119,   120,     0,     0,     0,
      108,   103,     0,     0,     0,     0,     0,     0,     0,    88,
-       0,     0,    75,    83,    51,    84,    30,    32,     0,   112,
-       0,     0,    67,     0,     0,    11,    12,     0,     0,     0,
-       0,    97,     0,     0,     0,    47,     0,    40,    39,    35,
-      36,     0,    38,    37,     0,     0,    97,     0,    59,    60,
-      56,    58,    57,    66,    54,    53,    71,    73,    69,    72,
-      68,   106,    95,     0,    94,    80,    82,    78,    81,    77,
-      90,    91,    89,   111,   113,   114,   110,   109,    29,    86,
-       0,   106,     0,   106,   106,   106,     0,     0,     0,    87,
-      63,   106,     0,   106,     0,    96,     0,     0,    41,    98,
-       0,     0,   106,    49,    46,    28,     0,    62,     0,   107,
-      92,    42,    43,    44,     0,     0,    48,    61,    64,    45,
-      50
+       0,     0,    75,    83,    51,    84,    30,    32,     0,   116,
+       0,     0,    67,     0,     0,     0,     0,     0,     0,    11,
+      12,     0,     0,     0,     0,    97,     0,     0,     0,    47,
+       0,    40,    39,    35,    36,     0,    38,    37,     0,     0,
+      97,     0,    59,    60,    56,    58,    57,    66,    54,    53,
+      71,    73,    69,    72,    68,   106,    95,     0,    94,    80,
+      82,    78,    81,    77,    90,    91,    89,   115,   117,   118,
+     114,   109,   110,   111,   112,   113,    29,    86,     0,   106,
+       0,   106,   106,   106,     0,     0,     0,    87,    63,   106,
+       0,   106,     0,    96,     0,     0,    41,    98,     0,     0,
+     106,    49,    46,    28,     0,    62,     0,   107,    92,    42,
+      43,    44,     0,     0,    48,    61,    64,    45,    50
 };
 
 /* YYDEFGOTO[NTERM-NUM].  */
 static const yytype_int16 yydefgoto[] =
 {
-      -1,     3,     4,     5,    33,    34,   108,    35,    36,    37,
-      38,    74,   109,   110,   157,   186,    39,    40,   124,    41,
-      76,   120,    77,    42,   128,    43,    78,     6,    44,    45,
-     137,    46,    80,    47,    48,    49,   111,   112,    81,   113,
-      79,   134,   152,   153,    50,     7,   165,    69,    70,    60
+      -1,     3,     4,     5,    33,    34,   112,    35,    36,    37,
+      38,    74,   113,   114,   165,   194,    39,    40,   128,    41,
+      76,   124,    77,    42,   132,    43,    78,     6,    44,    45,
+     141,    46,    80,    47,    48,    49,   115,   116,    81,   117,
+      79,   138,   160,   161,    50,     7,   173,    69,    70,    60
 };
 
 /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
    STATE-NUM.  */
-#define YYPACT_NINF -90
+#define YYPACT_NINF -91
 static const yytype_int16 yypact[] =
 {
-       4,    42,   -90,    96,   -90,   111,   -90,    15,   -90,   -90,
-      75,   -90,    82,    42,   104,    42,   110,   107,    42,   115,
-     125,    -4,   121,   -90,   -90,   -90,   -90,   -90,   -90,   -90,
-     -90,   162,   -90,   163,   -90,   -90,   -90,   -90,   -90,   -90,
-     -90,   -90,   -90,   -90,   -90,   -90,   -90,   -90,   -90,   -90,
-     -90,   139,   -90,   -90,   138,   -90,   142,   -90,   143,   -90,
-     152,   -90,   164,   167,   168,   -90,   -90,    -4,    -4,    77,
-     -18,   -90,   177,   185,    33,    71,   195,   247,   236,    -2,
-     236,   171,   -90,   -90,   -90,   -90,   -90,   -90,    41,   -90,
-      -4,    -4,   138,    97,    97,   -90,   -90,   186,   187,   194,
-      42,    42,    -4,   196,    97,   -90,   219,   -90,   -90,   -90,
-     -90,   210,   -90,   -90,   204,    42,    42,   199,   -90,   -90,
-     -90,   -90,   -90,   -90,   -90,   -90,   -90,   -90,   -90,   -90,
-     -90,   222,   -90,   223,   -90,   -90,   -90,   -90,   -90,   -90,
-     -90,   -90,   -90,   -90,   215,   -90,   -90,   -90,   -90,   -90,
-      -4,   222,   228,   222,    -5,   222,    97,    35,   229,   -90,
-     -90,   222,   232,   222,    -4,   -90,   135,   233,   -90,   -90,
-     234,   235,   222,   240,   -90,   -90,   237,   -90,   239,   -13,
-     -90,   -90,   -90,   -90,   244,    42,   -90,   -90,   -90,   -90,
-     -90
+      19,    37,   -91,    13,   -91,    79,   -91,    20,   -91,   -91,
+     -16,   -91,    21,    37,    25,    37,    41,    36,    37,    78,
+      83,    31,    56,   -91,   -91,   -91,   -91,   -91,   -91,   -91,
+     -91,   116,   -91,   127,   -91,   -91,   -91,   -91,   -91,   -91,
+     -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,
+     -91,   147,   -91,   -91,   105,   -91,   109,   -91,   111,   -91,
+     114,   -91,   136,   137,   142,   -91,   -91,    31,    31,    76,
+     254,   -91,   143,   146,    27,   115,   207,   258,   243,   -14,
+     243,   179,   -91,   -91,   -91,   -91,   -91,   -91,    -7,   -91,
+      31,    31,   105,    51,    51,    51,    51,    51,    51,   -91,
+     -91,   156,   168,   181,    37,    37,    31,   178,    51,   -91,
+     206,   -91,   -91,   -91,   -91,   196,   -91,   -91,   175,    37,
+      37,   185,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,
+     -91,   -91,   -91,   -91,   -91,   214,   -91,   230,   -91,   -91,
+     -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   183,   -91,
+     -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,    31,   214,
+     194,   214,    45,   214,    51,    26,   195,   -91,   -91,   214,
+     197,   214,    31,   -91,   139,   208,   -91,   -91,   220,   224,
+     214,   222,   -91,   -91,   226,   -91,   227,   123,   -91,   -91,
+     -91,   -91,   235,    37,   -91,   -91,   -91,   -91,   -91
 };
 
 /* YYPGOTO[NTERM-NUM].  */
 static const yytype_int16 yypgoto[] =
 {
-     -90,   -90,   269,   271,   -90,    23,   -70,   -90,   -90,   -90,
-     -90,   243,   -90,   -90,   -90,   -90,   -90,   -90,   -90,   -48,
-     -90,   -90,   -90,   -90,   -90,   -90,   -90,   -90,   -90,   -90,
-     -90,   -20,   -90,   -90,   -90,   -90,   -90,   206,   205,   -68,
-     -90,   -90,   169,    -1,    27,    -7,   118,   -66,   -89,   -90
+     -91,   -91,   264,   268,   -91,    30,   -65,   -91,   -91,   -91,
+     -91,   238,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   -12,
+     -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,   -91,
+     -91,    -5,   -91,   -91,   -91,   -91,   -91,   200,   209,   -61,
+     -91,   -91,   170,    -1,    65,     0,   118,   -66,   -90,   -91
 };
 
 /* YYTABLE[YYPACT[STATE-NUM]].  What to do in state STATE-NUM.  If
@@ -698,102 +717,102 @@ static const yytype_int16 yypgoto[] =
 #define YYTABLE_NINF -86
 static const yytype_int16 yytable[] =
 {
-      10,    88,    89,    54,   146,   147,   119,     1,   122,   164,
-      93,   141,    56,   142,    58,   156,    94,    62,     1,    90,
-      91,   131,    65,    66,   144,   145,    67,    90,    91,   132,
-     127,    68,   136,   -31,    97,     2,   154,   -31,   -31,   -31,
-     -31,   -31,   -31,   -31,   -31,    98,    52,   -31,   -31,    99,
-     -31,   100,   101,   102,   103,   104,   -31,   105,   129,   106,
-     138,   173,    92,   141,   107,   142,   174,   172,     8,     9,
-     143,   -33,    97,    90,    91,   -33,   -33,   -33,   -33,   -33,
-     -33,   -33,   -33,    98,   166,   -33,   -33,    99,   -33,   100,
-     101,   102,   103,   104,   -33,   105,    11,   106,   179,   151,
-     123,   126,   107,   135,   125,   130,     2,   139,     2,    90,
-      91,    -5,    12,    55,   161,    13,    14,    15,    16,    17,
-      18,    19,    20,    65,    66,    21,    22,    23,    24,    25,
-      26,    27,    28,    29,    30,    57,    59,    31,    61,    -4,
-      12,    63,    32,    13,    14,    15,    16,    17,    18,    19,
-      20,    64,    71,    21,    22,    23,    24,    25,    26,    27,
-      28,    29,    30,    72,    73,    31,   180,    90,    91,    52,
-      32,   -85,    97,    82,    83,   -85,   -85,   -85,   -85,   -85,
-     -85,   -85,   -85,    84,   190,   -85,   -85,    99,   -85,   -85,
-     -85,   -85,   -85,   -85,   -85,    85,    97,   106,    86,    87,
-     -52,   -52,   140,   -52,   -52,   -52,   -52,    98,    95,   -52,
-     -52,    99,   114,   115,   116,   117,    96,   148,   149,   150,
-     158,   106,   155,   159,    97,   163,   118,   -76,   -76,   -76,
-     -76,   -76,   -76,   -76,   -76,   160,   164,   -76,   -76,    99,
-      13,    14,    15,    16,    17,    18,    19,    20,    91,   106,
-      21,    22,    14,    15,   140,    17,    18,    19,    20,   168,
-     175,    21,    22,   177,   181,   182,   183,    32,   187,   167,
-     188,   169,   170,   171,   185,   189,    53,    51,    32,   176,
-      75,   178,   121,     0,   133,   162,     0,     0,     0,     0,
-     184
+      10,    88,    89,   150,   151,   152,   153,   154,   155,   135,
+      54,   123,    56,    11,    58,   126,   145,    62,   164,     2,
+     146,   136,     1,     1,   148,   149,   147,   -31,   101,    90,
+      91,   -31,   -31,   -31,   -31,   -31,   -31,   -31,   -31,   102,
+     162,   -31,   -31,   103,   -31,   104,   105,   106,   107,   108,
+     -31,   109,   181,   110,     2,    52,    55,    65,    66,   172,
+      57,   182,   111,     8,     9,    67,   131,    59,   140,    92,
+      68,    61,   145,   133,   180,   142,   146,    65,    66,    -5,
+      12,    90,    91,    13,    14,    15,    16,    17,    18,    19,
+      20,    71,   174,    21,    22,    23,    24,    25,    26,    27,
+      28,    29,    30,   159,    63,    31,   187,   127,   130,    64,
+     139,     2,    90,    91,    32,   -33,   101,    72,   169,   -33,
+     -33,   -33,   -33,   -33,   -33,   -33,   -33,   102,    73,   -33,
+     -33,   103,   -33,   104,   105,   106,   107,   108,   -33,   109,
+      52,   110,   129,   134,    82,   143,    83,    -4,    12,    84,
+     111,    13,    14,    15,    16,    17,    18,    19,    20,    90,
+      91,    21,    22,    23,    24,    25,    26,    27,    28,    29,
+      30,    85,    86,    31,   188,    90,    91,    87,    99,   -85,
+     101,   100,    32,   -85,   -85,   -85,   -85,   -85,   -85,   -85,
+     -85,   156,   198,   -85,   -85,   103,   -85,   -85,   -85,   -85,
+     -85,   -85,   -85,   157,   163,   110,   158,   166,   101,   167,
+     168,   171,   -52,   -52,   144,   -52,   -52,   -52,   -52,   102,
+      91,   -52,   -52,   103,   118,   119,   120,   121,   172,   176,
+     183,   101,   185,   110,   -76,   -76,   -76,   -76,   -76,   -76,
+     -76,   -76,   122,   189,   -76,   -76,   103,    13,    14,    15,
+      16,    17,    18,    19,    20,   190,   110,    21,    22,   191,
+     193,   195,   196,    14,    15,   144,    17,    18,    19,    20,
+     197,    53,    21,    22,    51,    75,   125,   175,    32,   177,
+     178,   179,    93,    94,    95,    96,    97,   184,   137,   186,
+     170,     0,    98,    32,     0,     0,     0,     0,   192
 };
 
 #define yypact_value_is_default(yystate) \
-  ((yystate) == (-90))
+  ((yystate) == (-91))
 
 #define yytable_value_is_error(yytable_value) \
   YYID (0)
 
 static const yytype_int16 yycheck[] =
 {
-       1,    67,    68,    10,    93,    94,    76,     3,    76,    14,
-      28,    81,    13,    81,    15,   104,    34,    18,     3,    32,
-      33,    23,    26,    27,    90,    91,    30,    32,    33,    31,
-      78,    35,    80,     0,     1,    31,   102,     4,     5,     6,
-       7,     8,     9,    10,    11,    12,    31,    14,    15,    16,
-      17,    18,    19,    20,    21,    22,    23,    24,    78,    26,
-      80,    26,    69,   133,    31,   133,    31,   156,    26,    27,
-      29,     0,     1,    32,    33,     4,     5,     6,     7,     8,
-       9,    10,    11,    12,   150,    14,    15,    16,    17,    18,
-      19,    20,    21,    22,    23,    24,     0,    26,   164,   100,
-      77,    78,    31,    80,    77,    78,    31,    80,    31,    32,
-      33,     0,     1,    31,   115,     4,     5,     6,     7,     8,
-       9,    10,    11,    26,    27,    14,    15,    16,    17,    18,
-      19,    20,    21,    22,    23,    31,    26,    26,    31,     0,
-       1,    26,    31,     4,     5,     6,     7,     8,     9,    10,
-      11,    26,    31,    14,    15,    16,    17,    18,    19,    20,
-      21,    22,    23,     1,     1,    26,    31,    32,    33,    31,
-      31,     0,     1,    31,    31,     4,     5,     6,     7,     8,
-       9,    10,    11,    31,   185,    14,    15,    16,    17,    18,
-      19,    20,    21,    22,    23,    31,     1,    26,    31,    31,
-       5,     6,    31,     8,     9,    10,    11,    12,    31,    14,
-      15,    16,    17,    18,    19,    20,    31,    31,    31,    25,
-       1,    26,    26,    13,     1,    26,    31,     4,     5,     6,
-       7,     8,     9,    10,    11,    31,    14,    14,    15,    16,
-       4,     5,     6,     7,     8,     9,    10,    11,    33,    26,
-      14,    15,     5,     6,    31,     8,     9,    10,    11,    31,
-      31,    14,    15,    31,    31,    31,    31,    31,    31,   151,
-      31,   153,   154,   155,    34,    31,     7,     6,    31,   161,
-      37,   163,    76,    -1,    79,   116,    -1,    -1,    -1,    -1,
-     172
+       1,    67,    68,    93,    94,    95,    96,    97,    98,    23,
+      10,    76,    13,     0,    15,    76,    81,    18,   108,    35,
+      81,    35,     3,     3,    90,    91,    33,     0,     1,    36,
+      37,     4,     5,     6,     7,     8,     9,    10,    11,    12,
+     106,    14,    15,    16,    17,    18,    19,    20,    21,    22,
+      23,    24,    26,    26,    35,    35,    35,    26,    27,    14,
+      35,    35,    35,    26,    27,    34,    78,    26,    80,    69,
+      39,    35,   137,    78,   164,    80,   137,    26,    27,     0,
+       1,    36,    37,     4,     5,     6,     7,     8,     9,    10,
+      11,    35,   158,    14,    15,    16,    17,    18,    19,    20,
+      21,    22,    23,   104,    26,    26,   172,    77,    78,    26,
+      80,    35,    36,    37,    35,     0,     1,     1,   119,     4,
+       5,     6,     7,     8,     9,    10,    11,    12,     1,    14,
+      15,    16,    17,    18,    19,    20,    21,    22,    23,    24,
+      35,    26,    77,    78,    35,    80,    35,     0,     1,    35,
+      35,     4,     5,     6,     7,     8,     9,    10,    11,    36,
+      37,    14,    15,    16,    17,    18,    19,    20,    21,    22,
+      23,    35,    35,    26,    35,    36,    37,    35,    35,     0,
+       1,    35,    35,     4,     5,     6,     7,     8,     9,    10,
+      11,    35,   193,    14,    15,    16,    17,    18,    19,    20,
+      21,    22,    23,    35,    26,    26,    25,     1,     1,    13,
+      35,    26,     5,     6,    35,     8,     9,    10,    11,    12,
+      37,    14,    15,    16,    17,    18,    19,    20,    14,    35,
+      35,     1,    35,    26,     4,     5,     6,     7,     8,     9,
+      10,    11,    35,    35,    14,    15,    16,     4,     5,     6,
+       7,     8,     9,    10,    11,    35,    26,    14,    15,    35,
+      38,    35,    35,     5,     6,    35,     8,     9,    10,    11,
+      35,     7,    14,    15,     6,    37,    76,   159,    35,   161,
+     162,   163,    28,    29,    30,    31,    32,   169,    79,   171,
+     120,    -1,    38,    35,    -1,    -1,    -1,    -1,   180
 };
 
 /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
    symbol of state STATE-NUM.  */
 static const yytype_uint8 yystos[] =
 {
-       0,     3,    31,    37,    38,    39,    63,    81,    26,    27,
-      79,     0,     1,     4,     5,     6,     7,     8,     9,    10,
+       0,     3,    35,    41,    42,    43,    67,    85,    26,    27,
+      83,     0,     1,     4,     5,     6,     7,     8,     9,    10,
       11,    14,    15,    16,    17,    18,    19,    20,    21,    22,
-      23,    26,    31,    40,    41,    43,    44,    45,    46,    52,
-      53,    55,    59,    61,    64,    65,    67,    69,    70,    71,
-      80,    39,    31,    38,    81,    31,    79,    31,    79,    26,
-      85,    31,    79,    26,    26,    26,    27,    30,    35,    83,
-      84,    31,     1,     1,    47,    47,    56,    58,    62,    76,
-      68,    74,    31,    31,    31,    31,    31,    31,    83,    83,
-      32,    33,    81,    28,    34,    31,    31,     1,    12,    16,
-      18,    19,    20,    21,    22,    24,    26,    31,    42,    48,
-      49,    72,    73,    75,    17,    18,    19,    20,    31,    42,
-      57,    73,    75,    41,    54,    80,    41,    55,    60,    67,
-      80,    23,    31,    74,    77,    41,    55,    66,    67,    80,
-      31,    42,    75,    29,    83,    83,    84,    84,    31,    31,
-      25,    79,    78,    79,    83,    26,    84,    50,     1,    13,
-      31,    79,    78,    26,    14,    82,    83,    82,    31,    82,
-      82,    82,    84,    26,    31,    31,    82,    31,    82,    83,
-      31,    31,    31,    31,    82,    34,    51,    31,    31,    31,
-      79
+      23,    26,    35,    44,    45,    47,    48,    49,    50,    56,
+      57,    59,    63,    65,    68,    69,    71,    73,    74,    75,
+      84,    43,    35,    42,    85,    35,    83,    35,    83,    26,
+      89,    35,    83,    26,    26,    26,    27,    34,    39,    87,
+      88,    35,     1,     1,    51,    51,    60,    62,    66,    80,
+      72,    78,    35,    35,    35,    35,    35,    35,    87,    87,
+      36,    37,    85,    28,    29,    30,    31,    32,    38,    35,
+      35,     1,    12,    16,    18,    19,    20,    21,    22,    24,
+      26,    35,    46,    52,    53,    76,    77,    79,    17,    18,
+      19,    20,    35,    46,    61,    77,    79,    45,    58,    84,
+      45,    59,    64,    71,    84,    23,    35,    78,    81,    45,
+      59,    70,    71,    84,    35,    46,    79,    33,    87,    87,
+      88,    88,    88,    88,    88,    88,    35,    35,    25,    83,
+      82,    83,    87,    26,    88,    54,     1,    13,    35,    83,
+      82,    26,    14,    86,    87,    86,    35,    86,    86,    86,
+      88,    26,    35,    35,    86,    35,    86,    87,    35,    35,
+      35,    35,    86,    38,    55,    35,    35,    35,    83
 };
 
 #define yyerrok                (yyerrstatus = 0)
@@ -823,17 +842,18 @@ static const yytype_uint8 yystos[] =
 
 #define YYRECOVERING()  (!!yyerrstatus)
 
-#define YYBACKUP(Token, Value)                                 \
-do                                                             \
-  if (yychar == YYEMPTY && yylen == 1)                         \
-    {                                                          \
-      yychar = (Token);                                                \
-      yylval = (Value);                                                \
-      YYPOPSTACK (1);                                          \
-      goto yybackup;                                           \
-    }                                                          \
-  else                                                         \
-    {                                                          \
+#define YYBACKUP(Token, Value)                                  \
+do                                                              \
+  if (yychar == YYEMPTY)                                        \
+    {                                                           \
+      yychar = (Token);                                         \
+      yylval = (Value);                                         \
+      YYPOPSTACK (yylen);                                       \
+      yystate = *yyssp;                                         \
+      goto yybackup;                                            \
+    }                                                           \
+  else                                                          \
+    {                                                           \
       yyerror (YY_("syntax error: cannot back up")); \
       YYERROR;                                                 \
     }                                                          \
@@ -928,6 +948,8 @@ yy_symbol_value_print (yyoutput, yytype, yyvaluep)
     YYSTYPE const * const yyvaluep;
 #endif
 {
+  FILE *yyo = yyoutput;
+  YYUSE (yyo);
   if (!yyvaluep)
     return;
 # ifdef YYPRINT
@@ -1179,12 +1201,12 @@ static int
 yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
                 yytype_int16 *yyssp, int yytoken)
 {
-  YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]);
+  YYSIZE_T yysize0 = yytnamerr (YY_NULL, yytname[yytoken]);
   YYSIZE_T yysize = yysize0;
   YYSIZE_T yysize1;
   enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
   /* Internationalized format string. */
-  const char *yyformat = 0;
+  const char *yyformat = YY_NULL;
   /* Arguments of yyformat. */
   char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
   /* Number of reported tokens (one for the "unexpected", one per
@@ -1244,7 +1266,7 @@ yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
                     break;
                   }
                 yyarg[yycount++] = yytname[yyx];
-                yysize1 = yysize + yytnamerr (0, yytname[yyx]);
+                yysize1 = yysize + yytnamerr (YY_NULL, yytname[yyx]);
                 if (! (yysize <= yysize1
                        && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
                   return 2;
@@ -1329,7 +1351,7 @@ yydestruct (yymsg, yytype, yyvaluep)
 
   switch (yytype)
     {
-      case 53: /* "choice_entry" */
+      case 57: /* "choice_entry" */
 
        {
        fprintf(stderr, "%s:%d: missing end statement for this entry\n",
@@ -1339,7 +1361,7 @@ yydestruct (yymsg, yytype, yyvaluep)
 };
 
        break;
-      case 59: /* "if_entry" */
+      case 63: /* "if_entry" */
 
        {
        fprintf(stderr, "%s:%d: missing end statement for this entry\n",
@@ -1349,7 +1371,7 @@ yydestruct (yymsg, yytype, yyvaluep)
 };
 
        break;
-      case 65: /* "menu_entry" */
+      case 69: /* "menu_entry" */
 
        {
        fprintf(stderr, "%s:%d: missing end statement for this entry\n",
@@ -1426,7 +1448,7 @@ yyparse ()
        `yyss': related to states.
        `yyvs': related to semantic values.
 
-       Refer to the stacks thru separate pointers, to allow yyoverflow
+       Refer to the stacks through separate pointers, to allow yyoverflow
        to reallocate them elsewhere.  */
 
     /* The state stack.  */
@@ -2012,46 +2034,66 @@ yyreduce:
 
   case 109:
 
-    { (yyval.expr) = expr_alloc_comp(E_EQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
+    { (yyval.expr) = expr_alloc_comp(E_LTH, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
     break;
 
   case 110:
 
-    { (yyval.expr) = expr_alloc_comp(E_UNEQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
+    { (yyval.expr) = expr_alloc_comp(E_LEQ, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
     break;
 
   case 111:
 
-    { (yyval.expr) = (yyvsp[(2) - (3)].expr); }
+    { (yyval.expr) = expr_alloc_comp(E_GTH, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
     break;
 
   case 112:
 
-    { (yyval.expr) = expr_alloc_one(E_NOT, (yyvsp[(2) - (2)].expr)); }
+    { (yyval.expr) = expr_alloc_comp(E_GEQ, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
     break;
 
   case 113:
 
-    { (yyval.expr) = expr_alloc_two(E_OR, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); }
+    { (yyval.expr) = expr_alloc_comp(E_EQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
     break;
 
   case 114:
 
-    { (yyval.expr) = expr_alloc_two(E_AND, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); }
+    { (yyval.expr) = expr_alloc_comp(E_UNEQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); }
     break;
 
   case 115:
 
-    { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), 0); free((yyvsp[(1) - (1)].string)); }
+    { (yyval.expr) = (yyvsp[(2) - (3)].expr); }
     break;
 
   case 116:
 
-    { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), SYMBOL_CONST); free((yyvsp[(1) - (1)].string)); }
+    { (yyval.expr) = expr_alloc_one(E_NOT, (yyvsp[(2) - (2)].expr)); }
     break;
 
   case 117:
 
+    { (yyval.expr) = expr_alloc_two(E_OR, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); }
+    break;
+
+  case 118:
+
+    { (yyval.expr) = expr_alloc_two(E_AND, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); }
+    break;
+
+  case 119:
+
+    { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), 0); free((yyvsp[(1) - (1)].string)); }
+    break;
+
+  case 120:
+
+    { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), SYMBOL_CONST); free((yyvsp[(1) - (1)].string)); }
+    break;
+
+  case 121:
+
     { (yyval.string) = NULL; }
     break;
 
@@ -2243,7 +2285,7 @@ yyabortlab:
   yyresult = 1;
   goto yyreturn;
 
-#if !defined(yyoverflow) || YYERROR_VERBOSE
+#if !defined yyoverflow || YYERROR_VERBOSE
 /*-------------------------------------------------.
 | yyexhaustedlab -- memory exhaustion comes here.  |
 `-------------------------------------------------*/
index 0f683cfa53e9abf9aecc032e03334fca182c0d20..71bf8bff696a41ff3f3ff8cdfe46149e4e11506d 100644 (file)
@@ -69,6 +69,10 @@ static struct menu *current_menu, *current_entry;
 %token <string> T_WORD
 %token <string> T_WORD_QUOTE
 %token T_UNEQUAL
+%token T_LESS
+%token T_LESS_EQUAL
+%token T_GREATER
+%token T_GREATER_EQUAL
 %token T_CLOSE_PAREN
 %token T_OPEN_PAREN
 %token T_EOL
@@ -76,6 +80,7 @@ static struct menu *current_menu, *current_entry;
 %left T_OR
 %left T_AND
 %left T_EQUAL T_UNEQUAL
+%left T_LESS T_LESS_EQUAL T_GREATER T_GREATER_EQUAL
 %nonassoc T_NOT
 
 %type <string> prompt
@@ -467,6 +472,10 @@ if_expr:  /* empty */                      { $$ = NULL; }
 ;
 
 expr:    symbol                                { $$ = expr_alloc_symbol($1); }
+       | symbol T_LESS symbol                  { $$ = expr_alloc_comp(E_LTH, $1, $3); }
+       | symbol T_LESS_EQUAL symbol            { $$ = expr_alloc_comp(E_LEQ, $1, $3); }
+       | symbol T_GREATER symbol               { $$ = expr_alloc_comp(E_GTH, $1, $3); }
+       | symbol T_GREATER_EQUAL symbol         { $$ = expr_alloc_comp(E_GEQ, $1, $3); }
        | symbol T_EQUAL symbol                 { $$ = expr_alloc_comp(E_EQUAL, $1, $3); }
        | symbol T_UNEQUAL symbol               { $$ = expr_alloc_comp(E_UNEQUAL, $1, $3); }
        | T_OPEN_PAREN expr T_CLOSE_PAREN       { $$ = $2; }
index 86a4fe75f453735936e3b218f885dcd887216659..1a10d8ac81620faad519d4f95ca8552ac4a958c1 100755 (executable)
@@ -82,7 +82,7 @@ kallsyms()
                kallsymopt="${kallsymopt} --all-symbols"
        fi
 
-       if [ -n "${CONFIG_ARM}" ] && [ -n "${CONFIG_PAGE_OFFSET}" ]; then
+       if [ -n "${CONFIG_ARM}" ] && [ -z "${CONFIG_XIP_KERNEL}" ] && [ -n "${CONFIG_PAGE_OFFSET}" ]; then
                kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET"
        fi
 
@@ -111,7 +111,6 @@ sortextable()
 }
 
 # Delete output files in case of error
-trap cleanup SIGHUP SIGINT SIGQUIT SIGTERM ERR
 cleanup()
 {
        rm -f .old_version
@@ -124,6 +123,20 @@ cleanup()
        rm -f vmlinux.o
 }
 
+on_exit()
+{
+       if [ $? -ne 0 ]; then
+               cleanup
+       fi
+}
+trap on_exit EXIT
+
+on_signals()
+{
+       exit 1
+}
+trap on_signals HUP INT QUIT TERM
+
 #
 #
 # Use "make V=1" to debug this script
@@ -231,7 +244,6 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then
        if ! cmp -s System.map .tmp_System.map; then
                echo >&2 Inconsistent kallsyms data
                echo >&2 Try "make KALLSYMS_EXTRA_PASS=1" as a workaround
-               cleanup
                exit 1
        fi
 fi
index cdb491d845035e59bff19a5cde7f1ca84c28c17f..c0a932dff3290c4675688f5ce865c1f933a88b0d 100755 (executable)
@@ -154,7 +154,7 @@ exuberant()
 {
        all_target_sources | xargs $1 -a                        \
        -I __initdata,__exitdata,__initconst,                   \
-       -I __cpuinitdata,__initdata_memblock                    \
+       -I __initdata_memblock                                  \
        -I __refdata,__attribute,__maybe_unused,__always_unused \
        -I __acquires,__releases,__deprecated                   \
        -I __read_mostly,__aligned,____cacheline_aligned        \
index 5da129e10aa2dcd049647d10d025c26e08b7cf65..326e826a5d20ac5ee3829aea1aca23dfc964c2ce 100644 (file)
@@ -127,7 +127,7 @@ int acpi_getopt(int argc, char **argv, char *opts)
                    argv[acpi_gbl_optind][0] != '-' ||
                    argv[acpi_gbl_optind][1] == '\0') {
                        return (ACPI_OPT_END);
-               } else if (ACPI_STRCMP(argv[acpi_gbl_optind], "--") == 0) {
+               } else if (strcmp(argv[acpi_gbl_optind], "--") == 0) {
                        acpi_gbl_optind++;
                        return (ACPI_OPT_END);
                }
@@ -140,7 +140,7 @@ int acpi_getopt(int argc, char **argv, char *opts)
        /* Make sure that the option is legal */
 
        if (current_char == ':' ||
-           (opts_ptr = ACPI_STRCHR(opts, current_char)) == NULL) {
+           (opts_ptr = strchr(opts, current_char)) == NULL) {
                ACPI_OPTION_ERROR("Illegal option: -", current_char);
 
                if (argv[acpi_gbl_optind][++current_char_ptr] == '\0') {
index 38f095d86b5260d54a30f8dfde8da33de91d1995..79e2d1d435d1337dbb35d138b1e3e448a60217eb 100644 (file)
@@ -22,9 +22,6 @@ acpidump options are as follow:
 .B \-b
 Dump tables to binary files
 .TP
-.B \-c
-Dump customized tables
-.TP
 .B \-h \-?
 This help message
 .TP
@@ -48,15 +45,25 @@ Verbose mode
 .B \-a <Address>
 Get table via a physical address
 .TP
+.B \-c <on|off>
+Turning on/off customized table dumping
+.TP
 .B \-f <BinaryFile>
 Get table via a binary file
 .TP
 .B \-n <Signature>
 Get table via a name/signature
 .TP
-Invocation without parameters dumps all available tables
+.B \-x
+Do not use but dump XSDT
+.TP
+.B \-x \-x
+Do not use or dump XSDT
+.TP
+.fi
+Invocation without parameters dumps all available tables.
 .TP
-Multiple mixed instances of -a, -f, and -n are supported
+Multiple mixed instances of -a, -f, and -n are supported.
 
 .SH EXAMPLES
 
index db15c9d2049e08f6090d3d391a7617246aa9c1c5..dd5008b0617a033c76b0fda581aa4fb6a74e5788 100644 (file)
@@ -222,7 +222,7 @@ acpi_os_get_table_by_address(acpi_physical_address address,
                goto exit;
        }
 
-       ACPI_MEMCPY(local_table, mapped_table, table_length);
+       memcpy(local_table, mapped_table, table_length);
 
 exit:
        osl_unmap_table(mapped_table);
@@ -531,7 +531,7 @@ static acpi_status osl_load_rsdp(void)
        gbl_rsdp_address =
            rsdp_base + (ACPI_CAST8(mapped_table) - rsdp_address);
 
-       ACPI_MEMCPY(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp));
+       memcpy(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp));
        acpi_os_unmap_memory(rsdp_address, rsdp_size);
 
        return (AE_OK);
@@ -582,64 +582,67 @@ static acpi_status osl_table_initialize(void)
                return (AE_OK);
        }
 
-       /* Get RSDP from memory */
+       if (!gbl_dump_customized_tables) {
 
-       status = osl_load_rsdp();
-       if (ACPI_FAILURE(status)) {
-               return (status);
-       }
+               /* Get RSDP from memory */
+
+               status = osl_load_rsdp();
+               if (ACPI_FAILURE(status)) {
+                       return (status);
+               }
 
-       /* Get XSDT from memory */
+               /* Get XSDT from memory */
 
-       if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) {
-               if (gbl_xsdt) {
-                       free(gbl_xsdt);
-                       gbl_xsdt = NULL;
+               if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) {
+                       if (gbl_xsdt) {
+                               free(gbl_xsdt);
+                               gbl_xsdt = NULL;
+                       }
+
+                       gbl_revision = 2;
+                       status = osl_get_bios_table(ACPI_SIG_XSDT, 0,
+                                                   ACPI_CAST_PTR(struct
+                                                                 acpi_table_header
+                                                                 *, &gbl_xsdt),
+                                                   &address);
+                       if (ACPI_FAILURE(status)) {
+                               return (status);
+                       }
                }
 
-               gbl_revision = 2;
-               status = osl_get_bios_table(ACPI_SIG_XSDT, 0,
-                                           ACPI_CAST_PTR(struct
-                                                         acpi_table_header *,
-                                                         &gbl_xsdt), &address);
-               if (ACPI_FAILURE(status)) {
-                       return (status);
+               /* Get RSDT from memory */
+
+               if (gbl_rsdp.rsdt_physical_address) {
+                       if (gbl_rsdt) {
+                               free(gbl_rsdt);
+                               gbl_rsdt = NULL;
+                       }
+
+                       status = osl_get_bios_table(ACPI_SIG_RSDT, 0,
+                                                   ACPI_CAST_PTR(struct
+                                                                 acpi_table_header
+                                                                 *, &gbl_rsdt),
+                                                   &address);
+                       if (ACPI_FAILURE(status)) {
+                               return (status);
+                       }
                }
-       }
 
-       /* Get RSDT from memory */
+               /* Get FADT from memory */
 
-       if (gbl_rsdp.rsdt_physical_address) {
-               if (gbl_rsdt) {
-                       free(gbl_rsdt);
-                       gbl_rsdt = NULL;
+               if (gbl_fadt) {
+                       free(gbl_fadt);
+                       gbl_fadt = NULL;
                }
 
-               status = osl_get_bios_table(ACPI_SIG_RSDT, 0,
+               status = osl_get_bios_table(ACPI_SIG_FADT, 0,
                                            ACPI_CAST_PTR(struct
                                                          acpi_table_header *,
-                                                         &gbl_rsdt), &address);
+                                                         &gbl_fadt),
+                                           &gbl_fadt_address);
                if (ACPI_FAILURE(status)) {
                        return (status);
                }
-       }
-
-       /* Get FADT from memory */
-
-       if (gbl_fadt) {
-               free(gbl_fadt);
-               gbl_fadt = NULL;
-       }
-
-       status = osl_get_bios_table(ACPI_SIG_FADT, 0,
-                                   ACPI_CAST_PTR(struct acpi_table_header *,
-                                                 &gbl_fadt),
-                                   &gbl_fadt_address);
-       if (ACPI_FAILURE(status)) {
-               return (status);
-       }
-
-       if (!gbl_dump_customized_tables) {
 
                /* Add mandatory tables to global table list first */
 
@@ -961,7 +964,7 @@ osl_get_bios_table(char *signature,
                goto exit;
        }
 
-       ACPI_MEMCPY(local_table, mapped_table, table_length);
+       memcpy(local_table, mapped_table, table_length);
        *address = table_address;
        *table = local_table;
 
index 0b1fa290245a904d3255d1fd84976baab2f09851..44ad4889d468cad4b576e39eaad4ef5ba3844b07 100644 (file)
@@ -54,7 +54,7 @@ ACPI_MODULE_NAME("osunixmap")
 #ifndef O_BINARY
 #define O_BINARY 0
 #endif
-#ifdef _free_BSD
+#if defined(_dragon_fly) || defined(_free_BSD)
 #define MMAP_FLAGS          MAP_SHARED
 #else
 #define MMAP_FLAGS          MAP_PRIVATE
index 84bdef0136cbc15a6b9aea1c7c435f57452445fa..eed534481434301586fe13e28150a58677e57ac2 100644 (file)
@@ -66,7 +66,7 @@
 EXTERN u8 INIT_GLOBAL(gbl_summary_mode, FALSE);
 EXTERN u8 INIT_GLOBAL(gbl_verbose_mode, FALSE);
 EXTERN u8 INIT_GLOBAL(gbl_binary_mode, FALSE);
-EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, FALSE);
+EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, TRUE);
 EXTERN u8 INIT_GLOBAL(gbl_do_not_dump_xsdt, FALSE);
 EXTERN ACPI_FILE INIT_GLOBAL(gbl_output_file, NULL);
 EXTERN char INIT_GLOBAL(*gbl_output_filename, NULL);
index c736adf5fb55241ecb564a796496b2fad32e3b67..61d0de804b709d9568c1870565876f9c7fa0d5c0 100644 (file)
@@ -329,7 +329,7 @@ int ap_dump_table_by_name(char *signature)
        acpi_status status;
        int table_status;
 
-       if (ACPI_STRLEN(signature) != ACPI_NAME_SIZE) {
+       if (strlen(signature) != ACPI_NAME_SIZE) {
                acpi_log_error
                    ("Invalid table signature [%s]: must be exactly 4 characters\n",
                     signature);
@@ -338,15 +338,15 @@ int ap_dump_table_by_name(char *signature)
 
        /* Table signatures are expected to be uppercase */
 
-       ACPI_STRCPY(local_signature, signature);
+       strcpy(local_signature, signature);
        acpi_ut_strupr(local_signature);
 
        /* To be friendly, handle tables whose signatures do not match the name */
 
        if (ACPI_COMPARE_NAME(local_signature, "FADT")) {
-               ACPI_STRCPY(local_signature, ACPI_SIG_FADT);
+               strcpy(local_signature, ACPI_SIG_FADT);
        } else if (ACPI_COMPARE_NAME(local_signature, "MADT")) {
-               ACPI_STRCPY(local_signature, ACPI_SIG_MADT);
+               strcpy(local_signature, ACPI_SIG_MADT);
        }
 
        /* Dump all instances of this signature (to handle multiple SSDTs) */
index 8f2fe168228eb12d9e97efa085d9d5d5d41d94f3..a37f9702b2a90433978f4a6715919c4a9f8e11e6 100644 (file)
@@ -136,10 +136,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance)
        } else {
                ACPI_MOVE_NAME(filename, table->signature);
        }
-       filename[0] = (char)ACPI_TOLOWER(filename[0]);
-       filename[1] = (char)ACPI_TOLOWER(filename[1]);
-       filename[2] = (char)ACPI_TOLOWER(filename[2]);
-       filename[3] = (char)ACPI_TOLOWER(filename[3]);
+       filename[0] = (char)tolower((int)filename[0]);
+       filename[1] = (char)tolower((int)filename[1]);
+       filename[2] = (char)tolower((int)filename[2]);
+       filename[3] = (char)tolower((int)filename[3]);
        filename[ACPI_NAME_SIZE] = 0;
 
        /* Handle multiple SSDts - create different filenames for each */
@@ -147,10 +147,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance)
        if (instance > 0) {
                acpi_ut_snprintf(instance_str, sizeof(instance_str), "%u",
                                 instance);
-               ACPI_STRCAT(filename, instance_str);
+               strcat(filename, instance_str);
        }
 
-       ACPI_STRCAT(filename, ACPI_TABLE_FILE_SUFFIX);
+       strcat(filename, ACPI_TABLE_FILE_SUFFIX);
 
        if (gbl_verbose_mode) {
                acpi_log_error
index d0ba6535f5af0d0641487cda70ebb5b89ce24784..57620f66ae6c65c3db94952b83d01b8b492eb2fb 100644 (file)
@@ -80,7 +80,7 @@ struct ap_dump_action action_table[AP_MAX_ACTIONS];
 u32 current_action = 0;
 
 #define AP_UTILITY_NAME             "ACPI Binary Table Dump Utility"
-#define AP_SUPPORTED_OPTIONS        "?a:bcf:hn:o:r:svxz"
+#define AP_SUPPORTED_OPTIONS        "?a:bc:f:hn:o:r:svxz"
 
 /******************************************************************************
  *
@@ -96,7 +96,6 @@ static void ap_display_usage(void)
        ACPI_USAGE_HEADER("acpidump [options]");
 
        ACPI_OPTION("-b", "Dump tables to binary files");
-       ACPI_OPTION("-c", "Dump customized tables");
        ACPI_OPTION("-h -?", "This help message");
        ACPI_OPTION("-o <File>", "Redirect output to file");
        ACPI_OPTION("-r <Address>", "Dump tables from specified RSDP");
@@ -107,6 +106,7 @@ static void ap_display_usage(void)
        ACPI_USAGE_TEXT("\nTable Options:\n");
 
        ACPI_OPTION("-a <Address>", "Get table via a physical address");
+       ACPI_OPTION("-c <on|off>", "Turning on/off customized table dumping");
        ACPI_OPTION("-f <BinaryFile>", "Get table via a binary file");
        ACPI_OPTION("-n <Signature>", "Get table via a name/signature");
        ACPI_OPTION("-x", "Do not use but dump XSDT");
@@ -181,7 +181,16 @@ static int ap_do_options(int argc, char **argv)
 
                case 'c':       /* Dump customized tables */
 
-                       gbl_dump_customized_tables = TRUE;
+                       if (!strcmp(acpi_gbl_optarg, "on")) {
+                               gbl_dump_customized_tables = TRUE;
+                       } else if (!strcmp(acpi_gbl_optarg, "off")) {
+                               gbl_dump_customized_tables = FALSE;
+                       } else {
+                               acpi_log_error
+                                   ("%s: Cannot handle this switch, please use on|off\n",
+                                    acpi_gbl_optarg);
+                               return (-1);
+                       }
                        continue;
 
                case 'h':