Merge tag 'powerpc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux
authorLinus Torvalds <torvalds@linux-foundation.org>
Wed, 24 Jun 2015 15:46:32 +0000 (08:46 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 24 Jun 2015 15:46:32 +0000 (08:46 -0700)
Pull powerpc updates from Michael Ellerman:

 - disable the 32-bit vdso when building LE, so we can build with a
   64-bit only toolchain.

 - EEH fixes from Gavin & Richard.

 - enable the sys_kcmp syscall from Laurent.

 - sysfs control for fastsleep workaround from Shreyas.

 - expose OPAL events as an irq chip by Alistair.

 - MSI ops moved to pci_controller_ops by Daniel.

 - fix for kernel to userspace backtraces for perf from Anton.

 - merge pseries and pseries_le defconfigs from Cyril.

 - CXL in-kernel API from Mikey.

 - OPAL prd driver from Jeremy.

 - fix for DSCR handling & tests from Anshuman.

 - Powernv flash mtd driver from Cyril.

 - dynamic DMA Window support on powernv from Alexey.

 - LLVM clang fixes & workarounds from Anton.

 - reworked version of the patch to abort syscalls when transactional.

 - fix the swap encoding to support 4TB, from Aneesh.

 - various fixes as usual.

 - Freescale updates from Scott: Highlights include more 8xx
   optimizations, an e6500 hugetlb optimization, QMan device tree nodes,
   t1024/t1023 support, and various fixes and cleanup.

* tag 'powerpc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux: (180 commits)
  cxl: Fix typo in debug print
  cxl: Add CXL_KERNEL_API config option
  powerpc/powernv: Fix wrong IOMMU table in pnv_ioda_setup_bus_dma()
  powerpc/mm: Change the swap encoding in pte.
  powerpc/mm: PTE_RPN_MAX is not used, remove the same
  powerpc/tm: Abort syscalls in active transactions
  powerpc/iommu/ioda2: Enable compile with IOV=on and IOMMU_API=off
  powerpc/include: Add opal-prd to installed uapi headers
  powerpc/powernv: fix construction of opal PRD messages
  powerpc/powernv: Increase opal-irqchip initcall priority
  powerpc: Make doorbell check preemption safe
  powerpc/powernv: pnv_init_idle_states() should only run on powernv
  macintosh/nvram: Remove as unused
  powerpc: Don't use gcc specific options on clang
  powerpc: Don't use -mno-strict-align on clang
  powerpc: Only use -mtraceback=no, -mno-string and -msoft-float if toolchain supports it
  powerpc: Only use -mabi=altivec if toolchain supports it
  powerpc: Fix duplicate const clang warning in user access code
  vfio: powerpc/spapr: Support Dynamic DMA windows
  vfio: powerpc/spapr: Register memory and define IOMMU v2
  ...

211 files changed:
Documentation/ABI/testing/sysfs-class-cxl
Documentation/devicetree/bindings/powerpc/fsl/fman.txt
Documentation/devicetree/bindings/powerpc/fsl/guts.txt
Documentation/devicetree/bindings/soc/fsl/qman-portals.txt
Documentation/powerpc/00-INDEX
Documentation/powerpc/cxl.txt
Documentation/powerpc/dscr.txt [new file with mode: 0644]
Documentation/powerpc/transactional_memory.txt
Documentation/vfio.txt
MAINTAINERS
arch/powerpc/Kconfig.debug
arch/powerpc/Makefile
arch/powerpc/boot/dts/b4qds.dtsi
arch/powerpc/boot/dts/fsl/b4420si-post.dtsi
arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
arch/powerpc/boot/dts/fsl/b4si-post.dtsi
arch/powerpc/boot/dts/fsl/p1023si-post.dtsi
arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi
arch/powerpc/boot/dts/fsl/t1023si-post.dtsi [new file with mode: 0644]
arch/powerpc/boot/dts/fsl/t1024si-post.dtsi [new file with mode: 0644]
arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi [new file with mode: 0644]
arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
arch/powerpc/boot/dts/kmcoge4.dts
arch/powerpc/boot/dts/oca4080.dts
arch/powerpc/boot/dts/p1023rdb.dts
arch/powerpc/boot/dts/p2041rdb.dts
arch/powerpc/boot/dts/p3041ds.dts
arch/powerpc/boot/dts/p4080ds.dts
arch/powerpc/boot/dts/p5020ds.dts
arch/powerpc/boot/dts/p5040ds.dts
arch/powerpc/boot/dts/t1023rdb.dts [new file with mode: 0644]
arch/powerpc/boot/dts/t1024qds.dts [new file with mode: 0644]
arch/powerpc/boot/dts/t1024rdb.dts [new file with mode: 0644]
arch/powerpc/boot/dts/t104xqds.dtsi
arch/powerpc/boot/dts/t104xrdb.dtsi
arch/powerpc/boot/dts/t208xqds.dtsi
arch/powerpc/boot/dts/t208xrdb.dtsi
arch/powerpc/boot/dts/t4240qds.dts
arch/powerpc/boot/dts/t4240rdb.dts
arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
arch/powerpc/configs/le.config [new file with mode: 0644]
arch/powerpc/configs/ppc64_defconfig
arch/powerpc/configs/pseries_defconfig
arch/powerpc/configs/pseries_le_defconfig [deleted file]
arch/powerpc/include/asm/cputable.h
arch/powerpc/include/asm/cputhreads.h
arch/powerpc/include/asm/device.h
arch/powerpc/include/asm/eeh.h
arch/powerpc/include/asm/iommu.h
arch/powerpc/include/asm/machdep.h
arch/powerpc/include/asm/mmu-8xx.h
arch/powerpc/include/asm/mmu-hash64.h
arch/powerpc/include/asm/mmu_context.h
arch/powerpc/include/asm/opal-api.h
arch/powerpc/include/asm/opal.h
arch/powerpc/include/asm/page.h
arch/powerpc/include/asm/pci-bridge.h
arch/powerpc/include/asm/pgtable-ppc32.h
arch/powerpc/include/asm/pgtable-ppc64.h
arch/powerpc/include/asm/pnv-pci.h
arch/powerpc/include/asm/processor.h
arch/powerpc/include/asm/pte-8xx.h
arch/powerpc/include/asm/pte-book3e.h
arch/powerpc/include/asm/pte-common.h
arch/powerpc/include/asm/pte-hash64.h
arch/powerpc/include/asm/systbl.h
arch/powerpc/include/asm/trace.h
arch/powerpc/include/asm/uaccess.h
arch/powerpc/include/uapi/asm/Kbuild
arch/powerpc/include/uapi/asm/cputable.h
arch/powerpc/include/uapi/asm/eeh.h [new file with mode: 0644]
arch/powerpc/include/uapi/asm/opal-prd.h [new file with mode: 0644]
arch/powerpc/include/uapi/asm/tm.h
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/cputable.c
arch/powerpc/kernel/dma.c
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/eeh_cache.c
arch/powerpc/kernel/eeh_driver.c
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/head_8xx.S
arch/powerpc/kernel/idle_e500.S
arch/powerpc/kernel/iommu.c
arch/powerpc/kernel/msi.c
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/pci-hotplug.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/sysfs.c
arch/powerpc/kernel/tm.S
arch/powerpc/kernel/traps.c
arch/powerpc/kernel/vdso.c
arch/powerpc/kernel/vio.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/lib/Makefile
arch/powerpc/mm/Makefile
arch/powerpc/mm/copro_fault.c
arch/powerpc/mm/hash_native_64.c
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/mm/mem.c
arch/powerpc/mm/mmu_context_hash64.c
arch/powerpc/mm/mmu_context_iommu.c [new file with mode: 0644]
arch/powerpc/mm/tlb_low_64e.S
arch/powerpc/perf/core-book3s.c
arch/powerpc/platforms/52xx/mpc52xx_gpt.c
arch/powerpc/platforms/85xx/Kconfig
arch/powerpc/platforms/85xx/corenet_generic.c
arch/powerpc/platforms/85xx/smp.c
arch/powerpc/platforms/85xx/twr_p102x.c
arch/powerpc/platforms/Kconfig.cputype
arch/powerpc/platforms/cell/axon_msi.c
arch/powerpc/platforms/cell/iommu.c
arch/powerpc/platforms/embedded6xx/hlwd-pic.c
arch/powerpc/platforms/pasemi/Makefile
arch/powerpc/platforms/pasemi/iommu.c
arch/powerpc/platforms/pasemi/msi.c [new file with mode: 0644]
arch/powerpc/platforms/powernv/Kconfig
arch/powerpc/platforms/powernv/Makefile
arch/powerpc/platforms/powernv/eeh-powernv.c
arch/powerpc/platforms/powernv/idle.c [new file with mode: 0644]
arch/powerpc/platforms/powernv/opal-async.c
arch/powerpc/platforms/powernv/opal-dump.c
arch/powerpc/platforms/powernv/opal-elog.c
arch/powerpc/platforms/powernv/opal-hmi.c
arch/powerpc/platforms/powernv/opal-irqchip.c [new file with mode: 0644]
arch/powerpc/platforms/powernv/opal-memory-errors.c
arch/powerpc/platforms/powernv/opal-prd.c [new file with mode: 0644]
arch/powerpc/platforms/powernv/opal-sensor.c
arch/powerpc/platforms/powernv/opal-sysparam.c
arch/powerpc/platforms/powernv/opal-wrappers.S
arch/powerpc/platforms/powernv/opal.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/powernv/pci-p5ioc2.c
arch/powerpc/platforms/powernv/pci.c
arch/powerpc/platforms/powernv/pci.h
arch/powerpc/platforms/powernv/powernv.h
arch/powerpc/platforms/powernv/setup.c
arch/powerpc/platforms/pseries/dlpar.c
arch/powerpc/platforms/pseries/eeh_pseries.c
arch/powerpc/platforms/pseries/iommu.c
arch/powerpc/platforms/pseries/msi.c
arch/powerpc/sysdev/Makefile
arch/powerpc/sysdev/dart_iommu.c
arch/powerpc/sysdev/fsl_msi.c
arch/powerpc/sysdev/i8259.c
arch/powerpc/sysdev/ipic.c
arch/powerpc/sysdev/mpc8xx_pic.c
arch/powerpc/sysdev/mpic.c
arch/powerpc/sysdev/mpic.h
arch/powerpc/sysdev/mpic_pasemi_msi.c [deleted file]
arch/powerpc/sysdev/mpic_u3msi.c
arch/powerpc/sysdev/mv64x60_pic.c
arch/powerpc/sysdev/ppc4xx_hsta_msi.c
arch/powerpc/sysdev/ppc4xx_msi.c
arch/powerpc/sysdev/qe_lib/qe_ic.c
arch/powerpc/sysdev/tsi108_pci.c
arch/powerpc/sysdev/uic.c
arch/powerpc/sysdev/xics/icp-native.c
arch/powerpc/sysdev/xics/xics-common.c
arch/powerpc/sysdev/xilinx_intc.c
drivers/char/ipmi/ipmi_powernv.c
drivers/macintosh/nvram.c [deleted file]
drivers/misc/cxl/Kconfig
drivers/misc/cxl/Makefile
drivers/misc/cxl/api.c [new file with mode: 0644]
drivers/misc/cxl/base.c
drivers/misc/cxl/context.c
drivers/misc/cxl/cxl.h
drivers/misc/cxl/fault.c
drivers/misc/cxl/file.c
drivers/misc/cxl/irq.c
drivers/misc/cxl/main.c
drivers/misc/cxl/native.c
drivers/misc/cxl/pci.c
drivers/misc/cxl/sysfs.c
drivers/misc/cxl/vphb.c [new file with mode: 0644]
drivers/mtd/devices/Kconfig
drivers/mtd/devices/Makefile
drivers/mtd/devices/powernv_flash.c [new file with mode: 0644]
drivers/tty/hvc/hvc_opal.c
drivers/vfio/vfio_iommu_spapr_tce.c
drivers/vfio/vfio_spapr_eeh.c
include/misc/cxl-base.h [new file with mode: 0644]
include/misc/cxl.h
include/uapi/linux/vfio.h
include/uapi/misc/cxl.h
lib/raid6/Makefile
tools/testing/selftests/powerpc/Makefile
tools/testing/selftests/powerpc/dscr/.gitignore [new file with mode: 0644]
tools/testing/selftests/powerpc/dscr/Makefile [new file with mode: 0644]
tools/testing/selftests/powerpc/dscr/dscr.h [new file with mode: 0644]
tools/testing/selftests/powerpc/dscr/dscr_default_test.c [new file with mode: 0644]
tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c [new file with mode: 0644]
tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c [new file with mode: 0644]
tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c [new file with mode: 0644]
tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c [new file with mode: 0644]
tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c [new file with mode: 0644]
tools/testing/selftests/powerpc/dscr/dscr_user_test.c [new file with mode: 0644]
tools/testing/selftests/powerpc/switch_endian/Makefile
tools/testing/selftests/powerpc/tm/Makefile
tools/testing/selftests/powerpc/tm/tm-syscall.c
tools/testing/selftests/powerpc/vphn/Makefile

index d46bba801aace45ed29ea559a7a1c9c1fe764fd5..acfe9df83139a935bca0bde7c3701ff24b147216 100644 (file)
@@ -6,6 +6,17 @@ Example: The real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is
 
 Slave contexts (eg. /sys/class/cxl/afu0.0s):
 
+What:           /sys/class/cxl/<afu>/afu_err_buf
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                AFU Error Buffer contents. The contents of this file are
+               application specific and depends on the AFU being used.
+               Applications interacting with the AFU can use this attribute
+               to know about the current error condition and take appropriate
+               action like logging the event etc.
+
+
 What:           /sys/class/cxl/<afu>/irqs_max
 Date:           September 2014
 Contact:        linuxppc-dev@lists.ozlabs.org
@@ -15,6 +26,7 @@ Description:    read/write
                 that hardware can support (eg. 2037). Write values will limit
                 userspace applications to that many userspace interrupts. Must
                 be >= irqs_min.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<afu>/irqs_min
 Date:           September 2014
@@ -24,6 +36,7 @@ Description:    read only
                 userspace must request on a CXL_START_WORK ioctl. Userspace may
                 omit the num_interrupts field in the START_WORK IOCTL to get
                 this minimum automatically.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<afu>/mmio_size
 Date:           September 2014
@@ -31,6 +44,7 @@ Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read only
                 Decimal value of the size of the MMIO space that may be mmaped
                 by userspace.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<afu>/modes_supported
 Date:           September 2014
@@ -38,6 +52,7 @@ Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read only
                 List of the modes this AFU supports. One per line.
                 Valid entries are: "dedicated_process" and "afu_directed"
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<afu>/mode
 Date:           September 2014
@@ -46,6 +61,7 @@ Description:    read/write
                 The current mode the AFU is using. Will be one of the modes
                 given in modes_supported. Writing will change the mode
                 provided that no user contexts are attached.
+Users:         https://github.com/ibm-capi/libcxl
 
 
 What:           /sys/class/cxl/<afu>/prefault_mode
@@ -59,6 +75,7 @@ Description:    read/write
                                  descriptor as an effective address and
                                  prefault what it points to.
                         all: all segments process calling START_WORK maps.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<afu>/reset
 Date:           September 2014
@@ -66,12 +83,14 @@ Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    write only
                 Writing 1 here will reset the AFU provided there are not
                 contexts active on the AFU.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<afu>/api_version
 Date:           September 2014
 Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read only
                 Decimal value of the current version of the kernel/user API.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<afu>/api_version_compatible
 Date:           September 2014
@@ -79,6 +98,7 @@ Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read only
                 Decimal value of the the lowest version of the userspace API
                 this this kernel supports.
+Users:         https://github.com/ibm-capi/libcxl
 
 
 AFU configuration records (eg. /sys/class/cxl/afu0.0/cr0):
@@ -92,6 +112,7 @@ Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read only
                Hexadecimal value of the vendor ID found in this AFU
                configuration record.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<afu>/cr<config num>/device
 Date:           February 2015
@@ -99,6 +120,7 @@ Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read only
                Hexadecimal value of the device ID found in this AFU
                configuration record.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<afu>/cr<config num>/class
 Date:           February 2015
@@ -106,6 +128,7 @@ Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read only
                Hexadecimal value of the class code found in this AFU
                configuration record.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<afu>/cr<config num>/config
 Date:           February 2015
@@ -115,6 +138,7 @@ Description:    read only
                record. The format is expected to match the either the standard
                or extended configuration space defined by the PCIe
                specification.
+Users:         https://github.com/ibm-capi/libcxl
 
 
 
@@ -126,18 +150,21 @@ Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read only
                 Decimal value of the size of the MMIO space that may be mmaped
                 by userspace. This includes all slave contexts space also.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<afu>m/pp_mmio_len
 Date:           September 2014
 Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read only
                 Decimal value of the Per Process MMIO space length.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<afu>m/pp_mmio_off
 Date:           September 2014
 Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read only
                 Decimal value of the Per Process MMIO space offset.
+Users:         https://github.com/ibm-capi/libcxl
 
 
 Card info (eg. /sys/class/cxl/card0)
@@ -147,12 +174,14 @@ Date:           September 2014
 Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read only
                 Identifies the CAIA Version the card implements.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<card>/psl_revision
 Date:           September 2014
 Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read only
                 Identifies the revision level of the PSL.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<card>/base_image
 Date:           September 2014
@@ -162,6 +191,7 @@ Description:    read only
                 that support loadable PSLs. For FPGAs this field identifies
                 the image contained in the on-adapter flash which is loaded
                 during the initial program load.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<card>/image_loaded
 Date:           September 2014
@@ -169,6 +199,7 @@ Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    read only
                 Will return "user" or "factory" depending on the image loaded
                 onto the card.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<card>/load_image_on_perst
 Date:           December 2014
@@ -183,6 +214,7 @@ Description:    read/write
                 user or factory image to be loaded.
                 Default is to reload on PERST whichever image the card has
                 loaded.
+Users:         https://github.com/ibm-capi/libcxl
 
 What:           /sys/class/cxl/<card>/reset
 Date:           October 2014
@@ -190,3 +222,4 @@ Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    write only
                 Writing 1 will issue a PERST to card which may cause the card
                 to reload the FPGA depending on load_image_on_perst.
+Users:         https://github.com/ibm-capi/libcxl
index edda55f7400415a41d70efc9ecc9245d0cc60996..1fc5328c0651bb7d5219fa75887d8c2f630df3dc 100644 (file)
@@ -189,6 +189,19 @@ PROPERTIES
                Definition: There is one reg region describing the port
                configuration registers.
 
+- fsl,fman-10g-port
+               Usage: optional
+               Value type: boolean
+               Definition: The default port rate is 1G.
+               If this property exists, the port is a 10G port.
+
+- fsl,fman-best-effort-port
+               Usage: optional
+               Value type: boolean
+               Definition: Can be defined only if 10G-support is set.
+               This property marks a best-effort 10G port (10G port that
+               may not be capable of line rate).
+
 EXAMPLE
 
 port@a8000 {
index 7f150b5012cc33f8f261460935062359e4f1fb1c..b71b2039e112b165aff6be74c3dee3bf77fda1a1 100644 (file)
@@ -9,6 +9,11 @@ Required properties:
 
  - compatible : Should define the compatible device type for
    global-utilities.
+   Possible compatibles:
+       "fsl,qoriq-device-config-1.0"
+       "fsl,qoriq-device-config-2.0"
+       "fsl,<chip>-device-config"
+       "fsl,<chip>-guts"
  - reg : Offset and length of the register set for the device.
 
 Recommended properties:
index 48c4dae5d6f944a11c8170dcaa9400b25cfd2ec5..47e46ccbc170f27f7a53de4d49b1d583587dd7f0 100644 (file)
@@ -47,7 +47,7 @@ PROPERTIES
 
        For additional details about the PAMU/LIODN binding(s) see pamu.txt
 
-- fsl,qman-channel-id
+- cell-index
        Usage:          Required
        Value type:     <u32>
        Definition:     The hardware index of the channel. This can also be
@@ -136,7 +136,7 @@ The example below shows a (P4080) QMan portals container/bus node with two porta
                        reg = <0x4000 0x4000>, <0x101000 0x1000>;
                        interrupts = <106 2 0 0>;
                        fsl,liodn = <3 4>;
-                       fsl,qman-channel-id = <1>;
+                       cell-index = <1>;
 
                        fman0 {
                                fsl,liodn = <0x22>;
index 6fd0e8bb814097233280897b9916190a429eda22..9dc845cf7d88c77eec874cf38ee69231b92eefae 100644 (file)
@@ -30,3 +30,5 @@ ptrace.txt
        - Information on the ptrace interfaces for hardware debug registers.
 transactional_memory.txt
        - Overview of the Power8 transactional memory support.
+dscr.txt
+       - Overview of DSCR (Data Stream Control Register) support.
index 2c71ecc519d953bbbd122ca05a7ea2fe5496ab66..2a230d01cd8ce30265c4135bfd461afccbdaf18d 100644 (file)
@@ -133,6 +133,9 @@ User API
     The following file operations are supported on both slave and
     master devices.
 
+    A userspace library libcxl is available here:
+       https://github.com/ibm-capi/libcxl
+    This provides a C interface to this kernel API.
 
 open
 ----
@@ -366,6 +369,7 @@ Sysfs Class
     enumeration and tuning of the accelerators. Its layout is
     described in Documentation/ABI/testing/sysfs-class-cxl
 
+
 Udev rules
 ==========
 
diff --git a/Documentation/powerpc/dscr.txt b/Documentation/powerpc/dscr.txt
new file mode 100644 (file)
index 0000000..1ff4400
--- /dev/null
@@ -0,0 +1,83 @@
+                       DSCR (Data Stream Control Register)
+               ================================================
+
+DSCR register in powerpc allows user to have some control of prefetch of data
+stream in the processor. Please refer to the ISA documents or related manual
+for more detailed information regarding how to use this DSCR to attain this
+control of the prefetches. This document provides an overview of kernel
+support for DSCR, related kernel objects, its functionality and exported
+user interface.
+
+(A) Data Structures:
+
+       (1) thread_struct:
+               dscr            /* Thread DSCR value */
+               dscr_inherit    /* Thread has changed default DSCR */
+
+       (2) PACA:
+               dscr_default    /* per-CPU DSCR default value */
+
+       (3) sysfs.c:
+               dscr_default    /* System DSCR default value */
+
+(B) Scheduler Changes:
+
+       Scheduler will write the per-CPU DSCR default which is stored in the
+       CPU's PACA value into the register if the thread has dscr_inherit value
+       cleared which means that it has not changed the default DSCR till now.
+       If the dscr_inherit value is set which means that it has changed the
+       default DSCR value, scheduler will write the changed value which will
+       now be contained in thread struct's dscr into the register instead of
+       the per-CPU default PACA based DSCR value.
+
+       NOTE: Please note here that the system wide global DSCR value never
+       gets used directly in the scheduler process context switch at all.
+
+(C) SYSFS Interface:
+
+       Global DSCR default:            /sys/devices/system/cpu/dscr_default
+       CPU specific DSCR default:      /sys/devices/system/cpu/cpuN/dscr
+
+       Changing the global DSCR default in the sysfs will change all the CPU
+       specific DSCR defaults immediately in their PACA structures. Again if
+       the current process has the dscr_inherit clear, it also writes the new
+       value into every CPU's DSCR register right away and updates the current
+       thread's DSCR value as well.
+
+       Changing the CPU specific DSCR default value in the sysfs does exactly
+       the same thing as above but unlike the global one above, it just changes
+       stuff for that particular CPU instead for all the CPUs on the system.
+
+(D) User Space Instructions:
+
+       The DSCR register can be accessed in the user space using any of these
+       two SPR numbers available for that purpose.
+
+       (1) Problem state SPR:          0x03    (Un-privileged, POWER8 only)
+       (2) Privileged state SPR:       0x11    (Privileged)
+
+       Accessing DSCR through privileged SPR number (0x11) from user space
+       works, as it is emulated following an illegal instruction exception
+       inside the kernel. Both mfspr and mtspr instructions are emulated.
+
+       Accessing DSCR through user level SPR (0x03) from user space will first
+       create a facility unavailable exception. Inside this exception handler
+       all mfspr instruction based read attempts will get emulated and returned
+       whereas the first mtspr instruction based write attempts will enable
+       the DSCR facility for the next time around (both for read and write) by
+       setting DSCR facility in the FSCR register.
+
+(E) Specifics about 'dscr_inherit':
+
+       The thread struct element 'dscr_inherit' represents whether the thread
+       in question has attempted and changed the DSCR itself using any of the
+       following methods. This element signifies whether the thread wants to
+       use the CPU default DSCR value or its own changed DSCR value in the
+       kernel.
+
+               (1) mtspr instruction   (SPR number 0x03)
+               (2) mtspr instruction   (SPR number 0x11)
+               (3) ptrace interface    (Explicitly set user DSCR value)
+
+       Any child of the process created after this event in the process inherits
+       this same behaviour as well.
index ded69794a5c09da762db7c4c61cc3d23e619d0fa..ba0a2a4a54ba1ffcb484786381b91f5113a62ad6 100644 (file)
@@ -74,22 +74,23 @@ Causes of transaction aborts
 Syscalls
 ========
 
-Performing syscalls from within transaction is not recommended, and can lead
-to unpredictable results.
+Syscalls made from within an active transaction will not be performed and the
+transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL
+| TM_CAUSE_PERSISTENT.
 
-Syscalls do not by design abort transactions, but beware: The kernel code will
-not be running in transactional state.  The effect of syscalls will always
-remain visible, but depending on the call they may abort your transaction as a
-side-effect, read soon-to-be-aborted transactional data that should not remain
-invisible, etc.  If you constantly retry a transaction that constantly aborts
-itself by calling a syscall, you'll have a livelock & make no progress.
+Syscalls made from within a suspended transaction are performed as normal and
+the transaction is not explicitly doomed by the kernel.  However, what the
+kernel does to perform the syscall may result in the transaction being doomed
+by the hardware.  The syscall is performed in suspended mode so any side
+effects will be persistent, independent of transaction success or failure.  No
+guarantees are provided by the kernel about which syscalls will affect
+transaction success.
 
-Simple syscalls (e.g. sigprocmask()) "could" be OK.  Even things like write()
-from, say, printf() should be OK as long as the kernel does not access any
-memory that was accessed transactionally.
-
-Consider any syscalls that happen to work as debug-only -- not recommended for
-production use.  Best to queue them up till after the transaction is over.
+Care must be taken when relying on syscalls to abort during active transactions
+if the calls are made via a library.  Libraries may cache values (which may
+give the appearance of success) or perform operations that cause transaction
+failure before entering the kernel (which may produce different failure codes).
+Examples are glibc's getpid() and lazy symbol resolution.
 
 
 Signals
@@ -176,8 +177,7 @@ kernel aborted a transaction:
  TM_CAUSE_RESCHED       Thread was rescheduled.
  TM_CAUSE_TLBI          Software TLB invalid.
  TM_CAUSE_FAC_UNAV      FP/VEC/VSX unavailable trap.
- TM_CAUSE_SYSCALL       Currently unused; future syscalls that must abort
-                        transactions for consistency will use this.
+ TM_CAUSE_SYSCALL       Syscall from active transaction.
  TM_CAUSE_SIGNAL        Signal delivered.
  TM_CAUSE_MISC          Currently unused.
  TM_CAUSE_ALIGNMENT     Alignment fault.
index 96978eced34154187acdd328fe403a14ca6200f7..1dd3fddfd3a1e536de39b69c37168dfc35553a4a 100644 (file)
@@ -289,10 +289,12 @@ PPC64 sPAPR implementation note
 
 This implementation has some specifics:
 
-1) Only one IOMMU group per container is supported as an IOMMU group
-represents the minimal entity which isolation can be guaranteed for and
-groups are allocated statically, one per a Partitionable Endpoint (PE)
+1) On older systems (POWER7 with P5IOC2/IODA1) only one IOMMU group per
+container is supported as an IOMMU table is allocated at the boot time,
+one table per a IOMMU group which is a Partitionable Endpoint (PE)
 (PE is often a PCI domain but not always).
+Newer systems (POWER8 with IODA2) have improved hardware design which allows
+to remove this limitation and have multiple IOMMU groups per a VFIO container.
 
 2) The hardware supports so called DMA windows - the PCI address range
 within which DMA transfer is allowed, any attempt to access address space
@@ -385,6 +387,18 @@ The code flow from the example above should be slightly changed:
 
        ....
 
+       /* Inject EEH error, which is expected to be caused by 32-bits
+        * config load.
+        */
+       pe_op.op = VFIO_EEH_PE_INJECT_ERR;
+       pe_op.err.type = EEH_ERR_TYPE_32;
+       pe_op.err.func = EEH_ERR_FUNC_LD_CFG_ADDR;
+       pe_op.err.addr = 0ul;
+       pe_op.err.mask = 0ul;
+       ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+       ....
+
        /* When 0xFF's returned from reading PCI config space or IO BARs
         * of the PCI device. Check the PE's state to see if that has been
         * frozen.
@@ -427,6 +441,48 @@ The code flow from the example above should be slightly changed:
 
        ....
 
+5) There is v2 of SPAPR TCE IOMMU. It deprecates VFIO_IOMMU_ENABLE/
+VFIO_IOMMU_DISABLE and implements 2 new ioctls:
+VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY
+(which are unsupported in v1 IOMMU).
+
+PPC64 paravirtualized guests generate a lot of map/unmap requests,
+and the handling of those includes pinning/unpinning pages and updating
+mm::locked_vm counter to make sure we do not exceed the rlimit.
+The v2 IOMMU splits accounting and pinning into separate operations:
+
+- VFIO_IOMMU_SPAPR_REGISTER_MEMORY/VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY ioctls
+receive a user space address and size of the block to be pinned.
+Bisecting is not supported and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY is expected
+to be called with the exact address and size used for registering
+the memory block. Userspace is not expected to call these often.
+The ranges are stored in a linked list in a VFIO container.
+
+- VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA ioctls only update the actual
+IOMMU table and do not do pinning; instead these check that the userspace
+address is from a pre-registered range.
+
+This separation helps in optimizing DMA for guests.
+
+6) sPAPR specification allows guests to have an additional DMA window(s) on
+a PCI bus with a variable page size. Two ioctls have been added to support
+this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE.
+The platform has to support the functionality or an error will be returned
+to the userspace. The existing hardware supports up to 2 DMA windows: one is
+2GB long, uses 4K pages and is called the "default 32bit window"; the other
+can be as big as the entire RAM, may use a different page size, and is
+optional - guests create it at run-time if the guest driver supports 64bit DMA.
+
+VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and
+a number of TCE table levels (useful if a TCE table is going to be big and
+the kernel may not be able to allocate enough physically contiguous memory).
+It creates a new window in the available slot and returns the bus address where
+the new window starts. Due to a hardware limitation, the user space cannot
+choose the location of DMA windows.
+
+VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window
+and removes it.
+
 -------------------------------------------------------------------------------
 
 [1] VFIO was originally an acronym for "Virtual Function I/O" in its
index acb64894c5948d62c6b5884e150eb922cbdc24b4..78a9174d5a487477df9d81a53ed3ec3f60db5e35 100644 (file)
@@ -2464,7 +2464,6 @@ F:        Documentation/devicetree/bindings/net/ieee802154/cc2520.txt
 CELL BROADBAND ENGINE ARCHITECTURE
 M:     Arnd Bergmann <arnd@arndb.de>
 L:     linuxppc-dev@lists.ozlabs.org
-L:     cbe-oss-dev@lists.ozlabs.org
 W:     http://www.ibm.com/developerworks/power/cell/
 S:     Supported
 F:     arch/powerpc/include/asm/cell*.h
@@ -2979,7 +2978,7 @@ M:        Michael Neuling <mikey@neuling.org>
 L:     linuxppc-dev@lists.ozlabs.org
 S:     Supported
 F:     drivers/misc/cxl/
-F:     include/misc/cxl.h
+F:     include/misc/cxl*
 F:     include/uapi/misc/cxl.h
 F:     Documentation/powerpc/cxl.txt
 F:     Documentation/powerpc/cxl.txt
@@ -7914,14 +7913,13 @@ F:      drivers/net/wireless/prism54/
 PS3 NETWORK SUPPORT
 M:     Geoff Levand <geoff@infradead.org>
 L:     netdev@vger.kernel.org
-L:     cbe-oss-dev@lists.ozlabs.org
+L:     linuxppc-dev@lists.ozlabs.org
 S:     Maintained
 F:     drivers/net/ethernet/toshiba/ps3_gelic_net.*
 
 PS3 PLATFORM SUPPORT
 M:     Geoff Levand <geoff@infradead.org>
 L:     linuxppc-dev@lists.ozlabs.org
-L:     cbe-oss-dev@lists.ozlabs.org
 S:     Maintained
 F:     arch/powerpc/boot/ps3*
 F:     arch/powerpc/include/asm/lv1call.h
@@ -7935,7 +7933,7 @@ F:        sound/ppc/snd_ps3*
 
 PS3VRAM DRIVER
 M:     Jim Paris <jim@jtan.com>
-L:     cbe-oss-dev@lists.ozlabs.org
+L:     linuxppc-dev@lists.ozlabs.org
 S:     Maintained
 F:     drivers/block/ps3vram.c
 
@@ -9420,7 +9418,6 @@ F:        drivers/net/ethernet/toshiba/spider_net*
 SPU FILE SYSTEM
 M:     Jeremy Kerr <jk@ozlabs.org>
 L:     linuxppc-dev@lists.ozlabs.org
-L:     cbe-oss-dev@lists.ozlabs.org
 W:     http://www.ibm.com/developerworks/power/cell/
 S:     Supported
 F:     Documentation/filesystems/spufs.txt
index 0efa8f90a8f1055f040c9a62360c97d4aa4253e0..3a510f4a6b68cfe56f711ed81d46fcf1b2e0ee72 100644 (file)
@@ -19,6 +19,14 @@ config PPC_WERROR
        depends on !PPC_DISABLE_WERROR
        default y
 
+config STRICT_MM_TYPECHECKS
+       bool "Do extra type checking on mm types"
+       default n
+       help
+         This option turns on extra type checking for some mm related types.
+
+         If you don't know what this means, say N.
+
 config PRINT_STACK_DEPTH
        int "Stack depth to print" if DEBUG_KERNEL
        default 64
index 07a480861f785295f51a28879c9ecfa9dad6955f..05f464eb69527e1d59dff1a13fdd449d6b7aa25f 100644 (file)
@@ -66,7 +66,10 @@ endif
 UTS_MACHINE := $(OLDARCH)
 
 ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override CC    += -mlittle-endian -mno-strict-align
+override CC    += -mlittle-endian
+ifneq ($(COMPILER),clang)
+override CC    += -mno-strict-align
+endif
 override AS    += -mlittle-endian
 override LD    += -EL
 override CROSS32CC += -mlittle-endian
@@ -113,14 +116,14 @@ else
 endif
 endif
 
-CFLAGS-$(CONFIG_PPC64) := -mtraceback=no
+CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no)
 ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,-mcall-aixdesc)
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
 AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2)
 else
-CFLAGS-$(CONFIG_PPC64) += -mcall-aixdesc
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc)
 endif
-CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,-mminimal-toc)
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
 CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD)
 
@@ -160,7 +163,8 @@ asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)
 
 KBUILD_CPPFLAGS        += -Iarch/$(ARCH) $(asinstr)
 KBUILD_AFLAGS  += -Iarch/$(ARCH) $(AFLAGS-y)
-KBUILD_CFLAGS  += -msoft-float -pipe -Iarch/$(ARCH) $(CFLAGS-y)
+KBUILD_CFLAGS  += $(call cc-option,-msoft-float)
+KBUILD_CFLAGS  += -pipe -Iarch/$(ARCH) $(CFLAGS-y)
 CPP            = $(CC) -E $(KBUILD_CFLAGS)
 
 CHECKFLAGS     += -m$(CONFIG_WORD_SIZE) -D__powerpc__ -D__powerpc$(CONFIG_WORD_SIZE)__
@@ -192,7 +196,7 @@ KBUILD_CFLAGS       += $(call cc-option,-fno-dwarf2-cfi-asm)
 
 # Never use string load/store instructions as they are
 # often slow when they are implemented at all
-KBUILD_CFLAGS          += -mno-string
+KBUILD_CFLAGS          += $(call cc-option,-mno-string)
 
 ifeq ($(CONFIG_6xx),y)
 KBUILD_CFLAGS          += -mcpu=powerpc
@@ -269,6 +273,21 @@ bootwrapper_install:
 %.dtb: scripts
        $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
 
+# Used to create 'merged defconfigs'
+# To use it $(call) it with the first argument as the base defconfig
+# and the second argument as a space separated list of .config files to merge,
+# without the .config suffix.
+define merge_into_defconfig
+       $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
+               -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \
+               $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config)
+       +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
+endef
+
+PHONY += pseries_le_defconfig
+pseries_le_defconfig:
+       $(call merge_into_defconfig,pseries_defconfig,le)
+
 define archhelp
   @echo '* zImage          - Build default images selected by kernel config'
   @echo '  zImage.*        - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)'
@@ -314,7 +333,8 @@ TOUT        := .tmp_gas_check
 # - Require gcc 4.0 or above on 64-bit
 # - gcc-4.2.0 has issues compiling modules on 64-bit
 checkbin:
-       @if test "$(cc-version)" = "0304" ; then \
+       @if test "${COMPILER}" != "clang" \
+           && test "$(cc-version)" = "0304" ; then \
                if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \
                        echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \
                        echo 'correctly with gcc-3.4 and your version of binutils.'; \
@@ -322,13 +342,15 @@ checkbin:
                        false; \
                fi ; \
        fi
-       @if test "$(cc-version)" -lt "0400" \
+       @if test "${COMPILER}" != "clang" \
+           && test "$(cc-version)" -lt "0400" \
            && test "x${CONFIG_PPC64}" = "xy" ; then \
                 echo -n "Sorry, GCC v4.0 or above is required to build " ; \
                 echo "the 64-bit powerpc kernel." ; \
                 false ; \
         fi
-       @if test "$(cc-fullversion)" = "040200" \
+       @if test "${COMPILER}" != "clang" \
+           && test "$(cc-fullversion)" = "040200" \
            && test "x${CONFIG_MODULES}${CONFIG_PPC64}" = "xyy" ; then \
                echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
                echo 'kernel with modules enabled.' ; \
@@ -336,6 +358,14 @@ checkbin:
                echo 'disable kernel modules' ; \
                false ; \
        fi
+       @if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \
+           && $(LD) --version | head -1 | grep ' 2\.24$$' >/dev/null ; then \
+               echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \
+               echo 'in some circumstances.' ; \
+               echo -n '*** Please use a different binutils version.' ; \
+               false ; \
+       fi
+
 
 CLEAN_FILES += $(TOUT)
 
index 24ed80dc2120d5df44092d0b613100289f978fed..559d00657fb5e0422747f34a190cf341047538f3 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        dcsr: dcsr@f00000000 {
                ranges = <0x0 0xf 0xf4000000 0x2000000>;
        };
 
+       qportals: qman-portals@ff6000000 {
+               ranges = <0x0 0xf 0xf6000000 0x2000000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
index 86161ae6c966e091ef32c053e8662f44b804d927..1ea8602e4345f4a3bde9bdd57f959cdd0c8c51ef 100644 (file)
                compatible = "fsl,b4420-device-config", "fsl,qoriq-device-config-2.0";
        };
 
-/include/ "qoriq-clockgen2.dtsi"
        global-utilities@e1000 {
-               compatible = "fsl,b4420-clockgen", "fsl,qoriq-clockgen-2.0";
-
-               mux0: mux0@0 {
-                       #clock-cells = <0>;
-                       reg = <0x0 0x4>;
-                       compatible = "fsl,qoriq-core-mux-2.0";
-                       clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
-                               <&pll1 0>, <&pll1 1>, <&pll1 2>;
-                       clock-names = "pll0", "pll0-div2", "pll0-div4",
-                               "pll1", "pll1-div2", "pll1-div4";
-                       clock-output-names = "cmux0";
-               };
+               compatible = "fsl,b4420-clockgen", "fsl,b4-clockgen",
+                             "fsl,qoriq-clockgen-2.0";
        };
 
        rcpm: global-utilities@e2000 {
index f35e9e0a54455793d306a2ede44141f0636a2f16..9ba904be39ee185cdd5d70ff0d9431d1a5bfcdad 100644 (file)
        };
 };
 
+&qportals {
+       qportal14: qman-portal@38000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+               interrupts = <132 0x2 0 0>;
+               cell-index = <0xe>;
+       };
+       qportal15: qman-portal@3c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+               interrupts = <134 0x2 0 0>;
+               cell-index = <0xf>;
+       };
+       qportal16: qman-portal@40000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+               interrupts = <136 0x2 0 0>;
+               cell-index = <0x10>;
+       };
+       qportal17: qman-portal@44000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+               interrupts = <138 0x2 0 0>;
+               cell-index = <0x11>;
+       };
+       qportal18: qman-portal@48000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x48000 0x4000>, <0x1012000 0x1000>;
+               interrupts = <140 0x2 0 0>;
+               cell-index = <0x12>;
+       };
+       qportal19: qman-portal@4c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x4c000 0x4000>, <0x1013000 0x1000>;
+               interrupts = <142 0x2 0 0>;
+               cell-index = <0x13>;
+       };
+       qportal20: qman-portal@50000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x50000 0x4000>, <0x1014000 0x1000>;
+               interrupts = <144 0x2 0 0>;
+               cell-index = <0x14>;
+       };
+       qportal21: qman-portal@54000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x54000 0x4000>, <0x1015000 0x1000>;
+               interrupts = <146 0x2 0 0>;
+               cell-index = <0x15>;
+       };
+       qportal22: qman-portal@58000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x58000 0x4000>, <0x1016000 0x1000>;
+               interrupts = <148 0x2 0 0>;
+               cell-index = <0x16>;
+       };
+       qportal23: qman-portal@5c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x5c000 0x4000>, <0x1017000 0x1000>;
+               interrupts = <150 0x2 0 0>;
+               cell-index = <0x17>;
+       };
+       qportal24: qman-portal@60000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x60000 0x4000>, <0x1018000 0x1000>;
+               interrupts = <152 0x2 0 0>;
+               cell-index = <0x18>;
+       };
+};
+
 &soc {
        ddr2: memory-controller@9000 {
                compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller";
                compatible = "fsl,b4860-device-config", "fsl,qoriq-device-config-2.0";
        };
 
-/include/ "qoriq-clockgen2.dtsi"
        global-utilities@e1000 {
-               compatible = "fsl,b4860-clockgen", "fsl,qoriq-clockgen-2.0";
-
-               mux0: mux0@0 {
-                       #clock-cells = <0>;
-                       reg = <0x0 0x4>;
-                       compatible = "fsl,qoriq-core-mux-2.0";
-                       clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
-                               <&pll1 0>, <&pll1 1>, <&pll1 2>;
-                       clock-names = "pll0", "pll0-div2", "pll0-div4",
-                               "pll1", "pll1-div2", "pll1-div4";
-                       clock-output-names = "cmux0";
-               };
+               compatible = "fsl,b4860-clockgen", "fsl,b4-clockgen",
+                             "fsl,qoriq-clockgen-2.0";
        };
 
        rcpm: global-utilities@e2000 {
index 73136c0029d223f1b19d372dd58b0891ed61c0b4..603910ac1db0e4d89ba5684eecae4fd8f8d2014f 100644 (file)
        alloc-ranges = <0 0 0x10000 0>;
 };
 
+&qman_fqd {
+       compatible = "fsl,qman-fqd";
+       alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+       compatible = "fsl,qman-pfdr";
+       alloc-ranges = <0 0 0x10000 0>;
+};
+
 &ifc {
        #address-cells = <2>;
        #size-cells = <1>;
        };
 };
 
+&qportals {
+       #address-cells = <0x1>;
+       #size-cells = <0x1>;
+       compatible = "simple-bus";
+
+       qportal0: qman-portal@0 {
+               compatible = "fsl,qman-portal";
+               reg = <0x0 0x4000>, <0x1000000 0x1000>;
+               interrupts = <104 0x2 0 0>;
+               cell-index = <0x0>;
+       };
+       qportal1: qman-portal@4000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+               interrupts = <106 0x2 0 0>;
+               cell-index = <0x1>;
+       };
+       qportal2: qman-portal@8000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+               interrupts = <108 0x2 0 0>;
+               cell-index = <0x2>;
+       };
+       qportal3: qman-portal@c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+               interrupts = <110 0x2 0 0>;
+               cell-index = <0x3>;
+       };
+       qportal4: qman-portal@10000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+               interrupts = <112 0x2 0 0>;
+               cell-index = <0x4>;
+       };
+       qportal5: qman-portal@14000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+               interrupts = <114 0x2 0 0>;
+               cell-index = <0x5>;
+       };
+       qportal6: qman-portal@18000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+               interrupts = <116 0x2 0 0>;
+               cell-index = <0x6>;
+       };
+       qportal7: qman-portal@1c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+               interrupts = <118 0x2 0 0>;
+               cell-index = <0x7>;
+       };
+       qportal8: qman-portal@20000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+               interrupts = <120 0x2 0 0>;
+               cell-index = <0x8>;
+       };
+       qportal9: qman-portal@24000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+               interrupts = <122 0x2 0 0>;
+               cell-index = <0x9>;
+       };
+       qportal10: qman-portal@28000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+               interrupts = <124 0x2 0 0>;
+               cell-index = <0xa>;
+       };
+       qportal11: qman-portal@2c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+               interrupts = <126 0x2 0 0>;
+               cell-index = <0xb>;
+       };
+       qportal12: qman-portal@30000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+               interrupts = <128 0x2 0 0>;
+               cell-index = <0xc>;
+       };
+       qportal13: qman-portal@34000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+               interrupts = <130 0x2 0 0>;
+               cell-index = <0xd>;
+       };
+};
+
 &soc {
        #address-cells = <1>;
        #size-cells = <1>;
                fsl,liodn-bits = <12>;
        };
 
+/include/ "qoriq-clockgen2.dtsi"
        clockgen: global-utilities@e1000 {
                compatible = "fsl,b4-clockgen", "fsl,qoriq-clockgen-2.0";
                reg = <0xe1000 0x1000>;
+
+               mux0: mux0@0 {
+                       #clock-cells = <0>;
+                       reg = <0x0 0x4>;
+                       compatible = "fsl,qoriq-core-mux-2.0";
+                       clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
+                               <&pll1 0>, <&pll1 1>, <&pll1 2>;
+                       clock-names = "pll0", "pll0-div2", "pll0-div4",
+                               "pll1", "pll1-div2", "pll1-div4";
+                       clock-output-names = "cmux0";
+               };
        };
 
        rcpm: global-utilities@e2000 {
 /include/ "qoriq-duart-1.dtsi"
 /include/ "qoriq-sec5.3-0.dtsi"
 
+/include/ "qoriq-qman3.dtsi"
+       qman: qman@318000 {
+               interrupts = <16 2 1 28>;
+       };
+
 /include/ "qoriq-bman1.dtsi"
        bman: bman@31a000 {
                interrupts = <16 2 1 29>;
index 7780f21430cba57b61f4f332e20f57cbc5ff553a..da6d3fc6ba41501f31a42389464659d33e469e14 100644 (file)
        alloc-ranges = <0 0 0x10 0>;
 };
 
+&qman_fqd {
+       compatible = "fsl,qman-fqd";
+       alloc-ranges = <0 0 0x10 0>;
+};
+
+&qman_pfdr {
+       compatible = "fsl,qman-pfdr";
+       alloc-ranges = <0 0 0x10 0>;
+};
+
 &lbc {
        #address-cells = <2>;
        #size-cells = <1>;
        };
 };
 
+&qportals {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "simple-bus";
+
+       qportal0: qman-portal@0 {
+               compatible = "fsl,qman-portal";
+               reg = <0x0 0x4000>, <0x100000 0x1000>;
+               interrupts = <29 2 0 0>;
+               cell-index = <0>;
+       };
+       qportal1: qman-portal@4000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x4000 0x4000>, <0x101000 0x1000>;
+               interrupts = <31 2 0 0>;
+               cell-index = <1>;
+       };
+       qportal2: qman-portal@8000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x8000 0x4000>, <0x102000 0x1000>;
+               interrupts = <33 2 0 0>;
+               cell-index = <2>;
+       };
+};
+
 &bportals {
        #address-cells = <1>;
        #size-cells = <1>;
 /include/ "pq3-mpic.dtsi"
 /include/ "pq3-mpic-timer-B.dtsi"
 
+       qman: qman@88000 {
+               compatible = "fsl,qman";
+               reg = <0x88000 0x1000>;
+               interrupts = <16 2 0 0>;
+               fsl,qman-portals = <&qportals>;
+               memory-region = <&qman_fqd &qman_pfdr>;
+       };
+
        bman: bman@8a000 {
                compatible = "fsl,bman";
                reg = <0x8a000 0x1000>;
index b6a0e88ee5ce02a0f97f9afc3cacefe710c42557..04ad177b6a12fd609c3789536b923c24532aaea6 100644 (file)
        alloc-ranges = <0 0 0x10 0>;
 };
 
+&qman_fqd {
+       compatible = "fsl,qman-fqd";
+       alloc-ranges = <0 0 0x10 0>;
+};
+
+&qman_pfdr {
+       compatible = "fsl,qman-pfdr";
+       alloc-ranges = <0 0 0x10 0>;
+};
+
 &lbc {
        compatible = "fsl,p2041-elbc", "fsl,elbc", "simple-bus";
        interrupts = <25 2 0 0>;
 
 /include/ "qoriq-bman1-portals.dtsi"
 
+/include/ "qoriq-qman1-portals.dtsi"
+
 &soc {
        #address-cells = <1>;
        #size-cells = <1>;
@@ -416,5 +428,6 @@ crypto: crypto@300000 {
                fsl,iommu-parent = <&pamu1>;
        };
 
+/include/ "qoriq-qman1.dtsi"
 /include/ "qoriq-bman1.dtsi"
 };
index cf18f7bf824fa3d35ce1deca205e6de42050718f..2cab18af6df2a24ed4ae523a080ee9e36dce8cc1 100644 (file)
        alloc-ranges = <0 0 0x10 0>;
 };
 
+&qman_fqd {
+       compatible = "fsl,qman-fqd";
+       alloc-ranges = <0 0 0x10 0>;
+};
+
+&qman_pfdr {
+       compatible = "fsl,qman-pfdr";
+       alloc-ranges = <0 0 0x10 0>;
+};
+
 &lbc {
        compatible = "fsl,p3041-elbc", "fsl,elbc", "simple-bus";
        interrupts = <25 2 0 0>;
 
 /include/ "qoriq-bman1-portals.dtsi"
 
+/include/ "qoriq-qman1-portals.dtsi"
+
 &soc {
        #address-cells = <1>;
        #size-cells = <1>;
@@ -443,5 +455,6 @@ crypto: crypto@300000 {
                fsl,iommu-parent = <&pamu1>;
        };
 
+/include/ "qoriq-qman1.dtsi"
 /include/ "qoriq-bman1.dtsi"
 };
index 90431c0b53ad8dbefe460421bdfcbfd11b6ff5ac..dfc76bc41cb26cd2e0a2ecc68589fefaea579208 100644 (file)
        alloc-ranges = <0 0 0x10 0>;
 };
 
+&qman_fqd {
+       compatible = "fsl,qman-fqd";
+       alloc-ranges = <0 0 0x10 0>;
+};
+
+&qman_pfdr {
+       compatible = "fsl,qman-pfdr";
+       alloc-ranges = <0 0 0x10 0>;
+};
+
 &lbc {
        compatible = "fsl,p4080-elbc", "fsl,elbc", "simple-bus";
        interrupts = <25 2 0 0>;
 
 /include/ "qoriq-bman1-portals.dtsi"
 
+/include/ "qoriq-qman1-portals.dtsi"
+
 &soc {
        #address-cells = <1>;
        #size-cells = <1>;
@@ -499,5 +511,6 @@ crypto: crypto@300000 {
                fsl,iommu-parent = <&pamu1>;
        };
 
+/include/ "qoriq-qman1.dtsi"
 /include/ "qoriq-bman1.dtsi"
 };
index 8be61d11349efe55acd35074ed2c1aa417abb324..b77923ad72cf5682177b721ae75fe85cfa04152d 100644 (file)
        alloc-ranges = <0 0 0x10000 0>;
 };
 
+&qman_fqd {
+       compatible = "fsl,qman-fqd";
+       alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+       compatible = "fsl,qman-pfdr";
+       alloc-ranges = <0 0 0x10000 0>;
+};
+
 &lbc {
        compatible = "fsl,p5020-elbc", "fsl,elbc", "simple-bus";
        interrupts = <25 2 0 0>;
 
 /include/ "qoriq-bman1-portals.dtsi"
 
+/include/ "qoriq-qman1-portals.dtsi"
+
 &soc {
        #address-cells = <1>;
        #size-cells = <1>;
                fsl,iommu-parent = <&pamu1>;
        };
 
+/include/ "qoriq-qman1.dtsi"
 /include/ "qoriq-bman1.dtsi"
 
 /include/ "qoriq-raid1.0-0.dtsi"
index 48e232f2d50d56d82bac1532f8644d02ccc78506..6d214526b81ba56d7e2f6f24b8ffa520e519b281 100644 (file)
        alloc-ranges = <0 0 0x10000 0>;
 };
 
+&qman_fqd {
+       compatible = "fsl,qman-fqd";
+       alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+       compatible = "fsl,qman-pfdr";
+       alloc-ranges = <0 0 0x10000 0>;
+};
+
 &lbc {
        compatible = "fsl,p5040-elbc", "fsl,elbc", "simple-bus";
        interrupts = <25 2 0 0>;
 
 /include/ "qoriq-bman1-portals.dtsi"
 
+/include/ "qoriq-qman1-portals.dtsi"
+
 &soc {
        #address-cells = <1>;
        #size-cells = <1>;
                fsl,iommu-parent = <&pamu4>;
        };
 
+/include/ "qoriq-qman1.dtsi"
 /include/ "qoriq-bman1.dtsi"
 };
index 05d51acafa67a960caa975e997ebaad9d9b3ac0a..e77e4b4ed53b73911fd41a24012f313798c86307 100644 (file)
                compatible = "fsl,qman-portal";
                reg = <0x0 0x4000>, <0x100000 0x1000>;
                interrupts = <104 2 0 0>;
-               fsl,qman-channel-id = <0x0>;
+               cell-index = <0x0>;
        };
        qportal1: qman-portal@4000 {
                compatible = "fsl,qman-portal";
                reg = <0x4000 0x4000>, <0x101000 0x1000>;
                interrupts = <106 2 0 0>;
-               fsl,qman-channel-id = <1>;
+               cell-index = <1>;
        };
        qportal2: qman-portal@8000 {
                compatible = "fsl,qman-portal";
                reg = <0x8000 0x4000>, <0x102000 0x1000>;
                interrupts = <108 2 0 0>;
-               fsl,qman-channel-id = <2>;
+               cell-index = <2>;
        };
        qportal3: qman-portal@c000 {
                compatible = "fsl,qman-portal";
                reg = <0xc000 0x4000>, <0x103000 0x1000>;
                interrupts = <110 2 0 0>;
-               fsl,qman-channel-id = <3>;
+               cell-index = <3>;
        };
        qportal4: qman-portal@10000 {
                compatible = "fsl,qman-portal";
                reg = <0x10000 0x4000>, <0x104000 0x1000>;
                interrupts = <112 2 0 0>;
-               fsl,qman-channel-id = <4>;
+               cell-index = <4>;
        };
        qportal5: qman-portal@14000 {
                compatible = "fsl,qman-portal";
                reg = <0x14000 0x4000>, <0x105000 0x1000>;
                interrupts = <114 2 0 0>;
-               fsl,qman-channel-id = <5>;
+               cell-index = <5>;
        };
        qportal6: qman-portal@18000 {
                compatible = "fsl,qman-portal";
                reg = <0x18000 0x4000>, <0x106000 0x1000>;
                interrupts = <116 2 0 0>;
-               fsl,qman-channel-id = <6>;
+               cell-index = <6>;
        };
 
        qportal7: qman-portal@1c000 {
                compatible = "fsl,qman-portal";
                reg = <0x1c000 0x4000>, <0x107000 0x1000>;
                interrupts = <118 2 0 0>;
-               fsl,qman-channel-id = <7>;
+               cell-index = <7>;
        };
        qportal8: qman-portal@20000 {
                compatible = "fsl,qman-portal";
                reg = <0x20000 0x4000>, <0x108000 0x1000>;
                interrupts = <120 2 0 0>;
-               fsl,qman-channel-id = <8>;
+               cell-index = <8>;
        };
        qportal9: qman-portal@24000 {
                compatible = "fsl,qman-portal";
                reg = <0x24000 0x4000>, <0x109000 0x1000>;
                interrupts = <122 2 0 0>;
-               fsl,qman-channel-id = <9>;
+               cell-index = <9>;
        };
 };
diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
new file mode 100644 (file)
index 0000000..df1f068
--- /dev/null
@@ -0,0 +1,330 @@
+/*
+ * T1023 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&ifc {
+       #address-cells = <2>;
+       #size-cells = <1>;
+       compatible = "fsl,ifc", "simple-bus";
+       interrupts = <25 2 0 0>;
+};
+
+&pci0 {
+       compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+       device_type = "pci";
+       #size-cells = <2>;
+       #address-cells = <3>;
+       bus-range = <0x0 0xff>;
+       interrupts = <20 2 0 0>;
+       fsl,iommu-parent = <&pamu0>;
+       pcie@0 {
+               reg = <0 0 0 0 0>;
+               #interrupt-cells = <1>;
+               #size-cells = <2>;
+               #address-cells = <3>;
+               device_type = "pci";
+               interrupts = <20 2 0 0>;
+               interrupt-map-mask = <0xf800 0 0 7>;
+               interrupt-map = <
+                       /* IDSEL 0x0 */
+                       0000 0 0 1 &mpic 40 1 0 0
+                       0000 0 0 2 &mpic 1 1 0 0
+                       0000 0 0 3 &mpic 2 1 0 0
+                       0000 0 0 4 &mpic 3 1 0 0
+                       >;
+       };
+};
+
+&pci1 {
+       compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+       device_type = "pci";
+       #size-cells = <2>;
+       #address-cells = <3>;
+       bus-range = <0 0xff>;
+       interrupts = <21 2 0 0>;
+       fsl,iommu-parent = <&pamu0>;
+       pcie@0 {
+               reg = <0 0 0 0 0>;
+               #interrupt-cells = <1>;
+               #size-cells = <2>;
+               #address-cells = <3>;
+               device_type = "pci";
+               interrupts = <21 2 0 0>;
+               interrupt-map-mask = <0xf800 0 0 7>;
+               interrupt-map = <
+                       /* IDSEL 0x0 */
+                       0000 0 0 1 &mpic 41 1 0 0
+                       0000 0 0 2 &mpic 5 1 0 0
+                       0000 0 0 3 &mpic 6 1 0 0
+                       0000 0 0 4 &mpic 7 1 0 0
+                       >;
+       };
+};
+
+&pci2 {
+       compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+       device_type = "pci";
+       #size-cells = <2>;
+       #address-cells = <3>;
+       bus-range = <0x0 0xff>;
+       interrupts = <22 2 0 0>;
+       fsl,iommu-parent = <&pamu0>;
+       pcie@0 {
+               reg = <0 0 0 0 0>;
+               #interrupt-cells = <1>;
+               #size-cells = <2>;
+               #address-cells = <3>;
+               device_type = "pci";
+               interrupts = <22 2 0 0>;
+               interrupt-map-mask = <0xf800 0 0 7>;
+               interrupt-map = <
+                       /* IDSEL 0x0 */
+                       0000 0 0 1 &mpic 42 1 0 0
+                       0000 0 0 2 &mpic 9 1 0 0
+                       0000 0 0 3 &mpic 10 1 0 0
+                       0000 0 0 4 &mpic 11 1 0 0
+                       >;
+       };
+};
+
+&dcsr {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "fsl,dcsr", "simple-bus";
+
+       dcsr-epu@0 {
+               compatible = "fsl,t1023-dcsr-epu", "fsl,dcsr-epu";
+               interrupts = <52 2 0 0
+                             84 2 0 0
+                             85 2 0 0>;
+               reg = <0x0 0x1000>;
+       };
+       dcsr-npc {
+               compatible = "fsl,t1023-dcsr-cnpc", "fsl,dcsr-cnpc";
+               reg = <0x1000 0x1000 0x1002000 0x10000>;
+       };
+       dcsr-nxc@2000 {
+               compatible = "fsl,dcsr-nxc";
+               reg = <0x2000 0x1000>;
+       };
+       dcsr-corenet {
+               compatible = "fsl,dcsr-corenet";
+               reg = <0x8000 0x1000 0x1A000 0x1000>;
+       };
+       dcsr-ocn@11000 {
+               compatible = "fsl,t1023-dcsr-ocn", "fsl,dcsr-ocn";
+               reg = <0x11000 0x1000>;
+       };
+       dcsr-ddr@12000 {
+               compatible = "fsl,dcsr-ddr";
+               dev-handle = <&ddr1>;
+               reg = <0x12000 0x1000>;
+       };
+       dcsr-nal@18000 {
+               compatible = "fsl,t1023-dcsr-nal", "fsl,dcsr-nal";
+               reg = <0x18000 0x1000>;
+       };
+       dcsr-rcpm@22000 {
+               compatible = "fsl,t1023-dcsr-rcpm", "fsl,dcsr-rcpm";
+               reg = <0x22000 0x1000>;
+       };
+       dcsr-snpc@30000 {
+               compatible = "fsl,t1023-dcsr-snpc", "fsl,dcsr-snpc";
+               reg = <0x30000 0x1000 0x1022000 0x10000>;
+       };
+       dcsr-snpc@31000 {
+               compatible = "fsl,t1023-dcsr-snpc", "fsl,dcsr-snpc";
+               reg = <0x31000 0x1000 0x1042000 0x10000>;
+       };
+       dcsr-cpu-sb-proxy@100000 {
+               compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+               cpu-handle = <&cpu0>;
+               reg = <0x100000 0x1000 0x101000 0x1000>;
+       };
+       dcsr-cpu-sb-proxy@108000 {
+               compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+               cpu-handle = <&cpu1>;
+               reg = <0x108000 0x1000 0x109000 0x1000>;
+       };
+};
+
+&soc {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       device_type = "soc";
+       compatible = "simple-bus";
+
+       soc-sram-error {
+               compatible = "fsl,soc-sram-error";
+               interrupts = <16 2 1 29>;
+       };
+
+       corenet-law@0 {
+               compatible = "fsl,corenet-law";
+               reg = <0x0 0x1000>;
+               fsl,num-laws = <16>;
+       };
+
+       ddr1: memory-controller@8000 {
+               compatible = "fsl,qoriq-memory-controller-v5.0",
+                               "fsl,qoriq-memory-controller";
+               reg = <0x8000 0x1000>;
+               interrupts = <16 2 1 23>;
+       };
+
+       cpc: l3-cache-controller@10000 {
+               compatible = "fsl,t1023-l3-cache-controller", "cache";
+               reg = <0x10000 0x1000>;
+               interrupts = <16 2 1 27>;
+       };
+
+       corenet-cf@18000 {
+               compatible = "fsl,corenet2-cf";
+               reg = <0x18000 0x1000>;
+               interrupts = <16 2 1 31>;
+       };
+
+       iommu@20000 {
+               compatible = "fsl,pamu-v1.0", "fsl,pamu";
+               reg = <0x20000 0x1000>;
+               ranges = <0 0x20000 0x1000>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+               interrupts = <
+                       24 2 0 0
+                       16 2 1 30>;
+               pamu0: pamu@0 {
+                       reg = <0 0x1000>;
+                       fsl,primary-cache-geometry = <128 1>;
+                       fsl,secondary-cache-geometry = <32 2>;
+               };
+       };
+
+/include/ "qoriq-mpic.dtsi"
+
+       guts: global-utilities@e0000 {
+               compatible = "fsl,t1023-device-config", "fsl,qoriq-device-config-2.0";
+               reg = <0xe0000 0xe00>;
+               fsl,has-rstcr;
+               fsl,liodn-bits = <12>;
+       };
+
+/include/ "qoriq-clockgen2.dtsi"
+       global-utilities@e1000 {
+               compatible = "fsl,t1023-clockgen", "fsl,qoriq-clockgen-2.0";
+               mux0: mux0@0 {
+                       #clock-cells = <0>;
+                       reg = <0x0 4>;
+                       compatible = "fsl,core-mux-clock";
+                       clocks = <&pll0 0>, <&pll0 1>;
+                       clock-names = "pll0_0", "pll0_1";
+                       clock-output-names = "cmux0";
+               };
+               mux1: mux1@20 {
+                       #clock-cells = <0>;
+                       reg = <0x20 4>;
+                       compatible = "fsl,core-mux-clock";
+                       clocks = <&pll0 0>, <&pll0 1>;
+                       clock-names = "pll0_0", "pll0_1";
+                       clock-output-names = "cmux1";
+               };
+       };
+
+       rcpm: global-utilities@e2000 {
+               compatible = "fsl,t1023-rcpm", "fsl,qoriq-rcpm-2.0";
+               reg = <0xe2000 0x1000>;
+       };
+
+       sfp: sfp@e8000 {
+               compatible = "fsl,t1023-sfp";
+               reg = <0xe8000 0x1000>;
+       };
+
+       serdes: serdes@ea000 {
+               compatible = "fsl,t1023-serdes";
+               reg = <0xea000 0x4000>;
+       };
+
+       scfg: global-utilities@fc000 {
+               compatible = "fsl,t1023-scfg";
+               reg = <0xfc000 0x1000>;
+       };
+
+/include/ "elo3-dma-0.dtsi"
+/include/ "elo3-dma-1.dtsi"
+
+/include/ "qoriq-espi-0.dtsi"
+       spi@110000 {
+               fsl,espi-num-chipselects = <4>;
+       };
+
+/include/ "qoriq-esdhc-0.dtsi"
+       sdhc@114000 {
+               compatible = "fsl,t1023-esdhc", "fsl,esdhc";
+               fsl,iommu-parent = <&pamu0>;
+               fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
+               sdhci,auto-cmd12;
+               no-1-8-v;
+       };
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-gpio-1.dtsi"
+/include/ "qoriq-gpio-2.dtsi"
+/include/ "qoriq-gpio-3.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+       usb0: usb@210000 {
+               compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph";
+               fsl,iommu-parent = <&pamu0>;
+               fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
+               phy_type = "utmi";
+               port0;
+       };
+/include/ "qoriq-usb2-dr-0.dtsi"
+       usb1: usb@211000 {
+               compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr";
+               fsl,iommu-parent = <&pamu0>;
+               fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */
+               dr_mode = "host";
+               phy_type = "utmi";
+       };
+/include/ "qoriq-sata2-0.dtsi"
+       sata@220000 {
+               fsl,iommu-parent = <&pamu0>;
+               fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */
+       };
+
+/include/ "qoriq-sec5.0-0.dtsi"
+};
diff --git a/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi
new file mode 100644 (file)
index 0000000..95e3af8
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * T1024 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t1023si-post.dtsi"
+
+/ {
+       aliases {
+               vga = &display;
+               display = &display;
+       };
+
+       qe:qe@ffe140000 {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               device_type = "qe";
+               compatible = "fsl,qe";
+               ranges = <0x0 0xf 0xfe140000 0x40000>;
+               reg = <0xf 0xfe140000 0 0x480>;
+               fsl,qe-num-riscs = <1>;
+               fsl,qe-num-snums = <28>;
+               brg-frequency = <0>;
+               bus-frequency = <0>;
+       };
+};
+
+&soc {
+       display:display@180000 {
+               compatible = "fsl,t1024-diu", "fsl,diu";
+               reg = <0x180000 0x1000>; /* 4 KiB DIU register block; was bare decimal 1000 (0x3e8) */
+               interrupts = <74 2 0 0>;
+       };
+};
+
+&qe {
+       qeic: interrupt-controller@80 {
+               interrupt-controller;
+               compatible = "fsl,qe-ic";
+               #address-cells = <0>;
+               #interrupt-cells = <1>;
+               reg = <0x80 0x80>;
+               interrupts = <95 2 0 0  94 2 0 0>; // high:95 low:94
+       };
+
+       ucc@2000 {
+               cell-index = <1>;
+               reg = <0x2000 0x200>;
+               interrupts = <32>;
+               interrupt-parent = <&qeic>;
+       };
+
+       ucc@2200 {
+               cell-index = <3>;
+               reg = <0x2200 0x200>;
+               interrupts = <34>;
+               interrupt-parent = <&qeic>;
+       };
+
+       muram@10000 {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               compatible = "fsl,qe-muram", "fsl,cpm-muram";
+               ranges = <0x0 0x10000 0x6000>;
+
+               data-only@0 {
+                       compatible = "fsl,qe-muram-data", "fsl,cpm-muram-data";
+                       reg = <0x0 0x6000>;
+               };
+       };
+};
diff --git a/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi
new file mode 100644 (file)
index 0000000..1f1a9f8
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * T1024/T1023 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e5500_power_isa.dtsi"
+
+/ {
+       #address-cells = <2>;
+       #size-cells = <2>;
+       interrupt-parent = <&mpic>;
+
+       aliases {
+               ccsr = &soc;
+               dcsr = &dcsr;
+
+               dma0 = &dma0;
+               dma1 = &dma1;
+               serial0 = &serial0;
+               serial1 = &serial1;
+               serial2 = &serial2;
+               serial3 = &serial3;
+               pci0 = &pci0;
+               pci1 = &pci1;
+               pci2 = &pci2;
+               usb0 = &usb0;
+               usb1 = &usb1;
+               sdhc = &sdhc;
+
+               crypto = &crypto;
+       };
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu0: PowerPC,e5500@0 {
+                       device_type = "cpu";
+                       reg = <0>;
+                       clocks = <&mux0>;
+                       next-level-cache = <&L2_1>;
+                       L2_1: l2-cache {
+                               next-level-cache = <&cpc>;
+                       };
+               };
+               cpu1: PowerPC,e5500@1 {
+                       device_type = "cpu";
+                       reg = <1>;
+                       clocks = <&mux1>;
+                       next-level-cache = <&L2_2>;
+                       L2_2: l2-cache {
+                               next-level-cache = <&cpc>;
+                       };
+               };
+       };
+};
index 5cc01be5b1527cadb8b6a385b893fe7a5cfd1223..9e9f7e201d43a2234a8483f36f12b7e8ac0764fd 100644 (file)
        alloc-ranges = <0 0 0x10000 0>;
 };
 
+&qman_fqd {
+       compatible = "fsl,qman-fqd";
+       alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+       compatible = "fsl,qman-pfdr";
+       alloc-ranges = <0 0 0x10000 0>;
+};
+
 &ifc {
        #address-cells = <2>;
        #size-cells = <1>;
        };
 };
 
+&qportals {
+       #address-cells = <0x1>;
+       #size-cells = <0x1>;
+       compatible = "simple-bus";
+
+       qportal0: qman-portal@0 {
+               compatible = "fsl,qman-portal";
+               reg = <0x0 0x4000>, <0x1000000 0x1000>;
+               interrupts = <104 0x2 0 0>;
+               cell-index = <0x0>;
+       };
+       qportal1: qman-portal@4000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+               interrupts = <106 0x2 0 0>;
+               cell-index = <0x1>;
+       };
+       qportal2: qman-portal@8000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+               interrupts = <108 0x2 0 0>;
+               cell-index = <0x2>;
+       };
+       qportal3: qman-portal@c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+               interrupts = <110 0x2 0 0>;
+               cell-index = <0x3>;
+       };
+       qportal4: qman-portal@10000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+               interrupts = <112 0x2 0 0>;
+               cell-index = <0x4>;
+       };
+       qportal5: qman-portal@14000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+               interrupts = <114 0x2 0 0>;
+               cell-index = <0x5>;
+       };
+       qportal6: qman-portal@18000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+               interrupts = <116 0x2 0 0>;
+               cell-index = <0x6>;
+       };
+       qportal7: qman-portal@1c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+               interrupts = <118 0x2 0 0>;
+               cell-index = <0x7>;
+       };
+       qportal8: qman-portal@20000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+               interrupts = <120 0x2 0 0>;
+               cell-index = <0x8>;
+       };
+       qportal9: qman-portal@24000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+               interrupts = <122 0x2 0 0>;
+               cell-index = <0x9>;
+       };
+};
+
 &soc {
        #address-cells = <1>;
        #size-cells = <1>;
                fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */
        };
 /include/ "qoriq-sec5.0-0.dtsi"
+/include/ "qoriq-qman3.dtsi"
 /include/ "qoriq-bman1.dtsi"
 };
index 86bdaf6cbd141c0309524166e97e874e4e69647d..32c790ae7fde70f1762f72df2c0c5ad136919522 100644 (file)
        alloc-ranges = <0 0 0x10000 0>;
 };
 
+&qman_fqd {
+       compatible = "fsl,qman-fqd";
+       alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+       compatible = "fsl,qman-pfdr";
+       alloc-ranges = <0 0 0x10000 0>;
+};
+
 &ifc {
        #address-cells = <2>;
        #size-cells = <1>;
        };
 };
 
+&qportals {
+       #address-cells = <0x1>;
+       #size-cells = <0x1>;
+       compatible = "simple-bus";
+
+       qportal0: qman-portal@0 {
+               compatible = "fsl,qman-portal";
+               reg = <0x0 0x4000>, <0x1000000 0x1000>;
+               interrupts = <104 0x2 0 0>;
+               cell-index = <0x0>;
+       };
+       qportal1: qman-portal@4000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+               interrupts = <106 0x2 0 0>;
+               cell-index = <0x1>;
+       };
+       qportal2: qman-portal@8000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+               interrupts = <108 0x2 0 0>;
+               cell-index = <0x2>;
+       };
+       qportal3: qman-portal@c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+               interrupts = <110 0x2 0 0>;
+               cell-index = <0x3>;
+       };
+       qportal4: qman-portal@10000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+               interrupts = <112 0x2 0 0>;
+               cell-index = <0x4>;
+       };
+       qportal5: qman-portal@14000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+               interrupts = <114 0x2 0 0>;
+               cell-index = <0x5>;
+       };
+       qportal6: qman-portal@18000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+               interrupts = <116 0x2 0 0>;
+               cell-index = <0x6>;
+       };
+       qportal7: qman-portal@1c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+               interrupts = <118 0x2 0 0>;
+               cell-index = <0x7>;
+       };
+       qportal8: qman-portal@20000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+               interrupts = <120 0x2 0 0>;
+               cell-index = <0x8>;
+       };
+       qportal9: qman-portal@24000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+               interrupts = <122 0x2 0 0>;
+               cell-index = <0x9>;
+       };
+       qportal10: qman-portal@28000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+               interrupts = <124 0x2 0 0>;
+               cell-index = <0xa>;
+       };
+       qportal11: qman-portal@2c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+               interrupts = <126 0x2 0 0>;
+               cell-index = <0xb>;
+       };
+       qportal12: qman-portal@30000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+               interrupts = <128 0x2 0 0>;
+               cell-index = <0xc>;
+       };
+       qportal13: qman-portal@34000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+               interrupts = <130 0x2 0 0>;
+               cell-index = <0xd>;
+       };
+       qportal14: qman-portal@38000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+               interrupts = <132 0x2 0 0>;
+               cell-index = <0xe>;
+       };
+       qportal15: qman-portal@3c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+               interrupts = <134 0x2 0 0>;
+               cell-index = <0xf>;
+       };
+       qportal16: qman-portal@40000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+               interrupts = <136 0x2 0 0>;
+               cell-index = <0x10>;
+       };
+       qportal17: qman-portal@44000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+               interrupts = <138 0x2 0 0>;
+               cell-index = <0x11>;
+       };
+};
+
 &soc {
        #address-cells = <1>;
        #size-cells = <1>;
                        compatible = "fsl,qoriq-core-mux-2.0";
                        clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
                                 <&pll1 0>, <&pll1 1>, <&pll1 2>;
-                       clock-names = "pll0", "pll0-div2", "pll1-div4",
+                       clock-names = "pll0", "pll0-div2", "pll0-div4",
                                "pll1", "pll1-div2", "pll1-div4";
                        clock-output-names = "cmux0";
                };
                        compatible = "fsl,qoriq-core-mux-2.0";
                        clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
                                 <&pll1 0>, <&pll1 1>, <&pll1 2>;
-                       clock-names = "pll0", "pll0-div2", "pll1-div4",
+                       clock-names = "pll0", "pll0-div2", "pll0-div4",
                                "pll1", "pll1-div2", "pll1-div4";
                        clock-output-names = "cmux1";
                };
                phy_type = "utmi";
        };
 /include/ "qoriq-sec5.2-0.dtsi"
+/include/ "qoriq-qman3.dtsi"
 /include/ "qoriq-bman1.dtsi"
 
        L2_1: l2-cache-controller@c20000 {
index 4d4f25895d8c4a9f62d471e2189143892d6d7287..d806360d0f64b4907d4790b995f222b2ae355575 100644 (file)
        alloc-ranges = <0 0 0x10000 0>;
 };
 
+&qman_fqd {
+       compatible = "fsl,qman-fqd";
+       alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+       compatible = "fsl,qman-pfdr";
+       alloc-ranges = <0 0 0x10000 0>;
+};
+
 &ifc {
        #address-cells = <2>;
        #size-cells = <1>;
        };
 };
 
+&qportals {
+       #address-cells = <0x1>;
+       #size-cells = <0x1>;
+       compatible = "simple-bus";
+
+       qportal0: qman-portal@0 {
+               compatible = "fsl,qman-portal";
+               reg = <0x0 0x4000>, <0x1000000 0x1000>;
+               interrupts = <104 0x2 0 0>;
+               cell-index = <0x0>;
+       };
+       qportal1: qman-portal@4000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+               interrupts = <106 0x2 0 0>;
+               cell-index = <0x1>;
+       };
+       qportal2: qman-portal@8000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+               interrupts = <108 0x2 0 0>;
+               cell-index = <0x2>;
+       };
+       qportal3: qman-portal@c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+               interrupts = <110 0x2 0 0>;
+               cell-index = <0x3>;
+       };
+       qportal4: qman-portal@10000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+               interrupts = <112 0x2 0 0>;
+               cell-index = <0x4>;
+       };
+       qportal5: qman-portal@14000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+               interrupts = <114 0x2 0 0>;
+               cell-index = <0x5>;
+       };
+       qportal6: qman-portal@18000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+               interrupts = <116 0x2 0 0>;
+               cell-index = <0x6>;
+       };
+       qportal7: qman-portal@1c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+               interrupts = <118 0x2 0 0>;
+               cell-index = <0x7>;
+       };
+       qportal8: qman-portal@20000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+               interrupts = <120 0x2 0 0>;
+               cell-index = <0x8>;
+       };
+       qportal9: qman-portal@24000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+               interrupts = <122 0x2 0 0>;
+               cell-index = <0x9>;
+       };
+       qportal10: qman-portal@28000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+               interrupts = <124 0x2 0 0>;
+               cell-index = <0xa>;
+       };
+       qportal11: qman-portal@2c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+               interrupts = <126 0x2 0 0>;
+               cell-index = <0xb>;
+       };
+       qportal12: qman-portal@30000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+               interrupts = <128 0x2 0 0>;
+               cell-index = <0xc>;
+       };
+       qportal13: qman-portal@34000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+               interrupts = <130 0x2 0 0>;
+               cell-index = <0xd>;
+       };
+       qportal14: qman-portal@38000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+               interrupts = <132 0x2 0 0>;
+               cell-index = <0xe>;
+       };
+       qportal15: qman-portal@3c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+               interrupts = <134 0x2 0 0>;
+               cell-index = <0xf>;
+       };
+       qportal16: qman-portal@40000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+               interrupts = <136 0x2 0 0>;
+               cell-index = <0x10>;
+       };
+       qportal17: qman-portal@44000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+               interrupts = <138 0x2 0 0>;
+               cell-index = <0x11>;
+       };
+       qportal18: qman-portal@48000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x48000 0x4000>, <0x1012000 0x1000>;
+               interrupts = <140 0x2 0 0>;
+               cell-index = <0x12>;
+       };
+       qportal19: qman-portal@4c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x4c000 0x4000>, <0x1013000 0x1000>;
+               interrupts = <142 0x2 0 0>;
+               cell-index = <0x13>;
+       };
+       qportal20: qman-portal@50000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x50000 0x4000>, <0x1014000 0x1000>;
+               interrupts = <144 0x2 0 0>;
+               cell-index = <0x14>;
+       };
+       qportal21: qman-portal@54000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x54000 0x4000>, <0x1015000 0x1000>;
+               interrupts = <146 0x2 0 0>;
+               cell-index = <0x15>;
+       };
+       qportal22: qman-portal@58000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x58000 0x4000>, <0x1016000 0x1000>;
+               interrupts = <148 0x2 0 0>;
+               cell-index = <0x16>;
+       };
+       qportal23: qman-portal@5c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x5c000 0x4000>, <0x1017000 0x1000>;
+               interrupts = <150 0x2 0 0>;
+               cell-index = <0x17>;
+       };
+       qportal24: qman-portal@60000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x60000 0x4000>, <0x1018000 0x1000>;
+               interrupts = <152 0x2 0 0>;
+               cell-index = <0x18>;
+       };
+       qportal25: qman-portal@64000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x64000 0x4000>, <0x1019000 0x1000>;
+               interrupts = <154 0x2 0 0>;
+               cell-index = <0x19>;
+       };
+       qportal26: qman-portal@68000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x68000 0x4000>, <0x101a000 0x1000>;
+               interrupts = <156 0x2 0 0>;
+               cell-index = <0x1a>;
+       };
+       qportal27: qman-portal@6c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x6c000 0x4000>, <0x101b000 0x1000>;
+               interrupts = <158 0x2 0 0>;
+               cell-index = <0x1b>;
+       };
+       qportal28: qman-portal@70000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x70000 0x4000>, <0x101c000 0x1000>;
+               interrupts = <160 0x2 0 0>;
+               cell-index = <0x1c>;
+       };
+       qportal29: qman-portal@74000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x74000 0x4000>, <0x101d000 0x1000>;
+               interrupts = <162 0x2 0 0>;
+               cell-index = <0x1d>;
+       };
+       qportal30: qman-portal@78000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x78000 0x4000>, <0x101e000 0x1000>;
+               interrupts = <164 0x2 0 0>;
+               cell-index = <0x1e>;
+       };
+       qportal31: qman-portal@7c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x7c000 0x4000>, <0x101f000 0x1000>;
+               interrupts = <166 0x2 0 0>;
+               cell-index = <0x1f>;
+       };
+       qportal32: qman-portal@80000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x80000 0x4000>, <0x1020000 0x1000>;
+               interrupts = <168 0x2 0 0>;
+               cell-index = <0x20>;
+       };
+       qportal33: qman-portal@84000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x84000 0x4000>, <0x1021000 0x1000>;
+               interrupts = <170 0x2 0 0>;
+               cell-index = <0x21>;
+       };
+       qportal34: qman-portal@88000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x88000 0x4000>, <0x1022000 0x1000>;
+               interrupts = <172 0x2 0 0>;
+               cell-index = <0x22>;
+       };
+       qportal35: qman-portal@8c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x8c000 0x4000>, <0x1023000 0x1000>;
+               interrupts = <174 0x2 0 0>;
+               cell-index = <0x23>;
+       };
+       qportal36: qman-portal@90000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x90000 0x4000>, <0x1024000 0x1000>;
+               interrupts = <384 0x2 0 0>;
+               cell-index = <0x24>;
+       };
+       qportal37: qman-portal@94000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x94000 0x4000>, <0x1025000 0x1000>;
+               interrupts = <386 0x2 0 0>;
+               cell-index = <0x25>;
+       };
+       qportal38: qman-portal@98000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x98000 0x4000>, <0x1026000 0x1000>;
+               interrupts = <388 0x2 0 0>;
+               cell-index = <0x26>;
+       };
+       qportal39: qman-portal@9c000 {
+               compatible = "fsl,qman-portal";
+               reg = <0x9c000 0x4000>, <0x1027000 0x1000>;
+               interrupts = <390 0x2 0 0>;
+               cell-index = <0x27>;
+       };
+       qportal40: qman-portal@a0000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xa0000 0x4000>, <0x1028000 0x1000>;
+               interrupts = <392 0x2 0 0>;
+               cell-index = <0x28>;
+       };
+       qportal41: qman-portal@a4000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xa4000 0x4000>, <0x1029000 0x1000>;
+               interrupts = <394 0x2 0 0>;
+               cell-index = <0x29>;
+       };
+       qportal42: qman-portal@a8000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xa8000 0x4000>, <0x102a000 0x1000>;
+               interrupts = <396 0x2 0 0>;
+               cell-index = <0x2a>;
+       };
+       qportal43: qman-portal@ac000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xac000 0x4000>, <0x102b000 0x1000>;
+               interrupts = <398 0x2 0 0>;
+               cell-index = <0x2b>;
+       };
+       qportal44: qman-portal@b0000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xb0000 0x4000>, <0x102c000 0x1000>;
+               interrupts = <400 0x2 0 0>;
+               cell-index = <0x2c>;
+       };
+       qportal45: qman-portal@b4000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xb4000 0x4000>, <0x102d000 0x1000>;
+               interrupts = <402 0x2 0 0>;
+               cell-index = <0x2d>;
+       };
+       qportal46: qman-portal@b8000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xb8000 0x4000>, <0x102e000 0x1000>;
+               interrupts = <404 0x2 0 0>;
+               cell-index = <0x2e>;
+       };
+       qportal47: qman-portal@bc000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xbc000 0x4000>, <0x102f000 0x1000>;
+               interrupts = <406 0x2 0 0>;
+               cell-index = <0x2f>;
+       };
+       qportal48: qman-portal@c0000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xc0000 0x4000>, <0x1030000 0x1000>;
+               interrupts = <408 0x2 0 0>;
+               cell-index = <0x30>;
+       };
+       qportal49: qman-portal@c4000 {
+               compatible = "fsl,qman-portal";
+               reg = <0xc4000 0x4000>, <0x1031000 0x1000>;
+               interrupts = <410 0x2 0 0>;
+               cell-index = <0x31>;
+       };
+};
+
 &soc {
        #address-cells = <1>;
        #size-cells = <1>;
 /include/ "qoriq-sata2-0.dtsi"
 /include/ "qoriq-sata2-1.dtsi"
 /include/ "qoriq-sec5.0-0.dtsi"
+/include/ "qoriq-qman3.dtsi"
 /include/ "qoriq-bman1.dtsi"
 
        L2_1: l2-cache-controller@c20000 {
index 97e6d11d1e6ddda49a4342090e8bad936df0045a..48dab6a50437b46bc61648b0e0aa597fd78a4f7e 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        dcsr: dcsr@f00000000 {
                ranges = <0x0 0xf 0xf4000000 0x200000>;
        };
 
+       qportals: qman-portals@ff4200000 {
+               ranges = <0x0 0xf 0xf4200000 0x200000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
index eb76caae11d9848220c0ca426864576b2c15fee5..42796c5b05619df744b54b2fdc13b36706b97f3a 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        dcsr: dcsr@f00000000 {
                ranges = <0x0 0xf 0xf4000000 0x200000>;
        };
 
+       qportals: qman-portals@ff4200000 {
+               ranges = <0x0 0xf 0xf4200000 0x200000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
index 9236e3742a2376f6c4ac43095bc0a3446cde84b4..05a00a4d28612ea8dbc139d14cbe8394ee55b92b 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
+       };
+
+       qportals: qman-portals@ff000000 {
+               ranges = <0x0 0xf 0xff000000 0x200000>;
        };
 
        bportals: bman-portals@ff200000 {
index c1e69dc7188ecbebf419332a9615b278fdb1c7d0..d2bb0765bd5a644eb30c2d5721ffccfa398a816c 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        dcsr: dcsr@f00000000 {
                ranges = <0x0 0xf 0xf4000000 0x200000>;
        };
 
+       qportals: qman-portals@ff4200000 {
+               ranges = <0x0 0xf 0xf4200000 0x200000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
index 2192fe94866d5ac42528f3543826ef5a5bc24fd5..eca6c697cfd78e6132c6abcdbd6b4e40613e895c 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        dcsr: dcsr@f00000000 {
                ranges = <0x0 0xf 0xf4000000 0x200000>;
        };
 
+       qportals: qman-portals@ff4200000 {
+               ranges = <0x0 0xf 0xf4200000 0x200000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
index fad4416546423374507dfdb59cef032e9a2977a0..4f80c9d02c27f12210c920ba57e1a3af14bcc8f8 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        dcsr: dcsr@f00000000 {
                ranges = <0x0 0xf 0xf4000000 0x200000>;
        };
 
+       qportals: qman-portals@ff4200000 {
+               ranges = <0x0 0xf 0xf4200000 0x200000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
index 7382636dc560af9a10071a670235adb26cde4344..d0309a8b974997e37fc4e9afb58610453f40af94 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        dcsr: dcsr@f00000000 {
                ranges = <0x0 0xf 0xf4000000 0x200000>;
        };
 
+       qportals: qman-portals@ff4200000 {
+               ranges = <0x0 0xf 0xf4200000 0x200000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
index 35dabf5b60980614d151f8f461304f4608cb580c..05168236d3ab47d8db82ae6c0c4e4530a2e17f6e 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        dcsr: dcsr@f00000000 {
                ranges = <0x0 0xf 0xf4000000 0x200000>;
        };
 
+       qportals: qman-portals@ff4200000 {
+               ranges = <0x0 0xf 0xf4200000 0x200000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
diff --git a/arch/powerpc/boot/dts/t1023rdb.dts b/arch/powerpc/boot/dts/t1023rdb.dts
new file mode 100644 (file)
index 0000000..06b090a
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ * T1023 RDB Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/t102xsi-pre.dtsi"
+
+/ {
+       model = "fsl,T1023RDB";
+       compatible = "fsl,T1023RDB";
+       #address-cells = <2>;
+       #size-cells = <2>;
+       interrupt-parent = <&mpic>;
+
+       ifc: localbus@ffe124000 {
+               reg = <0xf 0xfe124000 0 0x2000>;
+               ranges = <0 0 0xf 0xe8000000 0x08000000
+                         1 0 0xf 0xff800000 0x00010000>;
+
+               nor@0,0 {
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       status = "disabled";
+                       compatible = "cfi-flash";
+                       reg = <0x0 0x0 0x8000000>;
+                       bank-width = <2>;
+                       device-width = <1>;
+               };
+
+               nand@1,0 {
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       compatible = "fsl,ifc-nand";
+                       reg = <0x2 0x0 0x10000>;
+               };
+       };
+
+       memory {
+               device_type = "memory";
+       };
+
+       dcsr: dcsr@f00000000 {
+               ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+       };
+
+       soc: soc@ffe000000 {
+               ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+               reg = <0xf 0xfe000000 0 0x00001000>;
+               spi@110000 {
+                       flash@0 {
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               compatible = "spansion,s25fl512s";
+                               reg = <0>;
+                               spi-max-frequency = <10000000>; /* input clk */
+                       };
+               };
+
+               i2c@118000 {
+                       eeprom@50 {
+                               compatible = "st,m24256";
+                               reg = <0x50>;
+                       };
+
+                       rtc@68 {
+                               compatible = "dallas,ds1339";
+                               reg = <0x68>;
+                               interrupts = <0x5 0x1 0 0>;
+                       };
+               };
+
+               i2c@118100 {
+               };
+       };
+
+       pci0: pcie@ffe240000 {
+               reg = <0xf 0xfe240000 0 0x10000>;
+               ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000
+                         0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>;
+               pcie@0 {
+                       ranges = <0x02000000 0 0xe0000000
+                                 0x02000000 0 0xe0000000
+                                 0 0x10000000
+
+                                 0x01000000 0 0x00000000
+                                 0x01000000 0 0x00000000
+                                 0 0x00010000>;
+               };
+       };
+
+       pci1: pcie@ffe250000 {
+               reg = <0xf 0xfe250000 0 0x10000>;
+               ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+                         0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>;
+               pcie@0 {
+                       ranges = <0x02000000 0 0xe0000000
+                                 0x02000000 0 0xe0000000
+                                 0 0x10000000
+
+                                 0x01000000 0 0x00000000
+                                 0x01000000 0 0x00000000
+                                 0 0x00010000>;
+               };
+       };
+
+       pci2: pcie@ffe260000 {
+               reg = <0xf 0xfe260000 0 0x10000>;
+               ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+                         0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+               pcie@0 {
+                       ranges = <0x02000000 0 0xe0000000
+                                 0x02000000 0 0xe0000000
+                                 0 0x10000000
+
+                                 0x01000000 0 0x00000000
+                                 0x01000000 0 0x00000000
+                                 0 0x00010000>;
+               };
+       };
+};
+
+/include/ "fsl/t1023si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/t1024qds.dts b/arch/powerpc/boot/dts/t1024qds.dts
new file mode 100644 (file)
index 0000000..f31fabb
--- /dev/null
@@ -0,0 +1,251 @@
+/*
+ * T1024 QDS Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/t102xsi-pre.dtsi"
+
+/ {
+       model = "fsl,T1024QDS";
+       compatible = "fsl,T1024QDS";
+       #address-cells = <2>;
+       #size-cells = <2>;
+       interrupt-parent = <&mpic>;
+
+       ifc: localbus@ffe124000 {
+               reg = <0xf 0xfe124000 0 0x2000>;
+               ranges = <0 0 0xf 0xe8000000 0x08000000
+                         2 0 0xf 0xff800000 0x00010000
+                         3 0 0xf 0xffdf0000 0x00008000>;
+
+               nor@0,0 {
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       compatible = "cfi-flash";
+                       reg = <0x0 0x0 0x8000000>;
+                       bank-width = <2>;
+                       device-width = <1>;
+               };
+
+               nand@2,0 {
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       compatible = "fsl,ifc-nand";
+                       reg = <0x2 0x0 0x10000>;
+               };
+
+               board-control@3,0 {
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       compatible = "fsl,tetra-fpga", "fsl,fpga-qixis";
+                       reg = <3 0 0x300>;
+                       ranges = <0 3 0 0x300>;
+               };
+       };
+
+       memory {
+               device_type = "memory";
+       };
+
+       dcsr: dcsr@f00000000 {
+               ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+       };
+
+       soc: soc@ffe000000 {
+               ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+               reg = <0xf 0xfe000000 0 0x00001000>;
+               spi@110000 {
+                       flash@0 {
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               compatible = "micron,n25q128a11";  /* 16MB */
+                               reg = <0>;
+                               spi-max-frequency = <10000000>;
+                       };
+
+                       flash@1 {
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               compatible = "sst,sst25wf040";  /* 512KB */
+                               reg = <1>;
+                               spi-max-frequency = <10000000>;
+                       };
+
+                       flash@2 {
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               compatible = "eon,en25s64";   /* 8MB */
+                               reg = <2>;
+                               spi-max-frequency = <10000000>;
+                       };
+
+                       slic@2 {
+                               compatible = "maxim,ds26522";
+                               reg = <2>;
+                               spi-max-frequency = <2000000>;
+                       };
+
+                       slic@3 {
+                               compatible = "maxim,ds26522";
+                               reg = <3>;
+                               spi-max-frequency = <2000000>;
+                       };
+               };
+
+               i2c@118000 {
+                       pca9547@77 {
+                               compatible = "nxp,pca9547";
+                               reg = <0x77>;
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+
+                               i2c@0 {
+                                       #address-cells = <1>;
+                                       #size-cells = <0>;
+                                       reg = <0x0>;
+
+                                       eeprom@50 {
+                                               compatible = "atmel,24c512";
+                                               reg = <0x50>;
+                                       };
+
+                                       eeprom@51 {
+                                               compatible = "atmel,24c02";
+                                               reg = <0x51>;
+                                       };
+
+                                       eeprom@57 {
+                                               compatible = "atmel,24c02";
+                                               reg = <0x57>;
+                                       };
+                               };
+
+                               i2c@2 {
+                                       #address-cells = <1>;
+                                       #size-cells = <0>;
+                                       reg = <0x2>;
+
+                                       ina220@40 {
+                                               compatible = "ti,ina220";
+                                               reg = <0x40>;
+                                               shunt-resistor = <1000>;
+                                       };
+
+                                       ina220@41 {
+                                               compatible = "ti,ina220";
+                                               reg = <0x41>;
+                                               shunt-resistor = <1000>;
+                                       };
+                               };
+
+                               i2c@3 {
+                                       #address-cells = <1>;
+                                       #size-cells = <0>;
+                                       reg = <0x3>;
+
+                                       adt7461@4c {
+                                               /* Thermal Monitor */
+                                               compatible = "adi,adt7461";
+                                               reg = <0x4c>;
+                                       };
+
+                                       eeprom@55 {
+                                               compatible = "atmel,24c02";
+                                               reg = <0x55>;
+                                       };
+
+                                       eeprom@56 {
+                                               compatible = "atmel,24c512";
+                                               reg = <0x56>;
+                                       };
+
+                                       eeprom@57 {
+                                               compatible = "atmel,24c512";
+                                               reg = <0x57>;
+                                       };
+                               };
+                       };
+                       rtc@68 {
+                               compatible = "dallas,ds3232";
+                               reg = <0x68>;
+                               interrupts = <0x5 0x1 0 0>;
+                       };
+               };
+       };
+
+       pci0: pcie@ffe240000 {
+               reg = <0xf 0xfe240000 0 0x10000>;
+               ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000
+                         0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>;
+               pcie@0 {
+                       ranges = <0x02000000 0 0xe0000000
+                                 0x02000000 0 0xe0000000
+                                 0 0x10000000
+
+                                 0x01000000 0 0x00000000
+                                 0x01000000 0 0x00000000
+                                 0 0x00010000>;
+               };
+       };
+
+       pci1: pcie@ffe250000 {
+               reg = <0xf 0xfe250000 0 0x10000>;
+               ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+                         0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>;
+               pcie@0 {
+                       ranges = <0x02000000 0 0xe0000000
+                                 0x02000000 0 0xe0000000
+                                 0 0x10000000
+
+                                 0x01000000 0 0x00000000
+                                 0x01000000 0 0x00000000
+                                 0 0x00010000>;
+               };
+       };
+
+       pci2: pcie@ffe260000 {
+               reg = <0xf 0xfe260000 0 0x10000>;
+               ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+                         0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+               pcie@0 {
+                       ranges = <0x02000000 0 0xe0000000
+                                 0x02000000 0 0xe0000000
+                                 0 0x10000000
+
+                                 0x01000000 0 0x00000000
+                                 0x01000000 0 0x00000000
+                                 0 0x00010000>;
+               };
+       };
+};
+
+/include/ "fsl/t1024si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/t1024rdb.dts b/arch/powerpc/boot/dts/t1024rdb.dts
new file mode 100644 (file)
index 0000000..733e723
--- /dev/null
@@ -0,0 +1,185 @@
+/*
+ * T1024 RDB Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/t102xsi-pre.dtsi"
+
+/ {
+       model = "fsl,T1024RDB";
+       compatible = "fsl,T1024RDB";
+       #address-cells = <2>;
+       #size-cells = <2>;
+       interrupt-parent = <&mpic>;
+
+       ifc: localbus@ffe124000 {
+               reg = <0xf 0xfe124000 0 0x2000>;
+               ranges = <0 0 0xf 0xe8000000 0x08000000
+                         2 0 0xf 0xff800000 0x00010000
+                         3 0 0xf 0xffdf0000 0x00008000>;
+
+               nor@0,0 {
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       compatible = "cfi-flash";
+                       reg = <0x0 0x0 0x8000000>;
+                       bank-width = <2>;
+                       device-width = <1>;
+               };
+
+               nand@1,0 {
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       compatible = "fsl,ifc-nand";
+                       reg = <0x2 0x0 0x10000>;
+               };
+
+               board-control@2,0 {
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       compatible = "fsl,t1024-cpld";
+                       reg = <3 0 0x300>;
+                       ranges = <0 3 0 0x300>;
+                       bank-width = <1>;
+                       device-width = <1>;
+               };
+       };
+
+       memory {
+               device_type = "memory";
+       };
+
+       dcsr: dcsr@f00000000 {
+               ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+       };
+
+       soc: soc@ffe000000 {
+               ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+               reg = <0xf 0xfe000000 0 0x00001000>;
+               spi@110000 {
+                       flash@0 {
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               compatible = "micron,n25q512ax3";
+                               reg = <0>;
+                               spi-max-frequency = <10000000>; /* input clk */
+                       };
+
+                       slic@1 {
+                               compatible = "maxim,ds26522";
+                               reg = <1>;
+                               spi-max-frequency = <2000000>;
+                       };
+
+                       slic@2 {
+                               compatible = "maxim,ds26522";
+                               reg = <2>;
+                               spi-max-frequency = <2000000>;
+                       };
+               };
+
+               i2c@118000 {
+                       adt7461@4c {
+                               /* Thermal Monitor */
+                               compatible = "adi,adt7461";
+                               reg = <0x4c>;
+                       };
+
+                       eeprom@50 {
+                               compatible = "atmel,24c256";
+                               reg = <0x50>;
+                       };
+
+                       rtc@68 {
+                               compatible = "dallas,ds1339";
+                               reg = <0x68>;
+                               interrupts = <0x1 0x1 0 0>;
+                       };
+               };
+
+               i2c@118100 {
+                       pca9546@77 {
+                               compatible = "nxp,pca9546";
+                               reg = <0x77>;
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                       };
+               };
+       };
+
+       pci0: pcie@ffe240000 {
+               reg = <0xf 0xfe240000 0 0x10000>;
+               ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000
+                         0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>;
+               pcie@0 {
+                       ranges = <0x02000000 0 0xe0000000
+                                 0x02000000 0 0xe0000000
+                                 0 0x10000000
+
+                                 0x01000000 0 0x00000000
+                                 0x01000000 0 0x00000000
+                                 0 0x00010000>;
+               };
+       };
+
+       pci1: pcie@ffe250000 {
+               reg = <0xf 0xfe250000 0 0x10000>;
+               ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+                         0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>;
+               pcie@0 {
+                       ranges = <0x02000000 0 0xe0000000
+                                 0x02000000 0 0xe0000000
+                                 0 0x10000000
+
+                                 0x01000000 0 0x00000000
+                                 0x01000000 0 0x00000000
+                                 0 0x00010000>;
+               };
+       };
+
+       pci2: pcie@ffe260000 {
+               reg = <0xf 0xfe260000 0 0x10000>;
+               ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+                         0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+               pcie@0 {
+                       ranges = <0x02000000 0 0xe0000000
+                                 0x02000000 0 0xe0000000
+                                 0 0x10000000
+
+                                 0x01000000 0 0x00000000
+                                 0x01000000 0 0x00000000
+                                 0 0x00010000>;
+               };
+       };
+};
+
+/include/ "fsl/t1024si-post.dtsi"
index f7e9bfbeefc7b1b288229b6a99172db10582dd45..1498d1e4aecf6338244e71b0b4f9c35b05d9883f 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        ifc: localbus@ffe124000 {
                ranges = <0x0 0xf 0xf4000000 0x2000000>;
        };
 
+       qportals: qman-portals@ff6000000 {
+               ranges = <0x0 0xf 0xf6000000 0x2000000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
index 76e07a3f2ca8b993e72b99ea0233678387d51e05..830ea484295b846e5b07f1b05998f07760f7fa80 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        ifc: localbus@ffe124000 {
                ranges = <0x0 0xf 0xf4000000 0x2000000>;
        };
 
+       qportals: qman-portals@ff6000000 {
+               ranges = <0x0 0xf 0xf6000000 0x2000000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
index c42e07f4f648d8db7cb4180f150377d06fa5e995..869f9159b4d14ff9ed775c41f8dd2f17b8df0259 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        ifc: localbus@ffe124000 {
                ranges = <0x0 0xf 0xf4000000 0x2000000>;
        };
 
+       qportals: qman-portals@ff6000000 {
+               ranges = <0x0 0xf 0xf6000000 0x2000000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
index e1463b165d0ecf3282c67ccfd5aad8fdd90ce1c6..693d2a8fa01cde3b78e2717af34bca06a737fa72 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        ifc: localbus@ffe124000 {
                ranges = <0x0 0xf 0xf4000000 0x2000000>;
        };
 
+       qportals: qman-portals@ff6000000 {
+               ranges = <0x0 0xf 0xf6000000 0x2000000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
index 6df77766410b13591fb5fc4c658675f7105630f0..93722da10e16899da336c4f6f5a0095e03c1f81e 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        dcsr: dcsr@f00000000 {
                ranges = <0x0 0xf 0xf4000000 0x2000000>;
        };
 
+       qportals: qman-portals@ff6000000 {
+               ranges = <0x0 0xf 0xf6000000 0x2000000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
index 46049cf37f022f3f0f791cbf99218bd964e0e4ae..993eb4b8a487543663c8530f44397d350648c468 100644 (file)
                        size = <0 0x1000000>;
                        alignment = <0 0x1000000>;
                };
+               qman_fqd: qman-fqd {
+                       size = <0 0x400000>;
+                       alignment = <0 0x400000>;
+               };
+               qman_pfdr: qman-pfdr {
+                       size = <0 0x2000000>;
+                       alignment = <0 0x2000000>;
+               };
        };
 
        dcsr: dcsr@f00000000 {
                ranges = <0x0 0xf 0xf4000000 0x2000000>;
        };
 
+       qportals: qman-portals@ff6000000 {
+               ranges = <0x0 0xf 0xf6000000 0x2000000>;
+       };
+
        soc: soc@ffe000000 {
                ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
                reg = <0xf 0xfe000000 0 0x00001000>;
index 34f3ea1729e0b052773766094ff30cca3c912805..858b539d004bbfd22793bc71106d07ae0579f393 100644 (file)
@@ -108,7 +108,7 @@ CONFIG_SENSORS_LM90=y
 CONFIG_WATCHDOG=y
 CONFIG_USB=y
 CONFIG_USB_MON=y
-CONFIG_USB_ISP1760_HCD=y
+CONFIG_USB_ISP1760=y
 CONFIG_USB_STORAGE=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
diff --git a/arch/powerpc/configs/le.config b/arch/powerpc/configs/le.config
new file mode 100644 (file)
index 0000000..ee43fdb
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_CPU_LITTLE_ENDIAN=y
index aad501ae38344f4f16b220845b40d6ce0305a2f8..a97efc2146fdfe949bf0f569c549497d4dc5c6cf 100644 (file)
@@ -155,6 +155,7 @@ CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
 CONFIG_PCNET32=y
 CONFIG_TIGON3=y
+CONFIG_BNX2X=m
 CONFIG_CHELSIO_T1=m
 CONFIG_BE2NET=m
 CONFIG_S2IO=m
index c2e39f66b182a6b4a8d1e9558eef6824a2134d0a..0d9efcedaf3473c4573548995069b0e5cec5c765 100644 (file)
@@ -154,6 +154,7 @@ CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
 CONFIG_PCNET32=y
 CONFIG_TIGON3=y
+CONFIG_BNX2X=m
 CONFIG_CHELSIO_T1=m
 CONFIG_BE2NET=m
 CONFIG_S2IO=m
@@ -297,7 +298,6 @@ CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
 CONFIG_XMON=y
-CONFIG_XMON_DEFAULT=y
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_HMAC=y
diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig
deleted file mode 100644 (file)
index 09bc96e..0000000
+++ /dev/null
@@ -1,319 +0,0 @@
-CONFIG_PPC64=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2048
-CONFIG_CPU_LITTLE_ENDIAN=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
-CONFIG_AUDIT=y
-CONFIG_AUDITSYSCALL=y
-CONFIG_IRQ_DOMAIN_DEBUG=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
-CONFIG_TASK_XACCT=y
-CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_NUMA_BALANCING=y
-CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y
-CONFIG_CGROUPS=y
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_CPUACCT=y
-CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
-CONFIG_CGROUP_PERF=y
-CONFIG_CGROUP_SCHED=y
-CONFIG_USER_NS=y
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
-CONFIG_KPROBES=y
-CONFIG_JUMP_LABEL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_PPC_SPLPAR=y
-CONFIG_SCANLOG=m
-CONFIG_PPC_SMLPAR=y
-CONFIG_DTL=y
-# CONFIG_PPC_PMAC is not set
-CONFIG_RTAS_FLASH=m
-CONFIG_IBMEBUS=y
-CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
-CONFIG_HZ_100=y
-CONFIG_BINFMT_MISC=m
-CONFIG_PPC_TRANSACTIONAL_MEM=y
-CONFIG_KEXEC=y
-CONFIG_IRQ_ALL_CPUS=y
-CONFIG_MEMORY_HOTPLUG=y
-CONFIG_MEMORY_HOTREMOVE=y
-CONFIG_KSM=y
-CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_PPC_64K_PAGES=y
-CONFIG_PPC_SUBPAGE_PROT=y
-CONFIG_SCHED_SMT=y
-CONFIG_HOTPLUG_PCI=y
-CONFIG_HOTPLUG_PCI_RPA=m
-CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_NET_IPIP=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-# CONFIG_IPV6 is not set
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_ADVANCED is not set
-CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_PARPORT=m
-CONFIG_PARPORT_PC=m
-CONFIG_BLK_DEV_FD=m
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=65536
-CONFIG_VIRTIO_BLK=m
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_AMD74XX=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_FC_ATTRS=y
-CONFIG_SCSI_CXGB3_ISCSI=m
-CONFIG_SCSI_CXGB4_ISCSI=m
-CONFIG_SCSI_BNX2_ISCSI=m
-CONFIG_BE2ISCSI=m
-CONFIG_SCSI_MPT2SAS=m
-CONFIG_SCSI_IBMVSCSI=y
-CONFIG_SCSI_IBMVFC=m
-CONFIG_SCSI_SYM53C8XX_2=y
-CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
-CONFIG_SCSI_IPR=y
-CONFIG_SCSI_QLA_FC=m
-CONFIG_SCSI_QLA_ISCSI=m
-CONFIG_SCSI_LPFC=m
-CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
-CONFIG_SCSI_DH_RDAC=m
-CONFIG_SCSI_DH_ALUA=m
-CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
-# CONFIG_ATA_SFF is not set
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=y
-CONFIG_MD_LINEAR=y
-CONFIG_MD_RAID0=y
-CONFIG_MD_RAID1=y
-CONFIG_MD_RAID10=m
-CONFIG_MD_RAID456=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
-CONFIG_BLK_DEV_DM=y
-CONFIG_DM_CRYPT=m
-CONFIG_DM_SNAPSHOT=m
-CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_MIRROR=m
-CONFIG_DM_ZERO=m
-CONFIG_DM_MULTIPATH=m
-CONFIG_DM_MULTIPATH_QL=m
-CONFIG_DM_MULTIPATH_ST=m
-CONFIG_DM_UEVENT=y
-CONFIG_BONDING=m
-CONFIG_DUMMY=m
-CONFIG_MACVLAN=m
-CONFIG_MACVTAP=m
-CONFIG_VXLAN=m
-CONFIG_NETCONSOLE=y
-CONFIG_TUN=m
-CONFIG_VETH=m
-CONFIG_VIRTIO_NET=m
-CONFIG_VHOST_NET=m
-CONFIG_VORTEX=y
-CONFIG_ACENIC=m
-CONFIG_ACENIC_OMIT_TIGON_I=y
-CONFIG_PCNET32=y
-CONFIG_TIGON3=y
-CONFIG_CHELSIO_T1=m
-CONFIG_BE2NET=m
-CONFIG_S2IO=m
-CONFIG_IBMVETH=y
-CONFIG_EHEA=y
-CONFIG_E100=y
-CONFIG_E1000=y
-CONFIG_E1000E=y
-CONFIG_IXGB=m
-CONFIG_IXGBE=m
-CONFIG_MLX4_EN=m
-CONFIG_MYRI10GE=m
-CONFIG_QLGE=m
-CONFIG_NETXEN_NIC=m
-CONFIG_PPP=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPPOE=m
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_EVDEV=m
-CONFIG_INPUT_MISC=y
-CONFIG_INPUT_PCSPKR=m
-# CONFIG_SERIO_SERPORT is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_ICOM=m
-CONFIG_SERIAL_JSM=m
-CONFIG_HVC_CONSOLE=y
-CONFIG_HVC_RTAS=y
-CONFIG_HVCS=m
-CONFIG_VIRTIO_CONSOLE=m
-CONFIG_IBM_BSR=m
-CONFIG_GEN_RTC=y
-CONFIG_RAW_DRIVER=y
-CONFIG_MAX_RAW_DEVS=1024
-CONFIG_FB=y
-CONFIG_FIRMWARE_EDID=y
-CONFIG_FB_OF=y
-CONFIG_FB_MATROX=y
-CONFIG_FB_MATROX_MILLENIUM=y
-CONFIG_FB_MATROX_MYSTIQUE=y
-CONFIG_FB_MATROX_G=y
-CONFIG_FB_RADEON=y
-CONFIG_FB_IBM_GXT4500=y
-CONFIG_LCD_PLATFORM=m
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_LOGO=y
-CONFIG_HID_GYRATION=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SUNPLUS=y
-CONFIG_USB_HIDDEV=y
-CONFIG_USB=y
-CONFIG_USB_MON=m
-CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_HCD_PPC_OF is not set
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_STORAGE=m
-CONFIG_INFINIBAND=m
-CONFIG_INFINIBAND_USER_MAD=m
-CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_EHCA=m
-CONFIG_INFINIBAND_CXGB3=m
-CONFIG_INFINIBAND_CXGB4=m
-CONFIG_MLX4_INFINIBAND=m
-CONFIG_INFINIBAND_IPOIB=m
-CONFIG_INFINIBAND_IPOIB_CM=y
-CONFIG_INFINIBAND_SRP=m
-CONFIG_INFINIBAND_ISER=m
-CONFIG_VIRTIO_PCI=m
-CONFIG_VIRTIO_BALLOON=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_EXT4_FS=y
-CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_EXT4_FS_SECURITY=y
-CONFIG_REISERFS_FS=y
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS_SECURITY=y
-CONFIG_JFS_FS=m
-CONFIG_JFS_POSIX_ACL=y
-CONFIG_JFS_SECURITY=y
-CONFIG_XFS_FS=m
-CONFIG_XFS_POSIX_ACL=y
-CONFIG_BTRFS_FS=m
-CONFIG_BTRFS_FS_POSIX_ACL=y
-CONFIG_NILFS2_FS=m
-CONFIG_AUTOFS4_FS=m
-CONFIG_FUSE_FS=m
-CONFIG_OVERLAY_FS=m
-CONFIG_ISO9660_FS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
-CONFIG_HUGETLBFS=y
-CONFIG_CRAMFS=m
-CONFIG_SQUASHFS=m
-CONFIG_SQUASHFS_XATTR=y
-CONFIG_SQUASHFS_LZO=y
-CONFIG_SQUASHFS_XZ=y
-CONFIG_PSTORE=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3_ACL=y
-CONFIG_NFSD_V4=y
-CONFIG_CIFS=m
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_NLS_DEFAULT="utf8"
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=y
-CONFIG_NLS_ISO8859_1=y
-CONFIG_NLS_UTF8=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_STACK_USAGE=y
-CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
-CONFIG_LATENCYTOP=y
-CONFIG_SCHED_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_CODE_PATCHING_SELFTEST=y
-CONFIG_FTR_FIXUP_SELFTEST=y
-CONFIG_MSI_BITMAP_SELFTEST=y
-CONFIG_XMON=y
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_VIRTUALIZATION=y
-CONFIG_KVM_BOOK3S_64=m
-CONFIG_KVM_BOOK3S_64_HV=m
index 6367b8347dad9b0bceb3507573450b2e7c672400..b118072670fb15ceac83fce37967f1b07e4cffec 100644 (file)
@@ -242,11 +242,13 @@ enum {
 
 /* We only set the TM feature if the kernel was compiled with TM supprt */
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-#define CPU_FTR_TM_COMP                CPU_FTR_TM
-#define PPC_FEATURE2_HTM_COMP  PPC_FEATURE2_HTM
+#define CPU_FTR_TM_COMP                        CPU_FTR_TM
+#define PPC_FEATURE2_HTM_COMP          PPC_FEATURE2_HTM
+#define PPC_FEATURE2_HTM_NOSC_COMP     PPC_FEATURE2_HTM_NOSC
 #else
-#define CPU_FTR_TM_COMP                0
-#define PPC_FEATURE2_HTM_COMP  0
+#define CPU_FTR_TM_COMP                        0
+#define PPC_FEATURE2_HTM_COMP          0
+#define PPC_FEATURE2_HTM_NOSC_COMP     0
 #endif
 
 /* We need to mark all pages as being coherent if we're SMP or we have a
@@ -366,7 +368,7 @@ enum {
            CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
            CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
 #define CPU_FTRS_CLASSIC32     (CPU_FTR_COMMON | CPU_FTR_USE_TB)
-#define CPU_FTRS_8XX   (CPU_FTR_USE_TB)
+#define CPU_FTRS_8XX   (CPU_FTR_USE_TB | CPU_FTR_NOEXECUTE)
 #define CPU_FTRS_40X   (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
 #define CPU_FTRS_44X   (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
 #define CPU_FTRS_440x6 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
index 5be6c4753667ef66b5c0151979802a5d2c627262..ba42e46ea58eeed448d3d7abb5c67f5570e96bca 100644 (file)
@@ -31,9 +31,9 @@ extern cpumask_t threads_core_mask;
 /* cpu_thread_mask_to_cores - Return a cpumask of one per cores
  *                            hit by the argument
  *
- * @threads:   a cpumask of threads
+ * @threads:   a cpumask of online threads
  *
- * This function returns a cpumask which will have one "cpu" (or thread)
+ * This function returns a cpumask which will have one online cpu's
  * bit set for each core that has at least one thread set in the argument.
  *
  * This can typically be used for things like IPI for tlb invalidations
@@ -42,13 +42,16 @@ extern cpumask_t threads_core_mask;
 static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
 {
        cpumask_t       tmp, res;
-       int             i;
+       int             i, cpu;
 
        cpumask_clear(&res);
        for (i = 0; i < NR_CPUS; i += threads_per_core) {
                cpumask_shift_left(&tmp, &threads_core_mask, i);
-               if (cpumask_intersects(threads, &tmp))
-                       cpumask_set_cpu(i, &res);
+               if (cpumask_intersects(threads, &tmp)) {
+                       cpu = cpumask_next_and(-1, &tmp, cpu_online_mask);
+                       if (cpu < nr_cpu_ids)
+                               cpumask_set_cpu(cpu, &res);
+               }
        }
        return res;
 }
index 9f1371bab5fc2141a2345ce5ad1fac286465e868..e9bdda88f1fbb1e38af6b46abcafc330f7f2cdbd 100644 (file)
@@ -46,6 +46,9 @@ struct dev_archdata {
 #ifdef CONFIG_FAIL_IOMMU
        int fail_iommu;
 #endif
+#ifdef CONFIG_CXL_BASE
+       struct cxl_context      *cxl_ctx;
+#endif
 };
 
 struct pdev_archdata {
index a52db28ecc1e1f45287d24806f554a46ec87c6f6..c5eb86f3d452fbe66d44ae1cce9bbfff91a8b14d 100644 (file)
@@ -27,6 +27,8 @@
 #include <linux/time.h>
 #include <linux/atomic.h>
 
+#include <uapi/asm/eeh.h>
+
 struct pci_dev;
 struct pci_bus;
 struct pci_dn;
@@ -185,11 +187,6 @@ enum {
 #define EEH_STATE_DMA_ACTIVE   (1 << 4)        /* Active DMA           */
 #define EEH_STATE_MMIO_ENABLED (1 << 5)        /* MMIO enabled         */
 #define EEH_STATE_DMA_ENABLED  (1 << 6)        /* DMA enabled          */
-#define EEH_PE_STATE_NORMAL            0       /* Normal state         */
-#define EEH_PE_STATE_RESET             1       /* PE reset asserted    */
-#define EEH_PE_STATE_STOPPED_IO_DMA    2       /* Frozen PE            */
-#define EEH_PE_STATE_STOPPED_DMA       4       /* Stopped DMA, Enabled IO */
-#define EEH_PE_STATE_UNAVAIL           5       /* Unavailable          */
 #define EEH_RESET_DEACTIVATE   0       /* Deactivate the PE reset      */
 #define EEH_RESET_HOT          1       /* Hot reset                    */
 #define EEH_RESET_FUNDAMENTAL  3       /* Fundamental reset            */
@@ -294,6 +291,8 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option);
 int eeh_pe_get_state(struct eeh_pe *pe);
 int eeh_pe_reset(struct eeh_pe *pe, int option);
 int eeh_pe_configure(struct eeh_pe *pe);
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+                     unsigned long addr, unsigned long mask);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
index 1e27d63385655b9c275937b1ee886b33b8c50ed9..ca18cff909006b66017587c30777665feffac913 100644 (file)
 extern int iommu_is_off;
 extern int iommu_force_on;
 
+struct iommu_table_ops {
+       /*
+        * When called with direction==DMA_NONE, it is equal to clear().
+        * uaddr is a linear map address.
+        */
+       int (*set)(struct iommu_table *tbl,
+                       long index, long npages,
+                       unsigned long uaddr,
+                       enum dma_data_direction direction,
+                       struct dma_attrs *attrs);
+#ifdef CONFIG_IOMMU_API
+       /*
+        * Exchanges existing TCE with new TCE plus direction bits;
+        * returns old TCE and DMA direction mask.
+        * @tce is a physical address.
+        */
+       int (*exchange)(struct iommu_table *tbl,
+                       long index,
+                       unsigned long *hpa,
+                       enum dma_data_direction *direction);
+#endif
+       void (*clear)(struct iommu_table *tbl,
+                       long index, long npages);
+       /* get() returns a physical address */
+       unsigned long (*get)(struct iommu_table *tbl, long index);
+       void (*flush)(struct iommu_table *tbl);
+       void (*free)(struct iommu_table *tbl);
+};
+
+/* These are used by VIO */
+extern struct iommu_table_ops iommu_table_lpar_multi_ops;
+extern struct iommu_table_ops iommu_table_pseries_ops;
+
 /*
  * IOMAP_MAX_ORDER defines the largest contiguous block
  * of dma space we can get.  IOMAP_MAX_ORDER = 13
@@ -64,6 +97,9 @@ struct iommu_pool {
 struct iommu_table {
        unsigned long  it_busno;     /* Bus number this table belongs to */
        unsigned long  it_size;      /* Size of iommu table in entries */
+       unsigned long  it_indirect_levels;
+       unsigned long  it_level_size;
+       unsigned long  it_allocated_size;
        unsigned long  it_offset;    /* Offset into global table */
        unsigned long  it_base;      /* mapped address of tce table */
        unsigned long  it_index;     /* which iommu table this is */
@@ -75,15 +111,16 @@ struct iommu_table {
        struct iommu_pool pools[IOMMU_NR_POOLS];
        unsigned long *it_map;       /* A simple allocation bitmap for now */
        unsigned long  it_page_shift;/* table iommu page size */
-#ifdef CONFIG_IOMMU_API
-       struct iommu_group *it_group;
-#endif
-       void (*set_bypass)(struct iommu_table *tbl, bool enable);
-#ifdef CONFIG_PPC_POWERNV
-       void           *data;
-#endif
+       struct list_head it_group_list;/* List of iommu_table_group_link */
+       unsigned long *it_userspace; /* userspace view of the table */
+       struct iommu_table_ops *it_ops;
 };
 
+#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
+               ((tbl)->it_userspace ? \
+                       &((tbl)->it_userspace[(entry) - (tbl)->it_offset]) : \
+                       NULL)
+
 /* Pure 2^n version of get_order */
 static inline __attribute_const__
 int get_iommu_order(unsigned long size, struct iommu_table *tbl)
@@ -112,14 +149,62 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
  */
 extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
                                            int nid);
+#define IOMMU_TABLE_GROUP_MAX_TABLES   2
+
+struct iommu_table_group;
+
+struct iommu_table_group_ops {
+       unsigned long (*get_table_size)(
+                       __u32 page_shift,
+                       __u64 window_size,
+                       __u32 levels);
+       long (*create_table)(struct iommu_table_group *table_group,
+                       int num,
+                       __u32 page_shift,
+                       __u64 window_size,
+                       __u32 levels,
+                       struct iommu_table **ptbl);
+       long (*set_window)(struct iommu_table_group *table_group,
+                       int num,
+                       struct iommu_table *tblnew);
+       long (*unset_window)(struct iommu_table_group *table_group,
+                       int num);
+       /* Switch ownership from platform code to external user (e.g. VFIO) */
+       void (*take_ownership)(struct iommu_table_group *table_group);
+       /* Switch ownership from external user (e.g. VFIO) back to core */
+       void (*release_ownership)(struct iommu_table_group *table_group);
+};
+
+struct iommu_table_group_link {
+       struct list_head next;
+       struct rcu_head rcu;
+       struct iommu_table_group *table_group;
+};
+
+struct iommu_table_group {
+       /* IOMMU properties */
+       __u32 tce32_start;
+       __u32 tce32_size;
+       __u64 pgsizes; /* Bitmap of supported page sizes */
+       __u32 max_dynamic_windows_supported;
+       __u32 max_levels;
+
+       struct iommu_group *group;
+       struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+       struct iommu_table_group_ops *ops;
+};
+
 #ifdef CONFIG_IOMMU_API
-extern void iommu_register_group(struct iommu_table *tbl,
+
+extern void iommu_register_group(struct iommu_table_group *table_group,
                                 int pci_domain_number, unsigned long pe_num);
 extern int iommu_add_device(struct device *dev);
 extern void iommu_del_device(struct device *dev);
 extern int __init tce_iommu_bus_notifier_init(void);
+extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+               unsigned long *hpa, enum dma_data_direction *direction);
 #else
-static inline void iommu_register_group(struct iommu_table *tbl,
+static inline void iommu_register_group(struct iommu_table_group *table_group,
                                        int pci_domain_number,
                                        unsigned long pe_num)
 {
@@ -140,13 +225,6 @@ static inline int __init tce_iommu_bus_notifier_init(void)
 }
 #endif /* !CONFIG_IOMMU_API */
 
-static inline void set_iommu_table_base_and_group(struct device *dev,
-                                                 void *base)
-{
-       set_iommu_table_base(dev, base);
-       iommu_add_device(dev);
-}
-
 extern int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
                            struct scatterlist *sglist, int nelems,
                            unsigned long mask,
@@ -197,20 +275,13 @@ extern int iommu_tce_clear_param_check(struct iommu_table *tbl,
                unsigned long npages);
 extern int iommu_tce_put_param_check(struct iommu_table *tbl,
                unsigned long ioba, unsigned long tce);
-extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
-               unsigned long hwaddr, enum dma_data_direction direction);
-extern unsigned long iommu_clear_tce(struct iommu_table *tbl,
-               unsigned long entry);
-extern int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
-               unsigned long entry, unsigned long pages);
-extern int iommu_put_tce_user_mode(struct iommu_table *tbl,
-               unsigned long entry, unsigned long tce);
 
 extern void iommu_flush_tce(struct iommu_table *tbl);
 extern int iommu_take_ownership(struct iommu_table *tbl);
 extern void iommu_release_ownership(struct iommu_table *tbl);
 
 extern enum dma_data_direction iommu_tce_direction(unsigned long tce);
+extern unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir);
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_IOMMU_H */
index ef8899432ae72dd0f91359cfba7e74d3c162b620..952579f5e79a93ecad26afeda4a5c4ef2d133250 100644 (file)
@@ -65,31 +65,6 @@ struct machdep_calls {
         * destroyed as well */
        void            (*hpte_clear_all)(void);
 
-       int             (*tce_build)(struct iommu_table *tbl,
-                                    long index,
-                                    long npages,
-                                    unsigned long uaddr,
-                                    enum dma_data_direction direction,
-                                    struct dma_attrs *attrs);
-       void            (*tce_free)(struct iommu_table *tbl,
-                                   long index,
-                                   long npages);
-       unsigned long   (*tce_get)(struct iommu_table *tbl,
-                                   long index);
-       void            (*tce_flush)(struct iommu_table *tbl);
-
-       /* _rm versions are for real mode use only */
-       int             (*tce_build_rm)(struct iommu_table *tbl,
-                                    long index,
-                                    long npages,
-                                    unsigned long uaddr,
-                                    enum dma_data_direction direction,
-                                    struct dma_attrs *attrs);
-       void            (*tce_free_rm)(struct iommu_table *tbl,
-                                   long index,
-                                   long npages);
-       void            (*tce_flush_rm)(struct iommu_table *tbl);
-
        void __iomem *  (*ioremap)(phys_addr_t addr, unsigned long size,
                                   unsigned long flags, void *caller);
        void            (*iounmap)(volatile void __iomem *token);
@@ -131,12 +106,6 @@ struct machdep_calls {
        /* To setup PHBs when using automatic OF platform driver for PCI */
        int             (*pci_setup_phb)(struct pci_controller *host);
 
-#ifdef CONFIG_PCI_MSI
-       int             (*setup_msi_irqs)(struct pci_dev *dev,
-                                         int nvec, int type);
-       void            (*teardown_msi_irqs)(struct pci_dev *dev);
-#endif
-
        void            (*restart)(char *cmd);
        void            (*halt)(void);
        void            (*panic)(char *str);
index 986b9e1e1044f62a9b66454ae71d9a69a3bbd366..f05500a29a60d6f23f4e23de99821995511ccae0 100644 (file)
 #define MI_Ks          0x80000000      /* Should not be set */
 #define MI_Kp          0x40000000      /* Should always be set */
 
+/*
+ * All pages' PP exec bits are set to 000, which means Execute for Supervisor
+ * and no Execute for User.
+ * Then we use the APG to say whether accesses are according to Page rules,
+ * "all Supervisor" rules (Exec for all) and "all User" rules (Exec for noone)
+ * Therefore, we define 4 APG groups. msb is _PAGE_EXEC, lsb is _PAGE_USER
+ * 0 (00) => Not User, no exec => 11 (all accesses performed as user)
+ * 1 (01) => User but no exec => 11 (all accesses performed as user)
+ * 2 (10) => Not User, exec => 01 (rights according to page definition)
+ * 3 (11) => User, exec => 00 (all accesses performed as supervisor)
+ */
+#define MI_APG_INIT    0xf4ffffff
+
 /* The effective page number register.  When read, contains the information
  * about the last instruction TLB miss.  When MI_RPN is written, bits in
  * this register are used to create the TLB entry.
 #define MD_Ks          0x80000000      /* Should not be set */
 #define MD_Kp          0x40000000      /* Should always be set */
 
+/*
+ * All pages' PP data bits are set to either 000 or 011, which means
+ * respectively RW for Supervisor and no access for User, or RO for
+ * Supervisor and no access for user.
+ * Then we use the APG to say whether accesses are according to Page rules or
+ * "all Supervisor" rules (Access to all)
+ * Therefore, we define 2 APG groups. lsb is _PAGE_USER
+ * 0 => No user => 01 (all accesses performed according to page definition)
+ * 1 => User => 00 (all accesses performed as supervisor
+ *                                 according to page definition)
+ */
+#define MD_APG_INIT    0x4fffffff
+
 /* The effective page number register.  When read, contains the information
 * about the last data TLB miss.  When MD_RPN is written, bits in
  * this register are used to create the TLB entry.
@@ -145,7 +171,14 @@ typedef struct {
 } mm_context_t;
 #endif /* !__ASSEMBLY__ */
 
+#if (PAGE_SHIFT == 12)
 #define mmu_virtual_psize      MMU_PAGE_4K
+#elif (PAGE_SHIFT == 14)
+#define mmu_virtual_psize      MMU_PAGE_16K
+#else
+#error "Unsupported PAGE_SIZE"
+#endif
+
 #define mmu_linear_psize       MMU_PAGE_8M
 
 #endif /* _ASM_POWERPC_MMU_8XX_H_ */
index 1da6a81ce541fad8f083f23e3509dda137928d0a..a82f5347540ae2c875733253a8639a089399fa3f 100644 (file)
@@ -536,6 +536,9 @@ typedef struct {
        /* for 4K PTE fragment support */
        void *pte_frag;
 #endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+       struct list_head iommu_group_mem_list;
+#endif
 } mm_context_t;
 
 
index 73382eba02dccf2d8373bc860e4ebbe7fe7cac0a..3e5184210d9b984fca5aa57cfe3c70e6539d74bb 100644 (file)
  */
 extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 extern void destroy_context(struct mm_struct *mm);
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+struct mm_iommu_table_group_mem_t;
+
+extern bool mm_iommu_preregistered(void);
+extern long mm_iommu_get(unsigned long ua, unsigned long entries,
+               struct mm_iommu_table_group_mem_t **pmem);
+extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem);
+extern void mm_iommu_init(mm_context_t *ctx);
+extern void mm_iommu_cleanup(mm_context_t *ctx);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua,
+               unsigned long size);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua,
+               unsigned long entries);
+extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+               unsigned long ua, unsigned long *hpa);
+extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
+extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
+#endif
 
 extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
 extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
index 0321a909e663bf1899e81154bfcff65b0febd7b1..e9e4c52f368543324acbbd992b9fc5cb14fd0f26 100644 (file)
 #define OPAL_FLASH_READ                                110
 #define OPAL_FLASH_WRITE                       111
 #define OPAL_FLASH_ERASE                       112
-#define OPAL_LAST                              112
+#define OPAL_PRD_MSG                           113
+#define OPAL_LAST                              113
 
 /* Device tree flags */
 
 #define OPAL_PM_WINKLE_ENABLED         0x00040000
 #define OPAL_PM_SLEEP_ENABLED_ER1      0x00080000 /* with workaround */
 
+/*
+ * OPAL_CONFIG_CPU_IDLE_STATE parameters
+ */
+#define OPAL_CONFIG_IDLE_FASTSLEEP     1
+#define OPAL_CONFIG_IDLE_UNDO          0
+#define OPAL_CONFIG_IDLE_APPLY         1
+
 #ifndef __ASSEMBLY__
 
 /* Other enums */
@@ -352,6 +360,7 @@ enum opal_msg_type {
        OPAL_MSG_SHUTDOWN,              /* params[0] = 1 reboot, 0 shutdown */
        OPAL_MSG_HMI_EVT,
        OPAL_MSG_DPO,
+       OPAL_MSG_PRD,
        OPAL_MSG_TYPE_MAX,
 };
 
@@ -674,6 +683,23 @@ typedef struct oppanel_line {
        __be64 line_len;
 } oppanel_line_t;
 
+enum opal_prd_msg_type {
+       OPAL_PRD_MSG_TYPE_INIT = 0,     /* HBRT --> OPAL */
+       OPAL_PRD_MSG_TYPE_FINI,         /* HBRT/kernel --> OPAL */
+       OPAL_PRD_MSG_TYPE_ATTN,         /* HBRT <-- OPAL */
+       OPAL_PRD_MSG_TYPE_ATTN_ACK,     /* HBRT --> OPAL */
+       OPAL_PRD_MSG_TYPE_OCC_ERROR,    /* HBRT <-- OPAL */
+       OPAL_PRD_MSG_TYPE_OCC_RESET,    /* HBRT <-- OPAL */
+};
+
+struct opal_prd_msg_header {
+       uint8_t         type;
+       uint8_t         pad[1];
+       __be16          size;
+};
+
+struct opal_prd_msg;
+
 /*
  * SG entries
  *
index 042af1abfc4dd02a5f0902bd41b7e67237bfdfbd..958e941c0cda886bb340fc670bec7e172a94d36b 100644 (file)
@@ -186,6 +186,7 @@ int64_t opal_handle_hmi(void);
 int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
 int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
+int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
                uint64_t msg_len);
@@ -193,6 +194,7 @@ int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
                uint64_t *msg_len);
 int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id,
                         struct opal_i2c_request *oreq);
+int64_t opal_prd_msg(struct opal_prd_msg *msg);
 
 int64_t opal_flash_read(uint64_t id, uint64_t offset, uint64_t buf,
                uint64_t size, uint64_t token);
@@ -239,6 +241,10 @@ extern int opal_elog_init(void);
 extern void opal_platform_dump_init(void);
 extern void opal_sys_param_init(void);
 extern void opal_msglog_init(void);
+extern int opal_async_comp_init(void);
+extern int opal_sensor_init(void);
+extern int opal_hmi_handler_init(void);
+extern int opal_event_init(void);
 
 extern int opal_machine_check(struct pt_regs *regs);
 extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
@@ -250,6 +256,8 @@ extern int opal_resync_timebase(void);
 
 extern void opal_lpc_init(void);
 
+extern int opal_event_request(unsigned int opal_event_nr);
+
 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
                                             unsigned long vmalloc_size);
 void opal_free_sg_list(struct opal_sg_list *sg);
index 69c059887a2c0def2a13f566c385c3eca97c0a60..71294a6e976e9c338a81ac69981d3d3bd9a62144 100644 (file)
@@ -278,9 +278,7 @@ extern long long virt_phys_offset;
 
 #ifndef __ASSEMBLY__
 
-#undef STRICT_MM_TYPECHECKS
-
-#ifdef STRICT_MM_TYPECHECKS
+#ifdef CONFIG_STRICT_MM_TYPECHECKS
 /* These are used to make use of C type-checking. */
 
 /* PTE level */
index 1811c44bf34bcb6564036a76fb5b7a63563edf83..712add5904454362ff145aa9391ffd8674dfe604 100644 (file)
@@ -27,9 +27,23 @@ struct pci_controller_ops {
         * allow assignment/enabling of the device. */
        bool            (*enable_device_hook)(struct pci_dev *);
 
+       void            (*disable_device)(struct pci_dev *);
+
+       void            (*release_device)(struct pci_dev *);
+
        /* Called during PCI resource reassignment */
        resource_size_t (*window_alignment)(struct pci_bus *, unsigned long type);
        void            (*reset_secondary_bus)(struct pci_dev *dev);
+
+#ifdef CONFIG_PCI_MSI
+       int             (*setup_msi_irqs)(struct pci_dev *dev,
+                                         int nvec, int type);
+       void            (*teardown_msi_irqs)(struct pci_dev *dev);
+#endif
+
+       int             (*dma_set_mask)(struct pci_dev *dev, u64 dma_mask);
+
+       void            (*shutdown)(struct pci_controller *);
 };
 
 /*
@@ -185,7 +199,7 @@ struct pci_dn {
 
        struct  pci_dn *parent;
        struct  pci_controller *phb;    /* for pci devices */
-       struct  iommu_table *iommu_table;       /* for phb's or bridges */
+       struct  iommu_table_group *table_group; /* for phb's or bridges */
        struct  device_node *node;      /* back-pointer to the device_node */
 
        int     pci_ext_config_space;   /* for pci devices */
index 64b52b1cf5425dcaad0a9cc06ea77037675aa88f..9c326565d498fb1ba8d85e00f0bd20a2350c5a43 100644 (file)
@@ -170,24 +170,6 @@ static inline unsigned long pte_update(pte_t *p,
 #ifdef PTE_ATOMIC_UPDATES
        unsigned long old, tmp;
 
-#ifdef CONFIG_PPC_8xx
-       unsigned long tmp2;
-
-       __asm__ __volatile__("\
-1:     lwarx   %0,0,%4\n\
-       andc    %1,%0,%5\n\
-       or      %1,%1,%6\n\
-       /* 0x200 == Extended encoding, bit 22 */ \
-       /* Bit 22 has to be 1 when _PAGE_USER is unset and _PAGE_RO is set */ \
-       rlwimi  %1,%1,32-1,0x200\n /* get _PAGE_RO */ \
-       rlwinm  %3,%1,32-2,0x200\n /* get _PAGE_USER */ \
-       andc    %1,%1,%3\n\
-       stwcx.  %1,0,%4\n\
-       bne-    1b"
-       : "=&r" (old), "=&r" (tmp), "=m" (*p), "=&r" (tmp2)
-       : "r" (p), "r" (clr), "r" (set), "m" (*p)
-       : "cc" );
-#else /* CONFIG_PPC_8xx */
        __asm__ __volatile__("\
 1:     lwarx   %0,0,%3\n\
        andc    %1,%0,%4\n\
@@ -198,7 +180,6 @@ static inline unsigned long pte_update(pte_t *p,
        : "=&r" (old), "=&r" (tmp), "=m" (*p)
        : "r" (p), "r" (clr), "r" (set), "m" (*p)
        : "cc" );
-#endif /* CONFIG_PPC_8xx */
 #else /* PTE_ATOMIC_UPDATES */
        unsigned long old = pte_val(*p);
        *p = __pte((old & ~clr) | set);
index 43e6ad424c7fc30503db061360fbd1565811b17d..f890f7ce159323d8a3f35fca2f95a594ed0fa9e9 100644 (file)
  */
 #ifndef __real_pte
 
-#ifdef STRICT_MM_TYPECHECKS
+#ifdef CONFIG_STRICT_MM_TYPECHECKS
 #define __real_pte(e,p)                ((real_pte_t){(e)})
 #define __rpte_to_pte(r)       ((r).pte)
 #else
@@ -347,11 +347,27 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
        pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
 /* Encode and de-code a swap entry */
-#define __swp_type(entry)      (((entry).val >> 1) & 0x3f)
-#define __swp_offset(entry)    ((entry).val >> 8)
-#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)})
-#define __pte_to_swp_entry(pte)        ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT})
-#define __swp_entry_to_pte(x)  ((pte_t) { (x).val << PTE_RPN_SHIFT })
+#define MAX_SWAPFILES_CHECK() do { \
+       BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
+       /*                                                      \
+        * Don't have overlapping bits with _PAGE_HPTEFLAGS     \
+        * We filter HPTEFLAGS on set_pte.                      \
+        */                                                     \
+       BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
+       } while (0)
+/*
+ * on pte we don't need to handle RADIX_TREE_EXCEPTIONAL_SHIFT;
+ */
+#define SWP_TYPE_BITS 5
+#define __swp_type(x)          (((x).val >> _PAGE_BIT_SWAP_TYPE) \
+                               & ((1UL << SWP_TYPE_BITS) - 1))
+#define __swp_offset(x)                ((x).val >> PTE_RPN_SHIFT)
+#define __swp_entry(type, offset)      ((swp_entry_t) { \
+                                       ((type) << _PAGE_BIT_SWAP_TYPE) \
+                                       | ((offset) << PTE_RPN_SHIFT) })
+
+#define __pte_to_swp_entry(pte)                ((swp_entry_t) { pte_val((pte)) })
+#define __swp_entry_to_pte(x)          __pte((x).val)
 
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
index f9b498292a5c1cde5eeec7db06ae53759fd6921c..6f77f71ee96445792a7263f1bf370aa73c2095cf 100644 (file)
@@ -11,7 +11,7 @@
 #define _ASM_PNV_PCI_H
 
 #include <linux/pci.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
index bf117d8fb45fe773bb6154578961dd031c921f28..28ded5d9b57961b7be7b9265cf1f334a5a79395e 100644 (file)
@@ -295,6 +295,15 @@ struct thread_struct {
 #endif
 #ifdef CONFIG_PPC64
        unsigned long   dscr;
+       /*
+        * This member element dscr_inherit indicates that the process
+        * has explicitly attempted and changed the DSCR register value
+        * for itself. Hence kernel won't use the default CPU DSCR value
+        * contained in the PACA structure anymore during process context
+        * switch. Once this variable is set, this behaviour will also be
+        * inherited to all the children of this process from that point
+        * onwards.
+        */
        int             dscr_inherit;
        unsigned long   ppr;    /* used to save/restore SMT priority */
 #endif
index 97bae64afdaabd70ee30768f3dc785410cba1aa0..a0e2ba9609760e4108ce9aca0a4fd255fa15b1d1 100644 (file)
 #define _PAGE_SPECIAL  0x0008  /* SW entry, forced to 0 by the TLB miss */
 #define _PAGE_DIRTY    0x0100  /* C: page changed */
 
-/* These 4 software bits must be masked out when the entry is loaded
- * into the TLB, 1 SW bit left(0x0080).
+/* These 4 software bits must be masked out when the L2 entry is loaded
+ * into the TLB.
  */
-#define _PAGE_GUARDED  0x0010  /* software: guarded access */
-#define _PAGE_ACCESSED 0x0020  /* software: page referenced */
-#define _PAGE_WRITETHRU        0x0040  /* software: caching is write through */
+#define _PAGE_GUARDED  0x0010  /* Copied to L1 G entry in DTLB */
+#define _PAGE_USER     0x0020  /* Copied to L1 APG lsb */
+#define _PAGE_EXEC     0x0040  /* Copied to L1 APG */
+#define _PAGE_WRITETHRU        0x0080  /* software: caching is write through */
+#define _PAGE_ACCESSED 0x0800  /* software: page referenced */
 
-/* Setting any bits in the nibble with the follow two controls will
- * require a TLB exception handler change.  It is assumed unused bits
- * are always zero.
- */
-#define _PAGE_RO       0x0400  /* lsb PP bits */
-#define _PAGE_USER     0x0800  /* msb PP bits */
-/* set when _PAGE_USER is unset and _PAGE_RO is set */
-#define _PAGE_KNLRO    0x0200
+#define _PAGE_RO       0x0600  /* Supervisor RO, User no access */
 
 #define _PMD_PRESENT   0x0001
 #define _PMD_BAD       0x0ff0
 #define _PMD_PAGE_MASK 0x000c
 #define _PMD_PAGE_8M   0x000c
 
-#define _PTE_NONE_MASK _PAGE_KNLRO
-
 /* Until my rework is finished, 8xx still needs atomic PTE updates */
 #define PTE_ATOMIC_UPDATES     1
 
 /* We need to add _PAGE_SHARED to kernel pages */
-#define _PAGE_KERNEL_RO        (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO)
-#define _PAGE_KERNEL_ROX       (_PAGE_EXEC | _PAGE_RO | _PAGE_KNLRO)
+#define _PAGE_KERNEL_RO                (_PAGE_SHARED | _PAGE_RO)
+#define _PAGE_KERNEL_ROX       (_PAGE_SHARED | _PAGE_RO | _PAGE_EXEC)
+#define _PAGE_KERNEL_RW                (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
+                                _PAGE_HWWRITE)
+#define _PAGE_KERNEL_RWX       (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
+                                _PAGE_HWWRITE | _PAGE_EXEC)
 
 #endif /* __KERNEL__ */
 #endif /*  _ASM_POWERPC_PTE_8xx_H */
index 91a704952ca1a96234b9088c4493b6371bbbefcf..8d8473278d91c37e1bf5b742bbe20f8938a9ed9a 100644 (file)
@@ -11,6 +11,7 @@
 /* Architected bits */
 #define _PAGE_PRESENT  0x000001 /* software: pte contains a translation */
 #define _PAGE_SW1      0x000002
+#define _PAGE_BIT_SWAP_TYPE    2
 #define _PAGE_BAP_SR   0x000004
 #define _PAGE_BAP_UR   0x000008
 #define _PAGE_BAP_SW   0x000010
index c5a755ef7011ad18389ea9fca4db9236824e7343..b7c8d079c121e8fe6bea35eaad0eeeee206d4404 100644 (file)
@@ -85,10 +85,8 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
  * 64-bit PTEs
  */
 #if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
-#define PTE_RPN_MAX    (1ULL << (64 - PTE_RPN_SHIFT))
 #define PTE_RPN_MASK   (~((1ULL<<PTE_RPN_SHIFT)-1))
 #else
-#define PTE_RPN_MAX    (1UL << (32 - PTE_RPN_SHIFT))
 #define PTE_RPN_MASK   (~((1UL<<PTE_RPN_SHIFT)-1))
 #endif
 
index fc852f7e7b3a63f86e94f0cb43b62eb2b11fc3a3..ef612c160da7c8fb8d4b11ed8a6f67c270d2dcce 100644 (file)
@@ -16,6 +16,7 @@
  */
 #define _PAGE_PRESENT          0x0001 /* software: pte contains a translation */
 #define _PAGE_USER             0x0002 /* matches one of the PP bits */
+#define _PAGE_BIT_SWAP_TYPE    2
 #define _PAGE_EXEC             0x0004 /* No execute on POWER4 and newer (we invert) */
 #define _PAGE_GUARDED          0x0008
 /* We can derive Memory coherence from _PAGE_NO_CACHE */
index f1863a138b4a496d726bbfe791b2c4f034ea5973..71f2b3f02cf8848425b26d92e6bd6f650ce726f8 100644 (file)
@@ -358,7 +358,7 @@ SYSCALL_SPU(setns)
 COMPAT_SYS(process_vm_readv)
 COMPAT_SYS(process_vm_writev)
 SYSCALL(finit_module)
-SYSCALL(ni_syscall) /* sys_kcmp */
+SYSCALL(kcmp) /* sys_kcmp */
 SYSCALL_SPU(sched_setattr)
 SYSCALL_SPU(sched_getattr)
 SYSCALL_SPU(renameat2)
index c15da6073cb8b8e177c715fe81eb76081432d62e..8e86b48d03699047dda0f493a3955c8c05e34909 100644 (file)
@@ -144,6 +144,26 @@ TRACE_EVENT_FN(opal_exit,
 );
 #endif
 
+TRACE_EVENT(hash_fault,
+
+           TP_PROTO(unsigned long addr, unsigned long access, unsigned long trap),
+           TP_ARGS(addr, access, trap),
+           TP_STRUCT__entry(
+                   __field(unsigned long, addr)
+                   __field(unsigned long, access)
+                   __field(unsigned long, trap)
+                   ),
+
+           TP_fast_assign(
+                   __entry->addr = addr;
+                   __entry->access = access;
+                   __entry->trap = trap;
+                   ),
+
+           TP_printk("hash fault with addr 0x%lx and access = 0x%lx trap = 0x%lx",
+                     __entry->addr, __entry->access, __entry->trap)
+);
+
 #endif /* _TRACE_POWERPC_H */
 
 #undef TRACE_INCLUDE_PATH
index a0c071d24e0e5e744969bc2eeea473b210bde785..2a8ebae0936beb0f6b3ec46eafaf979f0d8ddedd 100644 (file)
@@ -265,7 +265,7 @@ do {                                                                \
 ({                                                             \
        long __gu_err;                                          \
        unsigned long __gu_val;                                 \
-       const __typeof__(*(ptr)) __user *__gu_addr = (ptr);     \
+       __typeof__(*(ptr)) __user *__gu_addr = (ptr);   \
        __chk_user_ptr(ptr);                                    \
        if (!is_kernel_addr((unsigned long)__gu_addr))          \
                might_fault();                                  \
@@ -279,7 +279,7 @@ do {                                                                \
 ({                                                             \
        long __gu_err;                                          \
        long long __gu_val;                                     \
-       const __typeof__(*(ptr)) __user *__gu_addr = (ptr);     \
+       __typeof__(*(ptr)) __user *__gu_addr = (ptr);   \
        __chk_user_ptr(ptr);                                    \
        if (!is_kernel_addr((unsigned long)__gu_addr))          \
                might_fault();                                  \
@@ -293,7 +293,7 @@ do {                                                                \
 ({                                                                     \
        long __gu_err = -EFAULT;                                        \
        unsigned long  __gu_val = 0;                                    \
-       const __typeof__(*(ptr)) __user *__gu_addr = (ptr);             \
+       __typeof__(*(ptr)) __user *__gu_addr = (ptr);           \
        might_fault();                                                  \
        if (access_ok(VERIFY_READ, __gu_addr, (size)))                  \
                __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
@@ -305,7 +305,7 @@ do {                                                                \
 ({                                                             \
        long __gu_err;                                          \
        unsigned long __gu_val;                                 \
-       const __typeof__(*(ptr)) __user *__gu_addr = (ptr);     \
+       __typeof__(*(ptr)) __user *__gu_addr = (ptr);   \
        __chk_user_ptr(ptr);                                    \
        __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
        (x) = (__force __typeof__(*(ptr)))__gu_val;                     \
index 79c4068be278fb38ce0033cf9065c5a596beb28b..f44a027818afe549d231b72cdffb9b4598f1443e 100644 (file)
@@ -18,6 +18,7 @@ header-y += kvm_para.h
 header-y += mman.h
 header-y += msgbuf.h
 header-y += nvram.h
+header-y += opal-prd.h
 header-y += param.h
 header-y += perf_event.h
 header-y += poll.h
index de2c0e4ee1aab1c13d0ac60d0d2300849665c48c..43686043e29734b47b183882417e309e617d1039 100644 (file)
@@ -42,5 +42,6 @@
 #define PPC_FEATURE2_ISEL              0x08000000
 #define PPC_FEATURE2_TAR               0x04000000
 #define PPC_FEATURE2_VEC_CRYPTO                0x02000000
+#define PPC_FEATURE2_HTM_NOSC          0x01000000
 
 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
diff --git a/arch/powerpc/include/uapi/asm/eeh.h b/arch/powerpc/include/uapi/asm/eeh.h
new file mode 100644 (file)
index 0000000..291b7d1
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2015
+ *
+ * Authors: Gavin Shan <gwshan@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_POWERPC_EEH_H
+#define _ASM_POWERPC_EEH_H
+
+/* PE states */
+#define EEH_PE_STATE_NORMAL            0       /* Normal state         */
+#define EEH_PE_STATE_RESET             1       /* PE reset asserted    */
+#define EEH_PE_STATE_STOPPED_IO_DMA    2       /* Frozen PE            */
+#define EEH_PE_STATE_STOPPED_DMA       4       /* Stopped DMA only     */
+#define EEH_PE_STATE_UNAVAIL           5       /* Unavailable          */
+
+/* EEH error types and functions */
+#define EEH_ERR_TYPE_32                        0       /* 32-bits error        */
+#define EEH_ERR_TYPE_64                        1       /* 64-bits error        */
+#define EEH_ERR_FUNC_MIN               0
+#define EEH_ERR_FUNC_LD_MEM_ADDR       0       /* Memory load  */
+#define EEH_ERR_FUNC_LD_MEM_DATA       1
+#define EEH_ERR_FUNC_LD_IO_ADDR                2       /* IO load      */
+#define EEH_ERR_FUNC_LD_IO_DATA                3
+#define EEH_ERR_FUNC_LD_CFG_ADDR       4       /* Config load  */
+#define EEH_ERR_FUNC_LD_CFG_DATA       5
+#define EEH_ERR_FUNC_ST_MEM_ADDR       6       /* Memory store */
+#define EEH_ERR_FUNC_ST_MEM_DATA       7
+#define EEH_ERR_FUNC_ST_IO_ADDR                8       /* IO store     */
+#define EEH_ERR_FUNC_ST_IO_DATA                9
+#define EEH_ERR_FUNC_ST_CFG_ADDR       10      /* Config store */
+#define EEH_ERR_FUNC_ST_CFG_DATA       11
+#define EEH_ERR_FUNC_DMA_RD_ADDR       12      /* DMA read     */
+#define EEH_ERR_FUNC_DMA_RD_DATA       13
+#define EEH_ERR_FUNC_DMA_RD_MASTER     14
+#define EEH_ERR_FUNC_DMA_RD_TARGET     15
+#define EEH_ERR_FUNC_DMA_WR_ADDR       16      /* DMA write    */
+#define EEH_ERR_FUNC_DMA_WR_DATA       17
+#define EEH_ERR_FUNC_DMA_WR_MASTER     18
+#define EEH_ERR_FUNC_DMA_WR_TARGET     19
+#define EEH_ERR_FUNC_MAX               19
+
+#endif /* _ASM_POWERPC_EEH_H */
diff --git a/arch/powerpc/include/uapi/asm/opal-prd.h b/arch/powerpc/include/uapi/asm/opal-prd.h
new file mode 100644 (file)
index 0000000..319ff4a
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * OPAL Runtime Diagnostics interface driver
+ * Supported on POWERNV platform
+ *
+ * (C) Copyright IBM 2015
+ *
+ * Author: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _UAPI_ASM_POWERPC_OPAL_PRD_H_
+#define _UAPI_ASM_POWERPC_OPAL_PRD_H_
+
+#include <linux/types.h>
+
+/**
+ * The version of the kernel interface of the PRD system. This describes the
+ * interface available for the /dev/opal-prd device. The actual PRD message
+ * layout and content is private to the firmware <--> userspace interface, so
+ * is not covered by this versioning.
+ *
+ * Future interface versions are backwards-compatible; if a later kernel
+ * version is encountered, functionality provided in earlier versions
+ * will work.
+ */
+#define OPAL_PRD_KERNEL_VERSION                1
+
+#define OPAL_PRD_GET_INFO              _IOR('o', 0x01, struct opal_prd_info)
+#define OPAL_PRD_SCOM_READ             _IOR('o', 0x02, struct opal_prd_scom)
+#define OPAL_PRD_SCOM_WRITE            _IOW('o', 0x03, struct opal_prd_scom)
+
+#ifndef __ASSEMBLY__
+
+struct opal_prd_info {
+       __u64   version;
+       __u64   reserved[3];
+};
+
+struct opal_prd_scom {
+       __u64   chip;
+       __u64   addr;
+       __u64   data;
+       __s64   rc;
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_ASM_POWERPC_OPAL_PRD_H */
index 5d836b7c1176242ae2c943c0bcc81cf6bcb031cf..5047659815a54ffd69037b245eebd9f6c64a679d 100644 (file)
@@ -11,7 +11,7 @@
 #define TM_CAUSE_RESCHED       0xde
 #define TM_CAUSE_TLBI          0xdc
 #define TM_CAUSE_FAC_UNAV      0xda
-#define TM_CAUSE_SYSCALL       0xd8  /* future use */
+#define TM_CAUSE_SYSCALL       0xd8
 #define TM_CAUSE_MISC          0xd6  /* future use */
 #define TM_CAUSE_SIGNAL                0xd4
 #define TM_CAUSE_ALIGNMENT     0xd2
index c1ebbdaac28fb85e37cb1fdf34c96e2e1175a977..87c7d1473488a95fc5956fa2f80bcef543b74b69 100644 (file)
@@ -33,11 +33,12 @@ obj-y                               := cputable.o ptrace.o syscalls.o \
                                   signal.o sysfs.o cacheinfo.o time.o \
                                   prom.o traps.o setup-common.o \
                                   udbg.o misc.o io.o dma.o \
-                                  misc_$(CONFIG_WORD_SIZE).o vdso32/ \
+                                  misc_$(CONFIG_WORD_SIZE).o \
                                   of_platform.o prom_parse.o
 obj-$(CONFIG_PPC64)            += setup_64.o sys_ppc32.o \
                                   signal_64.o ptrace32.o \
                                   paca.o nvram_64.o firmware.o
+obj-$(CONFIG_VDSO32)           += vdso32/
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)    += cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)    += cpu_setup_power.o
index 0034b6b3556a4f6b571ec60fdd55c02535c6ea97..98230579d99c74267c9dc2fd1ef0df887b52dd2d 100644 (file)
@@ -247,7 +247,7 @@ int main(void)
 #endif
        DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
        DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
-       DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default));
+       DEFINE(PACA_DSCR_DEFAULT, offsetof(struct paca_struct, dscr_default));
        DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
        DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
        DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
index 60262fdf35babd4909508b9a4d9a214f215ac03a..7d80bfdfb15eef4fb4abf8390d0724b688e8f6f8 100644 (file)
@@ -108,7 +108,9 @@ extern void __restore_cpu_e6500(void);
                                 PPC_FEATURE_TRUE_LE | \
                                 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
 #define COMMON_USER2_POWER8    (PPC_FEATURE2_ARCH_2_07 | \
-                                PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \
+                                PPC_FEATURE2_HTM_COMP | \
+                                PPC_FEATURE2_HTM_NOSC_COMP | \
+                                PPC_FEATURE2_DSCR | \
                                 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
                                 PPC_FEATURE2_VEC_CRYPTO)
 #define COMMON_USER_PA6T       (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
index 484b2d4462c10cd954aad0a5e9cb7776022b1db1..35e4dcc5dce362eb941fc18eabe39f797c90fba1 100644 (file)
@@ -248,6 +248,14 @@ int dma_set_mask(struct device *dev, u64 dma_mask)
 {
        if (ppc_md.dma_set_mask)
                return ppc_md.dma_set_mask(dev, dma_mask);
+
+       if (dev_is_pci(dev)) {
+               struct pci_dev *pdev = to_pci_dev(dev);
+               struct pci_controller *phb = pci_bus_to_host(pdev->bus);
+               if (phb->controller_ops.dma_set_mask)
+                       return phb->controller_ops.dma_set_mask(pdev, dma_mask);
+       }
+
        return __dma_set_mask(dev, dma_mask);
 }
 EXPORT_SYMBOL(dma_set_mask);
index 9ee61d15653d6ec46546b93ea72601072176e4b7..af9b597b10af65192368dbf5881a5fa7929cab4a 100644 (file)
@@ -144,8 +144,6 @@ struct eeh_stats {
 
 static struct eeh_stats eeh_stats;
 
-#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
-
 static int __init eeh_setup(char *str)
 {
        if (!strcmp(str, "off"))
@@ -719,7 +717,7 @@ static void *eeh_restore_dev_state(void *data, void *userdata)
 
        /* The caller should restore state for the specified device */
        if (pdev != dev)
-               pci_save_state(pdev);
+               pci_restore_state(pdev);
 
        return NULL;
 }
@@ -1412,13 +1410,11 @@ static int dev_has_iommu_table(struct device *dev, void *data)
 {
        struct pci_dev *pdev = to_pci_dev(dev);
        struct pci_dev **ppdev = data;
-       struct iommu_table *tbl;
 
        if (!dev)
                return 0;
 
-       tbl = get_iommu_table_base(dev);
-       if (tbl && tbl->it_group) {
+       if (dev->iommu_group) {
                *ppdev = pdev;
                return 1;
        }
@@ -1647,6 +1643,41 @@ int eeh_pe_configure(struct eeh_pe *pe)
 }
 EXPORT_SYMBOL_GPL(eeh_pe_configure);
 
+/**
+ * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE
+ * @pe: the indicated PE
+ * @type: error type
+ * @function: error function
+ * @addr: address
+ * @mask: address mask
+ *
+ * The routine is called to inject the specified PCI error, which
+ * is determined by @type and @function, to the indicated PE for
+ * testing purpose.
+ */
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+                     unsigned long addr, unsigned long mask)
+{
+       /* Invalid PE ? */
+       if (!pe)
+               return -ENODEV;
+
+       /* Unsupported operation ? */
+       if (!eeh_ops || !eeh_ops->err_inject)
+               return -ENOENT;
+
+       /* Check on PCI error type */
+       if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
+               return -EINVAL;
+
+       /* Check on PCI error function */
+       if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
+               return -EINVAL;
+
+       return eeh_ops->err_inject(pe, type, func, addr, mask);
+}
+EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
+
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
        if (!eeh_enabled()) {
index eeabeabea49c41cec4da2532b290db2fb1c0679b..a1e86e172e3cca3b24cf74ecc503248ddcd5e3d2 100644 (file)
  */
 struct pci_io_addr_range {
        struct rb_node rb_node;
-       unsigned long addr_lo;
-       unsigned long addr_hi;
+       resource_size_t addr_lo;
+       resource_size_t addr_hi;
        struct eeh_dev *edev;
        struct pci_dev *pcidev;
-       unsigned int flags;
+       unsigned long flags;
 };
 
 static struct pci_io_addr_cache {
@@ -125,8 +125,8 @@ static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
 
 /* Insert address range into the rb tree. */
 static struct pci_io_addr_range *
-eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
-                     unsigned long ahi, unsigned int flags)
+eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
+                     resource_size_t ahi, unsigned long flags)
 {
        struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
        struct rb_node *parent = NULL;
@@ -197,9 +197,9 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
 
        /* Walk resources on this device, poke them into the tree */
        for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
-               unsigned long start = pci_resource_start(dev,i);
-               unsigned long end = pci_resource_end(dev,i);
-               unsigned int flags = pci_resource_flags(dev,i);
+               resource_size_t start = pci_resource_start(dev,i);
+               resource_size_t end = pci_resource_end(dev,i);
+               unsigned long flags = pci_resource_flags(dev,i);
 
                /* We are interested only bus addresses, not dma or other stuff */
                if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
index 24768ff3cb7308345248ada7a70f60421c7aea46..89eb4bc34d3a8934a0a15c4d2c428f373e5eb0ba 100644 (file)
@@ -660,7 +660,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
        eeh_pe_dev_traverse(pe, eeh_report_error, &result);
 
        /* Get the current PCI slot state. This can take a long time,
-        * sometimes over 3 seconds for certain systems.
+        * sometimes over 300 seconds for certain systems.
         */
        rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
        if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
index afbc20019c2efba2b81b7cd6298941753d9776b5..579e0f9a2d5700dd0cbbc3034f1dce09b2e49e70 100644 (file)
@@ -34,6 +34,7 @@
 #include <asm/ftrace.h>
 #include <asm/hw_irq.h>
 #include <asm/context_tracking.h>
+#include <asm/tm.h>
 
 /*
  * System calls.
@@ -51,6 +52,12 @@ exception_marker:
 
        .globl system_call_common
 system_call_common:
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+       extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
+       bne     tabort_syscall
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
        andi.   r10,r12,MSR_PR
        mr      r10,r1
        addi    r1,r1,-INT_FRAME_SIZE
@@ -311,6 +318,34 @@ syscall_exit_work:
        bl      do_syscall_trace_leave
        b       ret_from_except
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+tabort_syscall:
+       /* Firstly we need to enable TM in the kernel */
+       mfmsr   r10
+       li      r13, 1
+       rldimi  r10, r13, MSR_TM_LG, 63-MSR_TM_LG
+       mtmsrd  r10, 0
+
+       /* tabort, this dooms the transaction, nothing else */
+       li      r13, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
+       TABORT(R13)
+
+       /*
+        * Return directly to userspace. We have corrupted user register state,
+        * but userspace will never see that register state. Execution will
+        * resume after the tbegin of the aborted transaction with the
+        * checkpointed register state.
+        */
+       li      r13, MSR_RI
+       andc    r10, r10, r13
+       mtmsrd  r10, 1
+       mtspr   SPRN_SRR0, r11
+       mtspr   SPRN_SRR1, r12
+
+       rfid
+       b       .       /* prevent speculative execution */
+#endif
+
 /* Save non-volatile GPRs, if not already saved. */
 _GLOBAL(save_nvgprs)
        ld      r11,_TRAP(r1)
@@ -556,7 +591,7 @@ BEGIN_FTR_SECTION
        ld      r0,THREAD_DSCR(r4)
        cmpwi   r6,0
        bne     1f
-       ld      r0,PACA_DSCR(r13)
+       ld      r0,PACA_DSCR_DEFAULT(r13)
 1:
 BEGIN_FTR_SECTION_NESTED(70)
        mfspr   r8, SPRN_FSCR
index 9519e6bdc6d75c324334bf4f0f52dd6da9d9bbcc..0a0399c2af119c1c63efe094b1404f3250045769 100644 (file)
@@ -59,14 +59,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)                              \
 
 #if defined(CONFIG_RELOCATABLE)
        /*
-        * We can't branch directly; in the direct case we use LR
-        * and system_call_entry restores LR.  (We thus need to move
-        * LR to r10 in the RFID case too.)
+        * We can't branch directly so we do it via the CTR which
+        * is volatile across system calls.
         */
 #define SYSCALL_PSERIES_2_DIRECT                               \
        mflr    r10 ;                                           \
        ld      r12,PACAKBASE(r13) ;                            \
-       LOAD_HANDLER(r12, system_call_entry_direct) ;           \
+       LOAD_HANDLER(r12, system_call_entry) ;                  \
        mtctr   r12 ;                                           \
        mfspr   r12,SPRN_SRR1 ;                                 \
        /* Re-use of r13... No spare regs to do this */ \
@@ -80,7 +79,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)                                \
        mfspr   r12,SPRN_SRR1 ;                                 \
        li      r10,MSR_RI ;                                    \
        mtmsrd  r10,1 ;                 /* Set RI (EE=0) */     \
-       b       system_call_entry_direct ;
+       b       system_call_common ;
 #endif
 
 /*
@@ -969,13 +968,6 @@ hv_facility_unavailable_relon_trampoline:
 __end_interrupts:
 
        .align  7
-system_call_entry_direct:
-#if defined(CONFIG_RELOCATABLE)
-       /* The first level prologue may have used LR to get here, saving
-        * orig in r10.  To save hacking/ifdeffing common code, restore here.
-        */
-       mtlr    r10
-#endif
 system_call_entry:
        b       system_call_common
 
index 9b53fe139bf6f53c4227d713d1f969f3f0c718b8..78c1eba4c04a432ea233983e81d9da7a87071834 100644 (file)
        mtspr   spr, reg
 #endif
 
+/* Macro to test if an address is a kernel address */
+#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
+#define IS_KERNEL(tmp, addr)           \
+       andis.  tmp, addr, 0x8000       /* Address >= 0x80000000 */
+#define BRANCH_UNLESS_KERNEL(label)    beq     label
+#else
+#define IS_KERNEL(tmp, addr)           \
+       rlwinm  tmp, addr, 16, 16, 31;  \
+       cmpli   cr0, tmp, PAGE_OFFSET >> 16
+#define BRANCH_UNLESS_KERNEL(label)    blt     label
+#endif
+
+
 /*
  * Value for the bits that have fixed value in RPN entries.
  * Also used for tagging DAR for DTLBerror.
@@ -116,13 +129,13 @@ turn_on_mmu:
  */
 #define EXCEPTION_PROLOG       \
        EXCEPTION_PROLOG_0;     \
+       mfcr    r10;            \
        EXCEPTION_PROLOG_1;     \
        EXCEPTION_PROLOG_2
 
 #define EXCEPTION_PROLOG_0     \
        mtspr   SPRN_SPRG_SCRATCH0,r10; \
-       mtspr   SPRN_SPRG_SCRATCH1,r11; \
-       mfcr    r10
+       mtspr   SPRN_SPRG_SCRATCH1,r11
 
 #define EXCEPTION_PROLOG_1     \
        mfspr   r11,SPRN_SRR1;          /* check whether user or kernel */ \
@@ -162,7 +175,6 @@ turn_on_mmu:
  * Exception exit code.
  */
 #define EXCEPTION_EPILOG_0     \
-       mtcr    r10;            \
        mfspr   r10,SPRN_SPRG_SCRATCH0; \
        mfspr   r11,SPRN_SPRG_SCRATCH1
 
@@ -297,19 +309,22 @@ SystemCall:
  * We have to use the MD_xxx registers for the tablewalk because the
  * equivalent MI_xxx registers only perform the attribute functions.
  */
+
+#ifdef CONFIG_8xx_CPU15
+#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr)     \
+       addi    tmp, addr, PAGE_SIZE;   \
+       tlbie   tmp;                    \
+       addi    tmp, addr, -PAGE_SIZE;  \
+       tlbie   tmp
+#else
+#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr)
+#endif
+
 InstructionTLBMiss:
 #ifdef CONFIG_8xx_CPU6
-       mtspr   SPRN_DAR, r3
+       mtspr   SPRN_SPRG_SCRATCH2, r3
 #endif
        EXCEPTION_PROLOG_0
-       mtspr   SPRN_SPRG_SCRATCH2, r10
-       mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
-#ifdef CONFIG_8xx_CPU15
-       addi    r11, r10, PAGE_SIZE
-       tlbie   r11
-       addi    r11, r10, -PAGE_SIZE
-       tlbie   r11
-#endif
 
        /* If we are faulting a kernel address, we have to use the
         * kernel page tables.
@@ -317,24 +332,34 @@ InstructionTLBMiss:
 #ifdef CONFIG_MODULES
        /* Only modules will cause ITLB Misses as we always
         * pin the first 8MB of kernel memory */
-       andis.  r11, r10, 0x8000        /* Address >= 0x80000000 */
-#endif
+       mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
+       INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
+       mfcr    r10
+       IS_KERNEL(r11, r11)
        mfspr   r11, SPRN_M_TW  /* Get level 1 table */
-#ifdef CONFIG_MODULES
-       beq     3f
+       BRANCH_UNLESS_KERNEL(3f)
        lis     r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
+       mtcr    r10
+       mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
+#else
+       mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
+       INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
+       mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
 #endif
        /* Insert level 1 index */
        rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
        lwz     r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)        /* Get the level 1 entry */
 
-       /* Load the MI_TWC with the attributes for this "segment." */
-       MTSPR_CPU6(SPRN_MI_TWC, r11, r3)        /* Set segment attributes */
-       rlwinm  r11, r11,0,0,19 /* Extract page descriptor page address */
        /* Extract level 2 index */
        rlwinm  r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
-       lwzx    r10, r10, r11   /* Get the pte */
+       rlwimi  r10, r11, 0, 0, 32 - PAGE_SHIFT - 1     /* Add level 2 base */
+       lwz     r10, 0(r10)     /* Get the pte */
+
+       /* Insert the APG into the TWC from the Linux PTE. */
+       rlwimi  r11, r10, 0, 25, 26
+       /* Load the MI_TWC with the attributes for this "segment." */
+       MTSPR_CPU6(SPRN_MI_TWC, r11, r3)        /* Set segment attributes */
 
 #ifdef CONFIG_SWAP
        rlwinm  r11, r10, 32-5, _PAGE_PRESENT
@@ -343,40 +368,41 @@ InstructionTLBMiss:
 #endif
        li      r11, RPN_PATTERN
        /* The Linux PTE won't go exactly into the MMU TLB.
-        * Software indicator bits 21 and 28 must be clear.
+        * Software indicator bits 20-23 and 28 must be clear.
         * Software indicator bits 24, 25, 26, and 27 must be
         * set.  All other Linux PTE bits control the behavior
         * of the MMU.
         */
-       rlwimi  r10, r11, 0, 0x07f8     /* Set 24-27, clear 21-23,28 */
+       rlwimi  r10, r11, 0, 0x0ff8     /* Set 24-27, clear 20-23,28 */
        MTSPR_CPU6(SPRN_MI_RPN, r10, r3)        /* Update TLB entry */
 
        /* Restore registers */
 #ifdef CONFIG_8xx_CPU6
-       mfspr   r3, SPRN_DAR
-       mtspr   SPRN_DAR, r11   /* Tag DAR */
+       mfspr   r3, SPRN_SPRG_SCRATCH2
 #endif
-       mfspr   r10, SPRN_SPRG_SCRATCH2
        EXCEPTION_EPILOG_0
        rfi
 
        . = 0x1200
 DataStoreTLBMiss:
 #ifdef CONFIG_8xx_CPU6
-       mtspr   SPRN_DAR, r3
+       mtspr   SPRN_SPRG_SCRATCH2, r3
 #endif
        EXCEPTION_PROLOG_0
-       mtspr   SPRN_SPRG_SCRATCH2, r10
-       mfspr   r10, SPRN_MD_EPN
+       mfcr    r10
 
        /* If we are faulting a kernel address, we have to use the
         * kernel page tables.
         */
-       andis.  r11, r10, 0x8000
+       mfspr   r11, SPRN_MD_EPN
+       IS_KERNEL(r11, r11)
        mfspr   r11, SPRN_M_TW  /* Get level 1 table */
-       beq     3f
+       BRANCH_UNLESS_KERNEL(3f)
        lis     r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
+       mtcr    r10
+       mfspr   r10, SPRN_MD_EPN
+
        /* Insert level 1 index */
        rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
        lwz     r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)        /* Get the level 1 entry */
@@ -388,13 +414,13 @@ DataStoreTLBMiss:
        rlwimi  r10, r11, 0, 0, 32 - PAGE_SHIFT - 1     /* Add level 2 base */
        lwz     r10, 0(r10)     /* Get the pte */
 
-       /* Insert the Guarded flag into the TWC from the Linux PTE.
-        * It is bit 27 of both the Linux PTE and the TWC (at least
+       /* Insert the Guarded flag and APG into the TWC from the Linux PTE.
+        * It is bit 26-27 of both the Linux PTE and the TWC (at least
         * I got that right :-).  It will be better when we can put
         * this into the Linux pgd/pmd and load it in the operation
         * above.
         */
-       rlwimi  r11, r10, 0, 27, 27
+       rlwimi  r11, r10, 0, 26, 27
        /* Insert the WriteThru flag into the TWC from the Linux PTE.
         * It is bit 25 in the Linux PTE and bit 30 in the TWC
         */
@@ -423,14 +449,14 @@ DataStoreTLBMiss:
         */
        li      r11, RPN_PATTERN
        rlwimi  r10, r11, 0, 24, 28     /* Set 24-27, clear 28 */
+       rlwimi  r10, r11, 0, 20, 20     /* clear 20 */
        MTSPR_CPU6(SPRN_MD_RPN, r10, r3)        /* Update TLB entry */
 
        /* Restore registers */
 #ifdef CONFIG_8xx_CPU6
-       mfspr   r3, SPRN_DAR
+       mfspr   r3, SPRN_SPRG_SCRATCH2
 #endif
        mtspr   SPRN_DAR, r11   /* Tag DAR */
-       mfspr   r10, SPRN_SPRG_SCRATCH2
        EXCEPTION_EPILOG_0
        rfi
 
@@ -456,6 +482,7 @@ InstructionTLBError:
        . = 0x1400
 DataTLBError:
        EXCEPTION_PROLOG_0
+       mfcr    r10
 
        mfspr   r11, SPRN_DAR
        cmpwi   cr0, r11, RPN_PATTERN
@@ -503,9 +530,9 @@ FixupDAR:/* Entry point for dcbx workaround. */
        mtspr   SPRN_SPRG_SCRATCH2, r10
        /* fetch instruction from memory. */
        mfspr   r10, SPRN_SRR0
-       andis.  r11, r10, 0x8000        /* Address >= 0x80000000 */
+       IS_KERNEL(r11, r10)
        mfspr   r11, SPRN_M_TW  /* Get level 1 table */
-       beq     3f
+       BRANCH_UNLESS_KERNEL(3f)
        lis     r11, (swapper_pg_dir-PAGE_OFFSET)@ha
        /* Insert level 1 index */
 3:     rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
@@ -743,15 +770,20 @@ initial_mmu:
        ori     r8, r8, MI_EVALID       /* Mark it valid */
        mtspr   SPRN_MI_EPN, r8
        mtspr   SPRN_MD_EPN, r8
-       li      r8, MI_PS8MEG           /* Set 8M byte page */
+       li      r8, MI_PS8MEG | (2 << 5)        /* Set 8M byte page, APG 2 */
        ori     r8, r8, MI_SVALID       /* Make it valid */
        mtspr   SPRN_MI_TWC, r8
+       li      r8, MI_PS8MEG           /* Set 8M byte page, APG 0 */
+       ori     r8, r8, MI_SVALID       /* Make it valid */
        mtspr   SPRN_MD_TWC, r8
        li      r8, MI_BOOTINIT         /* Create RPN for address 0 */
        mtspr   SPRN_MI_RPN, r8         /* Store TLB entry */
        mtspr   SPRN_MD_RPN, r8
-       lis     r8, MI_Kp@h             /* Set the protection mode */
+       lis     r8, MI_APG_INIT@h       /* Set protection modes */
+       ori     r8, r8, MI_APG_INIT@l
        mtspr   SPRN_MI_AP, r8
+       lis     r8, MD_APG_INIT@h
+       ori     r8, r8, MD_APG_INIT@l
        mtspr   SPRN_MD_AP, r8
 
        /* Map another 8 MByte at the IMMR to get the processor
index 15448668988dd85a902b830fba8ef020c2557cb0..b9b6ef510be1ec366576a718423b6c319e8e4c06 100644 (file)
@@ -58,15 +58,6 @@ BEGIN_FTR_SECTION
        mtlr    r0
        lis     r3,HID0_NAP@h
 END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
-BEGIN_FTR_SECTION
-       msync
-       li      r7,L2CSR0_L2FL@l
-       mtspr   SPRN_L2CSR0,r7
-2:
-       mfspr   r7,SPRN_L2CSR0
-       andi.   r4,r7,L2CSR0_L2FL@l
-       bne     2b
-END_FTR_SECTION_IFSET(CPU_FTR_L2CSR|CPU_FTR_CAN_NAP)
 1:
        /* Go to NAP or DOZE now */
        mfspr   r4,SPRN_HID0
index b054f33ab1fbcdad3bff7fda332c9e55dcda1d2f..a8e3490b54e3b828e7821ab3152d03192cef150d 100644 (file)
@@ -322,11 +322,11 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
        ret = entry << tbl->it_page_shift;      /* Set the return dma address */
 
        /* Put the TCEs in the HW table */
-       build_fail = ppc_md.tce_build(tbl, entry, npages,
+       build_fail = tbl->it_ops->set(tbl, entry, npages,
                                      (unsigned long)page &
                                      IOMMU_PAGE_MASK(tbl), direction, attrs);
 
-       /* ppc_md.tce_build() only returns non-zero for transient errors.
+       /* tbl->it_ops->set() only returns non-zero for transient errors.
         * Clean up the table bitmap in this case and return
         * DMA_ERROR_CODE. For all other errors the functionality is
         * not altered.
@@ -337,8 +337,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
        }
 
        /* Flush/invalidate TLB caches if necessary */
-       if (ppc_md.tce_flush)
-               ppc_md.tce_flush(tbl);
+       if (tbl->it_ops->flush)
+               tbl->it_ops->flush(tbl);
 
        /* Make sure updates are seen by hardware */
        mb();
@@ -408,7 +408,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
        if (!iommu_free_check(tbl, dma_addr, npages))
                return;
 
-       ppc_md.tce_free(tbl, entry, npages);
+       tbl->it_ops->clear(tbl, entry, npages);
 
        spin_lock_irqsave(&(pool->lock), flags);
        bitmap_clear(tbl->it_map, free_entry, npages);
@@ -424,8 +424,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
         * not do an mb() here on purpose, it is not needed on any of
         * the current platforms.
         */
-       if (ppc_md.tce_flush)
-               ppc_md.tce_flush(tbl);
+       if (tbl->it_ops->flush)
+               tbl->it_ops->flush(tbl);
 }
 
 int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
@@ -495,7 +495,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
                            npages, entry, dma_addr);
 
                /* Insert into HW table */
-               build_fail = ppc_md.tce_build(tbl, entry, npages,
+               build_fail = tbl->it_ops->set(tbl, entry, npages,
                                              vaddr & IOMMU_PAGE_MASK(tbl),
                                              direction, attrs);
                if(unlikely(build_fail))
@@ -534,8 +534,8 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
        }
 
        /* Flush/invalidate TLB caches if necessary */
-       if (ppc_md.tce_flush)
-               ppc_md.tce_flush(tbl);
+       if (tbl->it_ops->flush)
+               tbl->it_ops->flush(tbl);
 
        DBG("mapped %d elements:\n", outcount);
 
@@ -600,8 +600,8 @@ void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
         * do not do an mb() here, the affected platforms do not need it
         * when freeing.
         */
-       if (ppc_md.tce_flush)
-               ppc_md.tce_flush(tbl);
+       if (tbl->it_ops->flush)
+               tbl->it_ops->flush(tbl);
 }
 
 static void iommu_table_clear(struct iommu_table *tbl)
@@ -613,17 +613,17 @@ static void iommu_table_clear(struct iommu_table *tbl)
         */
        if (!is_kdump_kernel() || is_fadump_active()) {
                /* Clear the table in case firmware left allocations in it */
-               ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+               tbl->it_ops->clear(tbl, tbl->it_offset, tbl->it_size);
                return;
        }
 
 #ifdef CONFIG_CRASH_DUMP
-       if (ppc_md.tce_get) {
+       if (tbl->it_ops->get) {
                unsigned long index, tceval, tcecount = 0;
 
                /* Reserve the existing mappings left by the first kernel. */
                for (index = 0; index < tbl->it_size; index++) {
-                       tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
+                       tceval = tbl->it_ops->get(tbl, index + tbl->it_offset);
                        /*
                         * Freed TCE entry contains 0x7fffffffffffffff on JS20
                         */
@@ -657,6 +657,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
        unsigned int i;
        struct iommu_pool *p;
 
+       BUG_ON(!tbl->it_ops);
+
        /* number of bytes needed for the bitmap */
        sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
 
@@ -713,9 +715,11 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
        unsigned long bitmap_sz;
        unsigned int order;
 
-       if (!tbl || !tbl->it_map) {
-               printk(KERN_ERR "%s: expected TCE map for %s\n", __func__,
-                               node_name);
+       if (!tbl)
+               return;
+
+       if (!tbl->it_map) {
+               kfree(tbl);
                return;
        }
 
@@ -726,13 +730,6 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
        if (tbl->it_offset == 0)
                clear_bit(0, tbl->it_map);
 
-#ifdef CONFIG_IOMMU_API
-       if (tbl->it_group) {
-               iommu_group_put(tbl->it_group);
-               BUG_ON(tbl->it_group);
-       }
-#endif
-
        /* verify that table contains no entries */
        if (!bitmap_empty(tbl->it_map, tbl->it_size))
                pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
@@ -871,17 +868,33 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
        }
 }
 
+unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir)
+{
+       switch (dir) {
+       case DMA_BIDIRECTIONAL:
+               return TCE_PCI_READ | TCE_PCI_WRITE;
+       case DMA_FROM_DEVICE:
+               return TCE_PCI_WRITE;
+       case DMA_TO_DEVICE:
+               return TCE_PCI_READ;
+       default:
+               return 0;
+       }
+}
+EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm);
+
 #ifdef CONFIG_IOMMU_API
 /*
  * SPAPR TCE API
  */
 static void group_release(void *iommu_data)
 {
-       struct iommu_table *tbl = iommu_data;
-       tbl->it_group = NULL;
+       struct iommu_table_group *table_group = iommu_data;
+
+       table_group->group = NULL;
 }
 
-void iommu_register_group(struct iommu_table *tbl,
+void iommu_register_group(struct iommu_table_group *table_group,
                int pci_domain_number, unsigned long pe_num)
 {
        struct iommu_group *grp;
@@ -893,8 +906,8 @@ void iommu_register_group(struct iommu_table *tbl,
                                PTR_ERR(grp));
                return;
        }
-       tbl->it_group = grp;
-       iommu_group_set_iommudata(grp, tbl, group_release);
+       table_group->group = grp;
+       iommu_group_set_iommudata(grp, table_group, group_release);
        name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
                        pci_domain_number, pe_num);
        if (!name)
@@ -919,8 +932,8 @@ EXPORT_SYMBOL_GPL(iommu_tce_direction);
 void iommu_flush_tce(struct iommu_table *tbl)
 {
        /* Flush/invalidate TLB caches if necessary */
-       if (ppc_md.tce_flush)
-               ppc_md.tce_flush(tbl);
+       if (tbl->it_ops->flush)
+               tbl->it_ops->flush(tbl);
 
        /* Make sure updates are seen by hardware */
        mb();
@@ -931,7 +944,7 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl,
                unsigned long ioba, unsigned long tce_value,
                unsigned long npages)
 {
-       /* ppc_md.tce_free() does not support any value but 0 */
+       /* tbl->it_ops->clear() does not support any value but 0 */
        if (tce_value)
                return -EINVAL;
 
@@ -952,10 +965,7 @@ EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
 int iommu_tce_put_param_check(struct iommu_table *tbl,
                unsigned long ioba, unsigned long tce)
 {
-       if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
-               return -EINVAL;
-
-       if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
+       if (tce & ~IOMMU_PAGE_MASK(tbl))
                return -EINVAL;
 
        if (ioba & ~IOMMU_PAGE_MASK(tbl))
@@ -972,68 +982,16 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
 }
 EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
 
-unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
-{
-       unsigned long oldtce;
-       struct iommu_pool *pool = get_pool(tbl, entry);
-
-       spin_lock(&(pool->lock));
-
-       oldtce = ppc_md.tce_get(tbl, entry);
-       if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
-               ppc_md.tce_free(tbl, entry, 1);
-       else
-               oldtce = 0;
-
-       spin_unlock(&(pool->lock));
-
-       return oldtce;
-}
-EXPORT_SYMBOL_GPL(iommu_clear_tce);
-
-int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
-               unsigned long entry, unsigned long pages)
-{
-       unsigned long oldtce;
-       struct page *page;
-
-       for ( ; pages; --pages, ++entry) {
-               oldtce = iommu_clear_tce(tbl, entry);
-               if (!oldtce)
-                       continue;
-
-               page = pfn_to_page(oldtce >> PAGE_SHIFT);
-               WARN_ON(!page);
-               if (page) {
-                       if (oldtce & TCE_PCI_WRITE)
-                               SetPageDirty(page);
-                       put_page(page);
-               }
-       }
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages);
-
-/*
- * hwaddr is a kernel virtual address here (0xc... bazillion),
- * tce_build converts it to a physical address.
- */
-int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
-               unsigned long hwaddr, enum dma_data_direction direction)
+long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+               unsigned long *hpa, enum dma_data_direction *direction)
 {
-       int ret = -EBUSY;
-       unsigned long oldtce;
-       struct iommu_pool *pool = get_pool(tbl, entry);
-
-       spin_lock(&(pool->lock));
+       long ret;
 
-       oldtce = ppc_md.tce_get(tbl, entry);
-       /* Add new entry if it is not busy */
-       if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
-               ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL);
+       ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
 
-       spin_unlock(&(pool->lock));
+       if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+                       (*direction == DMA_BIDIRECTIONAL)))
+               SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
 
        /* if (unlikely(ret))
                pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
@@ -1042,84 +1000,72 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
 
        return ret;
 }
-EXPORT_SYMBOL_GPL(iommu_tce_build);
+EXPORT_SYMBOL_GPL(iommu_tce_xchg);
 
-int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
-               unsigned long tce)
+int iommu_take_ownership(struct iommu_table *tbl)
 {
-       int ret;
-       struct page *page = NULL;
-       unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
-       enum dma_data_direction direction = iommu_tce_direction(tce);
-
-       ret = get_user_pages_fast(tce & PAGE_MASK, 1,
-                       direction != DMA_TO_DEVICE, &page);
-       if (unlikely(ret != 1)) {
-               /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
-                               tce, entry << tbl->it_page_shift, ret); */
-               return -EFAULT;
-       }
-       hwaddr = (unsigned long) page_address(page) + offset;
-
-       ret = iommu_tce_build(tbl, entry, hwaddr, direction);
-       if (ret)
-               put_page(page);
-
-       if (ret < 0)
-               pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
-                       __func__, entry << tbl->it_page_shift, tce, ret);
+       unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+       int ret = 0;
 
-       return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode);
+       /*
+        * VFIO does not control TCE entries allocation and the guest
+        * can write new TCEs on top of existing ones so iommu_tce_build()
+        * must be able to release old pages. This functionality
+        * requires exchange() callback defined so if it is not
+        * implemented, we disallow taking ownership over the table.
+        */
+       if (!tbl->it_ops->exchange)
+               return -EINVAL;
 
-int iommu_take_ownership(struct iommu_table *tbl)
-{
-       unsigned long sz = (tbl->it_size + 7) >> 3;
+       spin_lock_irqsave(&tbl->large_pool.lock, flags);
+       for (i = 0; i < tbl->nr_pools; i++)
+               spin_lock(&tbl->pools[i].lock);
 
        if (tbl->it_offset == 0)
                clear_bit(0, tbl->it_map);
 
        if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
                pr_err("iommu_tce: it_map is not empty");
-               return -EBUSY;
+               ret = -EBUSY;
+               /* Restore bit#0 set by iommu_init_table() */
+               if (tbl->it_offset == 0)
+                       set_bit(0, tbl->it_map);
+       } else {
+               memset(tbl->it_map, 0xff, sz);
        }
 
-       memset(tbl->it_map, 0xff, sz);
-       iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
+       for (i = 0; i < tbl->nr_pools; i++)
+               spin_unlock(&tbl->pools[i].lock);
+       spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
 
-       /*
-        * Disable iommu bypass, otherwise the user can DMA to all of
-        * our physical memory via the bypass window instead of just
-        * the pages that has been explicitly mapped into the iommu
-        */
-       if (tbl->set_bypass)
-               tbl->set_bypass(tbl, false);
-
-       return 0;
+       return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_take_ownership);
 
 void iommu_release_ownership(struct iommu_table *tbl)
 {
-       unsigned long sz = (tbl->it_size + 7) >> 3;
+       unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+
+       spin_lock_irqsave(&tbl->large_pool.lock, flags);
+       for (i = 0; i < tbl->nr_pools; i++)
+               spin_lock(&tbl->pools[i].lock);
 
-       iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
        memset(tbl->it_map, 0, sz);
 
        /* Restore bit#0 set by iommu_init_table() */
        if (tbl->it_offset == 0)
                set_bit(0, tbl->it_map);
 
-       /* The kernel owns the device now, we can restore the iommu bypass */
-       if (tbl->set_bypass)
-               tbl->set_bypass(tbl, true);
+       for (i = 0; i < tbl->nr_pools; i++)
+               spin_unlock(&tbl->pools[i].lock);
+       spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
 }
 EXPORT_SYMBOL_GPL(iommu_release_ownership);
 
 int iommu_add_device(struct device *dev)
 {
        struct iommu_table *tbl;
+       struct iommu_table_group_link *tgl;
 
        /*
         * The sysfs entries should be populated before
@@ -1137,15 +1083,22 @@ int iommu_add_device(struct device *dev)
        }
 
        tbl = get_iommu_table_base(dev);
-       if (!tbl || !tbl->it_group) {
+       if (!tbl) {
                pr_debug("%s: Skipping device %s with no tbl\n",
                         __func__, dev_name(dev));
                return 0;
        }
 
+       tgl = list_first_entry_or_null(&tbl->it_group_list,
+                       struct iommu_table_group_link, next);
+       if (!tgl) {
+               pr_debug("%s: Skipping device %s with no group\n",
+                        __func__, dev_name(dev));
+               return 0;
+       }
        pr_debug("%s: Adding %s to iommu group %d\n",
                 __func__, dev_name(dev),
-                iommu_group_id(tbl->it_group));
+                iommu_group_id(tgl->table_group->group));
 
        if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
                pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
@@ -1154,7 +1107,7 @@ int iommu_add_device(struct device *dev)
                return -EINVAL;
        }
 
-       return iommu_group_add_device(tbl->it_group, dev);
+       return iommu_group_add_device(tgl->table_group->group, dev);
 }
 EXPORT_SYMBOL_GPL(iommu_add_device);
 
index 71bd161640cfc8d101e03117582229823d365676..dab616a33b8dbe283aa46c05b5492af69190650f 100644 (file)
 
 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
-       if (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs) {
+       struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+       if (!phb->controller_ops.setup_msi_irqs ||
+           !phb->controller_ops.teardown_msi_irqs) {
                pr_debug("msi: Platform doesn't provide MSI callbacks.\n");
                return -ENOSYS;
        }
@@ -24,10 +27,12 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
        if (type == PCI_CAP_ID_MSI && nvec > 1)
                return 1;
 
-       return ppc_md.setup_msi_irqs(dev, nvec, type);
+       return phb->controller_ops.setup_msi_irqs(dev, nvec, type);
 }
 
 void arch_teardown_msi_irqs(struct pci_dev *dev)
 {
-       ppc_md.teardown_msi_irqs(dev);
+       struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+       phb->controller_ops.teardown_msi_irqs(dev);
 }
index 0d054068a21d5849bc949320bfcc5835ac92dfdb..b9de34d44fcb877c388bcb54c98cc82bfb2ac3f2 100644 (file)
@@ -89,6 +89,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
 #endif
        return phb;
 }
+EXPORT_SYMBOL_GPL(pcibios_alloc_controller);
 
 void pcibios_free_controller(struct pci_controller *phb)
 {
@@ -1447,6 +1448,7 @@ void pcibios_claim_one_bus(struct pci_bus *bus)
        list_for_each_entry(child_bus, &bus->children, node)
                pcibios_claim_one_bus(child_bus);
 }
+EXPORT_SYMBOL_GPL(pcibios_claim_one_bus);
 
 
 /* pcibios_finish_adding_to_bus
@@ -1488,6 +1490,14 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
        return pci_enable_resources(dev, mask);
 }
 
+void pcibios_disable_device(struct pci_dev *dev)
+{
+       struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+       if (phb->controller_ops.disable_device)
+               phb->controller_ops.disable_device(dev);
+}
+
 resource_size_t pcibios_io_space_offset(struct pci_controller *hose)
 {
        return (unsigned long) hose->io_base_virt - _IO_BASE;
@@ -1680,6 +1690,7 @@ void pcibios_scan_phb(struct pci_controller *hose)
                        pcie_bus_configure_settings(child);
        }
 }
+EXPORT_SYMBOL_GPL(pcibios_scan_phb);
 
 static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
 {
index 7ed85a69a9c29b956d232596ba089cf610e9e1d2..7f9ed0c1f6b93d88dc4b3d8e815b7e1c099b1f82 100644 (file)
  */
 void pcibios_release_device(struct pci_dev *dev)
 {
+       struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
        eeh_remove_device(dev);
+
+       if (phb->controller_ops.release_device)
+               phb->controller_ops.release_device(dev);
 }
 
 /**
index febb50dd53285d8cbee941e5895bcbcb3bc77b8f..8005e18d1b40381f6b815890b0d58c99a382b4e8 100644 (file)
@@ -1112,7 +1112,6 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
 /*
  * Copy a thread..
  */
-extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
 
 /*
  * Copy architecture-specific thread state
index c69671c03c3b5b7dc9b3e1f70726abba12c5c4b1..bdcbb716f4d66845e56b619e958e85c3bde318fd 100644 (file)
@@ -523,7 +523,8 @@ void __init setup_system(void)
        smp_release_cpus();
 #endif
 
-       pr_info("Starting Linux PPC64 %s\n", init_utsname()->version);
+       pr_info("Starting Linux %s %s\n", init_utsname()->machine,
+                init_utsname()->version);
 
        pr_info("-----------------------------------------------------\n");
        pr_info("ppc64_pft_size    = 0x%llx\n", ppc64_pft_size);
@@ -685,6 +686,9 @@ void __init setup_arch(char **cmdline_p)
        init_mm.brk = klimit;
 #ifdef CONFIG_PPC_64K_PAGES
        init_mm.context.pte_frag = NULL;
+#endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+       mm_iommu_init(&init_mm.context);
 #endif
        irqstack_early_init();
        exc_lvl_early_init();
index fa1fd8a0c867f25611dfb6726e64cfd16ab39447..692873bff3341510730de0ec7e7d8c432ded989e 100644 (file)
@@ -496,13 +496,34 @@ static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
 static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
 static DEVICE_ATTR(pir, 0400, show_pir, NULL);
 
+/*
+ * This is the system wide DSCR register default value. Any
+ * change to this default value through the sysfs interface
+ * will update all per cpu DSCR default values across the
+ * system stored in their respective PACA structures.
+ */
 static unsigned long dscr_default;
 
+/**
+ * read_dscr() - Fetch the cpu specific DSCR default
+ * @val:       Returned cpu specific DSCR default value
+ *
+ * This function returns the per cpu DSCR default value
+ * for any cpu which is contained in it's PACA structure.
+ */
 static void read_dscr(void *val)
 {
        *(unsigned long *)val = get_paca()->dscr_default;
 }
 
+
+/**
+ * write_dscr() - Update the cpu specific DSCR default
+ * @val:       New cpu specific DSCR default value to update
+ *
+ * This function updates the per cpu DSCR default value
+ * for any cpu which is contained in it's PACA structure.
+ */
 static void write_dscr(void *val)
 {
        get_paca()->dscr_default = *(unsigned long *)val;
@@ -520,12 +541,29 @@ static void add_write_permission_dev_attr(struct device_attribute *attr)
        attr->attr.mode |= 0200;
 }
 
+/**
+ * show_dscr_default() - Fetch the system wide DSCR default
+ * @dev:       Device structure
+ * @attr:      Device attribute structure
+ * @buf:       Interface buffer
+ *
+ * This function returns the system wide DSCR default value.
+ */
 static ssize_t show_dscr_default(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
        return sprintf(buf, "%lx\n", dscr_default);
 }
 
+/**
+ * store_dscr_default() - Update the system wide DSCR default
+ * @dev:       Device structure
+ * @attr:      Device attribute structure
+ * @buf:       Interface buffer
+ * @count:     Size of the update
+ *
+ * This function updates the system wide DSCR default value.
+ */
 static ssize_t __used store_dscr_default(struct device *dev,
                struct device_attribute *attr, const char *buf,
                size_t count)
index 5754b226da7e5a2691ef27dc1d31b3cb0e519f72..bf8f34a5867088d1a2346d17b89e2daaabe365ab 100644 (file)
@@ -293,7 +293,7 @@ dont_backup_fp:
        ld      r2, STK_GOT(r1)
 
        /* Load CPU's default DSCR */
-       ld      r0, PACA_DSCR(r13)
+       ld      r0, PACA_DSCR_DEFAULT(r13)
        mtspr   SPRN_DSCR, r0
 
        blr
@@ -473,7 +473,7 @@ restore_gprs:
        ld      r2, STK_GOT(r1)
 
        /* Load CPU's default DSCR */
-       ld      r0, PACA_DSCR(r13)
+       ld      r0, PACA_DSCR_DEFAULT(r13)
        mtspr   SPRN_DSCR, r0
 
        blr
index 19e4744b6eba2a10dbc41d118e2de83021e3aed6..6530f1b8874dac16dc7f71480b5d748d0ccc268d 100644 (file)
@@ -1377,6 +1377,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
        };
        char *facility = "unknown";
        u64 value;
+       u32 instword, rd;
        u8 status;
        bool hv;
 
@@ -1388,12 +1389,46 @@ void facility_unavailable_exception(struct pt_regs *regs)
 
        status = value >> 56;
        if (status == FSCR_DSCR_LG) {
-               /* User is acessing the DSCR.  Set the inherit bit and allow
-                * the user to set it directly in future by setting via the
-                * FSCR DSCR bit.  We always leave HFSCR DSCR set.
+               /*
+                * User is accessing the DSCR register using the problem
+                * state only SPR number (0x03) either through a mfspr or
+                * a mtspr instruction. If it is a write attempt through
+                * a mtspr, then we set the inherit bit. This also allows
+                * the user to write or read the register directly in the
+                * future by setting via the FSCR DSCR bit. But in case it
+                * is a read DSCR attempt through a mfspr instruction, we
+                * just emulate the instruction instead. This code path will
+                * always emulate all the mfspr instructions till the user
+                * has attempted atleast one mtspr instruction. This way it
+                * preserves the same behaviour when the user is accessing
+                * the DSCR through privilege level only SPR number (0x11)
+                * which is emulated through illegal instruction exception.
+                * We always leave HFSCR DSCR set.
                 */
-               current->thread.dscr_inherit = 1;
-               mtspr(SPRN_FSCR, value | FSCR_DSCR);
+               if (get_user(instword, (u32 __user *)(regs->nip))) {
+                       pr_err("Failed to fetch the user instruction\n");
+                       return;
+               }
+
+               /* Write into DSCR (mtspr 0x03, RS) */
+               if ((instword & PPC_INST_MTSPR_DSCR_USER_MASK)
+                               == PPC_INST_MTSPR_DSCR_USER) {
+                       rd = (instword >> 21) & 0x1f;
+                       current->thread.dscr = regs->gpr[rd];
+                       current->thread.dscr_inherit = 1;
+                       mtspr(SPRN_FSCR, value | FSCR_DSCR);
+               }
+
+               /* Read from DSCR (mfspr RT, 0x03) */
+               if ((instword & PPC_INST_MFSPR_DSCR_USER_MASK)
+                               == PPC_INST_MFSPR_DSCR_USER) {
+                       if (emulate_instruction(regs)) {
+                               pr_err("DSCR based mfspr emulation failed\n");
+                               return;
+                       }
+                       regs->nip += 4;
+                       emulate_single_step(regs);
+               }
                return;
        }
 
index 305eb0d9b76882d44e9f72805c951e1fa8c68b05..b457bfa2843603f9c920236372effe6304da3bed 100644 (file)
 /* The alignment of the vDSO */
 #define VDSO_ALIGNMENT (1 << 16)
 
-extern char vdso32_start, vdso32_end;
-static void *vdso32_kbase = &vdso32_start;
 static unsigned int vdso32_pages;
+static void *vdso32_kbase;
 static struct page **vdso32_pagelist;
 unsigned long vdso32_sigtramp;
 unsigned long vdso32_rt_sigtramp;
 
+#ifdef CONFIG_VDSO32
+extern char vdso32_start, vdso32_end;
+#endif
+
 #ifdef CONFIG_PPC64
 extern char vdso64_start, vdso64_end;
 static void *vdso64_kbase = &vdso64_start;
@@ -140,50 +143,6 @@ struct lib64_elfinfo
 };
 
 
-#ifdef __DEBUG
-static void dump_one_vdso_page(struct page *pg, struct page *upg)
-{
-       printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
-              page_count(pg),
-              pg->flags);
-       if (upg && !IS_ERR(upg) /* && pg != upg*/) {
-               printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg)
-                                                      << PAGE_SHIFT),
-                      page_count(upg),
-                      upg->flags);
-       }
-       printk("\n");
-}
-
-static void dump_vdso_pages(struct vm_area_struct * vma)
-{
-       int i;
-
-       if (!vma || is_32bit_task()) {
-               printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
-               for (i=0; i<vdso32_pages; i++) {
-                       struct page *pg = virt_to_page(vdso32_kbase +
-                                                      i*PAGE_SIZE);
-                       struct page *upg = (vma && vma->vm_mm) ?
-                               follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0)
-                               : NULL;
-                       dump_one_vdso_page(pg, upg);
-               }
-       }
-       if (!vma || !is_32bit_task()) {
-               printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
-               for (i=0; i<vdso64_pages; i++) {
-                       struct page *pg = virt_to_page(vdso64_kbase +
-                                                      i*PAGE_SIZE);
-                       struct page *upg = (vma && vma->vm_mm) ?
-                               follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0)
-                               : NULL;
-                       dump_one_vdso_page(pg, upg);
-               }
-       }
-}
-#endif /* DEBUG */
-
 /*
  * This is called from binfmt_elf, we create the special vma for the
  * vDSO and insert it into the mm struct tree
@@ -292,6 +251,7 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 
 
 
+#ifdef CONFIG_VDSO32
 static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
                                  unsigned long *size)
 {
@@ -379,6 +339,20 @@ static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32,
 
        return 0;
 }
+#else /* !CONFIG_VDSO32 */
+static unsigned long __init find_function32(struct lib32_elfinfo *lib,
+                                           const char *symname)
+{
+       return 0;
+}
+
+static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32,
+                                      struct lib64_elfinfo *v64,
+                                      const char *orig, const char *fix)
+{
+       return 0;
+}
+#endif /* CONFIG_VDSO32 */
 
 
 #ifdef CONFIG_PPC64
@@ -489,6 +463,7 @@ static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
         * Locate symbol tables & text section
         */
 
+#ifdef CONFIG_VDSO32
        v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize);
        v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL);
        if (v32->dynsym == NULL || v32->dynstr == NULL) {
@@ -501,6 +476,7 @@ static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
                return -1;
        }
        v32->text = sect - vdso32_kbase;
+#endif
 
 #ifdef CONFIG_PPC64
        v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize);
@@ -537,7 +513,9 @@ static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32,
 static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
                                       struct lib64_elfinfo *v64)
 {
+#ifdef CONFIG_VDSO32
        Elf32_Sym *sym32;
+#endif
 #ifdef CONFIG_PPC64
        Elf64_Sym *sym64;
 
@@ -552,6 +530,7 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
                (sym64->st_value - VDSO64_LBASE);
 #endif /* CONFIG_PPC64 */
 
+#ifdef CONFIG_VDSO32
        sym32 = find_symbol32(v32, "__kernel_datapage_offset");
        if (sym32 == NULL) {
                printk(KERN_ERR "vDSO32: Can't find symbol "
@@ -561,6 +540,7 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
        *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) =
                (vdso32_pages << PAGE_SHIFT) -
                (sym32->st_value - VDSO32_LBASE);
+#endif
 
        return 0;
 }
@@ -569,55 +549,54 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
 static __init int vdso_fixup_features(struct lib32_elfinfo *v32,
                                      struct lib64_elfinfo *v64)
 {
-       void *start32;
-       unsigned long size32;
+       unsigned long size;
+       void *start;
 
 #ifdef CONFIG_PPC64
-       void *start64;
-       unsigned long size64;
-
-       start64 = find_section64(v64->hdr, "__ftr_fixup", &size64);
-       if (start64)
+       start = find_section64(v64->hdr, "__ftr_fixup", &size);
+       if (start)
                do_feature_fixups(cur_cpu_spec->cpu_features,
-                                 start64, start64 + size64);
+                                 start, start + size);
 
-       start64 = find_section64(v64->hdr, "__mmu_ftr_fixup", &size64);
-       if (start64)
+       start = find_section64(v64->hdr, "__mmu_ftr_fixup", &size);
+       if (start)
                do_feature_fixups(cur_cpu_spec->mmu_features,
-                                 start64, start64 + size64);
+                                 start, start + size);
 
-       start64 = find_section64(v64->hdr, "__fw_ftr_fixup", &size64);
-       if (start64)
+       start = find_section64(v64->hdr, "__fw_ftr_fixup", &size);
+       if (start)
                do_feature_fixups(powerpc_firmware_features,
-                                 start64, start64 + size64);
+                                 start, start + size);
 
-       start64 = find_section64(v64->hdr, "__lwsync_fixup", &size64);
-       if (start64)
+       start = find_section64(v64->hdr, "__lwsync_fixup", &size);
+       if (start)
                do_lwsync_fixups(cur_cpu_spec->cpu_features,
-                                start64, start64 + size64);
+                                start, start + size);
 #endif /* CONFIG_PPC64 */
 
-       start32 = find_section32(v32->hdr, "__ftr_fixup", &size32);
-       if (start32)
+#ifdef CONFIG_VDSO32
+       start = find_section32(v32->hdr, "__ftr_fixup", &size);
+       if (start)
                do_feature_fixups(cur_cpu_spec->cpu_features,
-                                 start32, start32 + size32);
+                                 start, start + size);
 
-       start32 = find_section32(v32->hdr, "__mmu_ftr_fixup", &size32);
-       if (start32)
+       start = find_section32(v32->hdr, "__mmu_ftr_fixup", &size);
+       if (start)
                do_feature_fixups(cur_cpu_spec->mmu_features,
-                                 start32, start32 + size32);
+                                 start, start + size);
 
 #ifdef CONFIG_PPC64
-       start32 = find_section32(v32->hdr, "__fw_ftr_fixup", &size32);
-       if (start32)
+       start = find_section32(v32->hdr, "__fw_ftr_fixup", &size);
+       if (start)
                do_feature_fixups(powerpc_firmware_features,
-                                 start32, start32 + size32);
+                                 start, start + size);
 #endif /* CONFIG_PPC64 */
 
-       start32 = find_section32(v32->hdr, "__lwsync_fixup", &size32);
-       if (start32)
+       start = find_section32(v32->hdr, "__lwsync_fixup", &size);
+       if (start)
                do_lwsync_fixups(cur_cpu_spec->cpu_features,
-                                start32, start32 + size32);
+                                start, start + size);
+#endif
 
        return 0;
 }
@@ -779,11 +758,15 @@ static int __init vdso_init(void)
 #endif /* CONFIG_PPC64 */
 
 
+#ifdef CONFIG_VDSO32
+       vdso32_kbase = &vdso32_start;
+
        /*
         * Calculate the size of the 32 bits vDSO
         */
        vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
        DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages);
+#endif
 
 
        /*
@@ -804,6 +787,7 @@ static int __init vdso_init(void)
                return 0;
        }
 
+#ifdef CONFIG_VDSO32
        /* Make sure pages are in the correct state */
        vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 2),
                                  GFP_KERNEL);
@@ -816,6 +800,7 @@ static int __init vdso_init(void)
        }
        vdso32_pagelist[i++] = virt_to_page(vdso_data);
        vdso32_pagelist[i] = NULL;
+#endif
 
 #ifdef CONFIG_PPC64
        vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 2),
index 5bfdab9047be2577443a77034e718ae56253546e..b41426c60ef62ea0f9757a62578ff86d143af3bf 100644 (file)
@@ -1196,6 +1196,11 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
        tbl->it_type = TCE_VB;
        tbl->it_blocksize = 16;
 
+       if (firmware_has_feature(FW_FEATURE_LPAR))
+               tbl->it_ops = &iommu_table_lpar_multi_ops;
+       else
+               tbl->it_ops = &iommu_table_pseries_ops;
+
        return iommu_init_table(tbl, -1);
 }
 
index 4d70df26c402c7f59f4bc8468f93afec162c1f8d..faa86e9c05510973001b7d3c64557e8a9cbd212d 100644 (file)
@@ -324,7 +324,7 @@ kvm_start_guest:
 kvm_secondary_got_guest:
 
        /* Set HSTATE_DSCR(r13) to something sensible */
-       ld      r6, PACA_DSCR(r13)
+       ld      r6, PACA_DSCR_DEFAULT(r13)
        std     r6, HSTATE_DSCR(r13)
 
        /* Order load of vcore, ptid etc. after load of vcpu */
index 7902802a19a56a7ee27d3f1e5ba8e4eacf3f5f6f..a47e14277fd8a91d1d4a255592db73a979a94f1b 100644 (file)
@@ -33,6 +33,6 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
 obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
 
 obj-$(CONFIG_ALTIVEC)  += xor_vmx.o
-CFLAGS_xor_vmx.o += -maltivec -mabi=altivec
+CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
 
 obj-$(CONFIG_PPC64) += $(obj64-y)
index 9c8770b5f96f20212a3995559e4a4c7c9eb28786..3eb73a38220de34379de3bad092b06d40f051f53 100644 (file)
@@ -36,3 +36,4 @@ obj-$(CONFIG_PPC_SUBPAGE_PROT)        += subpage-prot.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_HIGHMEM)          += highmem.o
 obj-$(CONFIG_PPC_COPRO_BASE)   += copro_fault.o
+obj-$(CONFIG_SPAPR_TCE_IOMMU)  += mmu_context_iommu.o
index f031a47d7701e60d349f08b46365e2912844066b..6527882ce05ede3a0a45f74d3a11c4c375da6514 100644 (file)
@@ -26,7 +26,7 @@
 #include <asm/reg.h>
 #include <asm/copro.h>
 #include <asm/spu.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 
 /*
  * This ought to be kept in sync with the powerpc specific do_page_fault
@@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(copro_handle_mm_fault);
 
 int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
 {
-       u64 vsid;
+       u64 vsid, vsidkey;
        int psize, ssize;
 
        switch (REGION_ID(ea)) {
@@ -109,6 +109,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
                psize = get_slice_psize(mm, ea);
                ssize = user_segment_size(ea);
                vsid = get_vsid(mm->context.id, ea, ssize);
+               vsidkey = SLB_VSID_USER;
                break;
        case VMALLOC_REGION_ID:
                pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
@@ -118,19 +119,21 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
                        psize = mmu_io_psize;
                ssize = mmu_kernel_ssize;
                vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+               vsidkey = SLB_VSID_KERNEL;
                break;
        case KERNEL_REGION_ID:
                pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea);
                psize = mmu_linear_psize;
                ssize = mmu_kernel_ssize;
                vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+               vsidkey = SLB_VSID_KERNEL;
                break;
        default:
                pr_debug("%s: invalid region access at %016llx\n", __func__, ea);
                return 1;
        }
 
-       vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER;
+       vsid = (vsid << slb_vsid_shift(ssize)) | vsidkey;
 
        vsid |= mmu_psize_defs[psize].sllp |
                ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0);
index 9c4880ddecd63f06e0f4b2b45aded0b57b47e940..13befa35d8a8ecdd31611aadb42c6be206ba743e 100644 (file)
@@ -29,7 +29,7 @@
 #include <asm/kexec.h>
 #include <asm/ppc-opcode.h>
 
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 
 #ifdef DEBUG_LOW
 #define DBG_LOW(fmt...) udbg_printf(fmt)
index fda236f908eb3a8239d172d5e3d35b21fc67e3ac..5ec987f65b2c95c4328fdb7369fd4c2d597284ba 100644 (file)
@@ -57,6 +57,7 @@
 #include <asm/fadump.h>
 #include <asm/firmware.h>
 #include <asm/tm.h>
+#include <asm/trace.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -1004,6 +1005,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 
        DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
                ea, access, trap);
+       trace_hash_fault(ea, access, trap);
 
        /* Get region & vsid */
        switch (REGION_ID(ea)) {
@@ -1475,7 +1477,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
        unsigned long hash;
        unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
        unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
-       unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
+       unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL));
        long ret;
 
        hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
index 45fda71feb27465fcd4079744d0a19cd9f168ea9..0f11819d8f1dc07441f6e13362a4dca53f55fb87 100644 (file)
@@ -560,7 +560,7 @@ subsys_initcall(add_system_ram_resources);
  */
 int devmem_is_allowed(unsigned long pfn)
 {
-       if (iomem_is_exclusive(pfn << PAGE_SHIFT))
+       if (iomem_is_exclusive(PFN_PHYS(pfn)))
                return 0;
        if (!page_is_ram(pfn))
                return 1;
index 178876aef40f8d6dd5d69b650e986225e0ddfb70..4e4efbc2658e3500c1a2f0eb3fb2a036a5d284cc 100644 (file)
@@ -88,6 +88,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 
 #ifdef CONFIG_PPC_64K_PAGES
        mm->context.pte_frag = NULL;
+#endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+       mm_iommu_init(&mm->context);
 #endif
        return 0;
 }
@@ -132,6 +135,9 @@ static inline void destroy_pagetable_page(struct mm_struct *mm)
 
 void destroy_context(struct mm_struct *mm)
 {
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+       mm_iommu_cleanup(&mm->context);
+#endif
 
 #ifdef CONFIG_PPC_ICSWX
        drop_cop(mm->context.acop, mm);
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
new file mode 100644 (file)
index 0000000..da6a216
--- /dev/null
@@ -0,0 +1,316 @@
+/*
+ *  IOMMU helpers in MMU context.
+ *
+ *  Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
+#include <linux/vmalloc.h>
+#include <linux/mutex.h>
+#include <asm/mmu_context.h>
+
+static DEFINE_MUTEX(mem_list_mutex);
+
+struct mm_iommu_table_group_mem_t {
+       struct list_head next;
+       struct rcu_head rcu;
+       unsigned long used;
+       atomic64_t mapped;
+       u64 ua;                 /* userspace address */
+       u64 entries;            /* number of entries in hpas[] */
+       u64 *hpas;              /* vmalloc'ed */
+};
+
+static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
+               unsigned long npages, bool incr)
+{
+       long ret = 0, locked, lock_limit;
+
+       if (!npages)
+               return 0;
+
+       down_write(&mm->mmap_sem);
+
+       if (incr) {
+               locked = mm->locked_vm + npages;
+               lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+                       ret = -ENOMEM;
+               else
+                       mm->locked_vm += npages;
+       } else {
+               if (WARN_ON_ONCE(npages > mm->locked_vm))
+                       npages = mm->locked_vm;
+               mm->locked_vm -= npages;
+       }
+
+       pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
+                       current->pid,
+                       incr ? '+' : '-',
+                       npages << PAGE_SHIFT,
+                       mm->locked_vm << PAGE_SHIFT,
+                       rlimit(RLIMIT_MEMLOCK));
+       up_write(&mm->mmap_sem);
+
+       return ret;
+}
+
+bool mm_iommu_preregistered(void)
+{
+       if (!current || !current->mm)
+               return false;
+
+       return !list_empty(&current->mm->context.iommu_group_mem_list);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
+
+long mm_iommu_get(unsigned long ua, unsigned long entries,
+               struct mm_iommu_table_group_mem_t **pmem)
+{
+       struct mm_iommu_table_group_mem_t *mem;
+       long i, j, ret = 0, locked_entries = 0;
+       struct page *page = NULL;
+
+       if (!current || !current->mm)
+               return -ESRCH; /* process exited */
+
+       mutex_lock(&mem_list_mutex);
+
+       list_for_each_entry_rcu(mem, &current->mm->context.iommu_group_mem_list,
+                       next) {
+               if ((mem->ua == ua) && (mem->entries == entries)) {
+                       ++mem->used;
+                       *pmem = mem;
+                       goto unlock_exit;
+               }
+
+               /* Overlap? */
+               if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
+                               (ua < (mem->ua +
+                                      (mem->entries << PAGE_SHIFT)))) {
+                       ret = -EINVAL;
+                       goto unlock_exit;
+               }
+
+       }
+
+       ret = mm_iommu_adjust_locked_vm(current->mm, entries, true);
+       if (ret)
+               goto unlock_exit;
+
+       locked_entries = entries;
+
+       mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+       if (!mem) {
+               ret = -ENOMEM;
+               goto unlock_exit;
+       }
+
+       mem->hpas = vzalloc(entries * sizeof(mem->hpas[0]));
+       if (!mem->hpas) {
+               kfree(mem);
+               ret = -ENOMEM;
+               goto unlock_exit;
+       }
+
+       for (i = 0; i < entries; ++i) {
+               if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
+                                       1/* pages */, 1/* iswrite */, &page)) {
+                       for (j = 0; j < i; ++j)
+                               put_page(pfn_to_page(
+                                               mem->hpas[j] >> PAGE_SHIFT));
+                       vfree(mem->hpas);
+                       kfree(mem);
+                       ret = -EFAULT;
+                       goto unlock_exit;
+               }
+
+               mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
+       }
+
+       atomic64_set(&mem->mapped, 1);
+       mem->used = 1;
+       mem->ua = ua;
+       mem->entries = entries;
+       *pmem = mem;
+
+       list_add_rcu(&mem->next, &current->mm->context.iommu_group_mem_list);
+
+unlock_exit:
+       if (locked_entries && ret)
+               mm_iommu_adjust_locked_vm(current->mm, locked_entries, false);
+
+       mutex_unlock(&mem_list_mutex);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_get);
+
+static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
+{
+       long i;
+       struct page *page = NULL;
+
+       for (i = 0; i < mem->entries; ++i) {
+               if (!mem->hpas[i])
+                       continue;
+
+               page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
+               if (!page)
+                       continue;
+
+               put_page(page);
+               mem->hpas[i] = 0;
+       }
+}
+
+static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
+{
+
+       mm_iommu_unpin(mem);
+       vfree(mem->hpas);
+       kfree(mem);
+}
+
+static void mm_iommu_free(struct rcu_head *head)
+{
+       struct mm_iommu_table_group_mem_t *mem = container_of(head,
+                       struct mm_iommu_table_group_mem_t, rcu);
+
+       mm_iommu_do_free(mem);
+}
+
+static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
+{
+       list_del_rcu(&mem->next);
+       mm_iommu_adjust_locked_vm(current->mm, mem->entries, false);
+       call_rcu(&mem->rcu, mm_iommu_free);
+}
+
+long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem)
+{
+       long ret = 0;
+
+       if (!current || !current->mm)
+               return -ESRCH; /* process exited */
+
+       mutex_lock(&mem_list_mutex);
+
+       if (mem->used == 0) {
+               ret = -ENOENT;
+               goto unlock_exit;
+       }
+
+       --mem->used;
+       /* There are still users, exit */
+       if (mem->used)
+               goto unlock_exit;
+
+       /* Are there still mappings? */
+       if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) {
+               ++mem->used;
+               ret = -EBUSY;
+               goto unlock_exit;
+       }
+
+       /* @mapped became 0 so now mappings are disabled, release the region */
+       mm_iommu_release(mem);
+
+unlock_exit:
+       mutex_unlock(&mem_list_mutex);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_put);
+
+struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua,
+               unsigned long size)
+{
+       struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+
+       list_for_each_entry_rcu(mem,
+                       &current->mm->context.iommu_group_mem_list,
+                       next) {
+               if ((mem->ua <= ua) &&
+                               (ua + size <= mem->ua +
+                                (mem->entries << PAGE_SHIFT))) {
+                       ret = mem;
+                       break;
+               }
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_lookup);
+
+struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua,
+               unsigned long entries)
+{
+       struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+
+       list_for_each_entry_rcu(mem,
+                       &current->mm->context.iommu_group_mem_list,
+                       next) {
+               if ((mem->ua == ua) && (mem->entries == entries)) {
+                       ret = mem;
+                       break;
+               }
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_find);
+
+long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+               unsigned long ua, unsigned long *hpa)
+{
+       const long entry = (ua - mem->ua) >> PAGE_SHIFT;
+       u64 *va = &mem->hpas[entry];
+
+       if (entry >= mem->entries)
+               return -EFAULT;
+
+       *hpa = *va | (ua & ~PAGE_MASK);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
+
+long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
+{
+       if (atomic64_inc_not_zero(&mem->mapped))
+               return 0;
+
+       /* Last mm_iommu_put() has been called, no more mappings allowed */
+       return -ENXIO;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);
+
+void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
+{
+       atomic64_add_unless(&mem->mapped, -1, 1);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);
+
+void mm_iommu_init(mm_context_t *ctx)
+{
+       INIT_LIST_HEAD_RCU(&ctx->iommu_group_mem_list);
+}
+
+void mm_iommu_cleanup(mm_context_t *ctx)
+{
+       struct mm_iommu_table_group_mem_t *mem, *tmp;
+
+       list_for_each_entry_safe(mem, tmp, &ctx->iommu_group_mem_list, next) {
+               list_del_rcu(&mem->next);
+               mm_iommu_do_free(mem);
+       }
+}
index 89bf95bd63b1f6bac752956bbea522f31093487d..765b419883f22edebd839f16b7842cd833e2681a 100644 (file)
@@ -398,18 +398,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
        rldicl  r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
        clrrdi  r15,r15,3
        cmpdi   cr0,r14,0
-       bge     tlb_miss_fault_e6500    /* Bad pgd entry or hugepage; bail */
+       bge     tlb_miss_huge_e6500     /* Bad pgd entry or hugepage; bail */
        ldx     r14,r14,r15             /* grab pud entry */
 
        rldicl  r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
        clrrdi  r15,r15,3
        cmpdi   cr0,r14,0
-       bge     tlb_miss_fault_e6500
+       bge     tlb_miss_huge_e6500
        ldx     r14,r14,r15             /* Grab pmd entry */
 
        mfspr   r10,SPRN_MAS0
        cmpdi   cr0,r14,0
-       bge     tlb_miss_fault_e6500
+       bge     tlb_miss_huge_e6500
 
        /* Now we build the MAS for a 2M indirect page:
         *
@@ -428,6 +428,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
        clrrdi  r15,r16,21              /* make EA 2M-aligned */
        mtspr   SPRN_MAS2,r15
 
+tlb_miss_huge_done_e6500:
        lbz     r15,TCD_ESEL_NEXT(r11)
        lbz     r16,TCD_ESEL_MAX(r11)
        lbz     r14,TCD_ESEL_FIRST(r11)
@@ -456,6 +457,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
        tlb_epilog_bolted
        rfi
 
+tlb_miss_huge_e6500:
+       beq     tlb_miss_fault_e6500
+       li      r10,1
+       andi.   r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */
+       rldimi  r14,r10,63,0            /* Set PD_HUGE */
+       xor     r14,r14,r15             /* Clear size bits */
+       ldx     r14,0,r14
+
+       /*
+        * Now we build the MAS for a huge page.
+        *
+        * MAS 0   :    ESEL needs to be filled by software round-robin
+        *               - can be handled by indirect code
+        * MAS 1   :    Need to clear IND and set TSIZE
+        * MAS 2,3+7:   Needs to be redone similar to non-tablewalk handler
+        */
+
+       subi    r15,r15,10              /* Convert psize to tsize */
+       mfspr   r10,SPRN_MAS1
+       rlwinm  r10,r10,0,~MAS1_IND
+       rlwimi  r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK
+       mtspr   SPRN_MAS1,r10
+
+       li      r10,-0x400
+       sld     r15,r10,r15             /* Generate mask based on size */
+       and     r10,r16,r15
+       rldicr  r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+       rlwimi  r10,r14,32-19,27,31     /* Insert WIMGE */
+       clrldi  r15,r15,PAGE_SHIFT      /* Clear crap at the top */
+       rlwimi  r15,r14,32-8,22,25      /* Move in U bits */
+       mtspr   SPRN_MAS2,r10
+       andi.   r10,r14,_PAGE_DIRTY
+       rlwimi  r15,r14,32-2,26,31      /* Move in BAP bits */
+
+       /* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+       bne     1f
+       li      r10,MAS3_SW|MAS3_UW
+       andc    r15,r15,r10
+1:
+       mtspr   SPRN_MAS7_MAS3,r15
+
+       mfspr   r10,SPRN_MAS0
+       b       tlb_miss_huge_done_e6500
+
 tlb_miss_kernel_e6500:
        ld      r14,PACA_KERNELPGD(r13)
        cmpldi  cr1,r15,8               /* Check for vmalloc region */
index 12b638425bb9b543f4761246bbb7709e64d379cb..d90893b76e7ceb51b5741d50d2d06fe873ee6af4 100644 (file)
@@ -131,7 +131,16 @@ static void pmao_restore_workaround(bool ebb) { }
 
 static bool regs_use_siar(struct pt_regs *regs)
 {
-       return !!regs->result;
+       /*
+        * When we take a performance monitor exception the regs are setup
+        * using perf_read_regs() which overloads some fields, in particular
+        * regs->result to tell us whether to use SIAR.
+        *
+        * However if the regs are from another exception, eg. a syscall, then
+        * they have not been setup using perf_read_regs() and so regs->result
+        * is something random.
+        */
+       return ((TRAP(regs) == 0xf00) && regs->result);
 }
 
 /*
index c949ca055712be9c6bbe5ff4ed808d2d640a434a..63016621aff8af4807b37ed7af2a7ee078755e4b 100644 (file)
@@ -193,7 +193,7 @@ static struct irq_chip mpc52xx_gpt_irq_chip = {
 
 void mpc52xx_gpt_irq_cascade(unsigned int virq, struct irq_desc *desc)
 {
-       struct mpc52xx_gpt_priv *gpt = irq_get_handler_data(virq);
+       struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc);
        int sub_virq;
        u32 status;
 
index 2fb4b24368a6e6ae7851ade3bcae379d2245229b..97915feffd42d35be230f3d4b58883b938b4eaae 100644 (file)
@@ -282,7 +282,7 @@ config CORENET_GENERIC
          For 64bit kernel, the following boards are supported:
            T208x QDS/RDB, T4240 QDS/RDB and B4 QDS
          The following boards are supported for both 32bit and 64bit kernel:
-           P5020 DS, P5040 DS and T104xQDS/RDB
+           P5020 DS, P5040 DS, T102x QDS/RDB, T104x QDS/RDB
 
 endif # FSL_SOC_BOOKE
 
index 9824d2cf79bd6d618dba10e1bb4e38f4b946b408..bd839dc287fe61f3ef507a1d2d0070d2ad7b31c5 100644 (file)
@@ -150,6 +150,9 @@ static const char * const boards[] __initconst = {
        "fsl,B4860QDS",
        "fsl,B4420QDS",
        "fsl,B4220QDS",
+       "fsl,T1023RDB",
+       "fsl,T1024QDS",
+       "fsl,T1024RDB",
        "fsl,T1040QDS",
        "fsl,T1042QDS",
        "fsl,T1040RDB",
index 8631ac5f0e579c35ffa4ac2153e237be908a8817..b8b8216979104c22dc29fc06dd433aeedb409e33 100644 (file)
@@ -345,6 +345,7 @@ void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
        local_irq_disable();
 
        if (secondary) {
+               __flush_disable_L1();
                atomic_inc(&kexec_down_cpus);
                /* loop forever */
                while (1);
@@ -357,61 +358,11 @@ static void mpc85xx_smp_kexec_down(void *arg)
                ppc_md.kexec_cpu_down(0,1);
 }
 
-static void map_and_flush(unsigned long paddr)
-{
-       struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
-       unsigned long kaddr  = (unsigned long)kmap_atomic(page);
-
-       flush_dcache_range(kaddr, kaddr + PAGE_SIZE);
-       kunmap_atomic((void *)kaddr);
-}
-
-/**
- * Before we reset the other cores, we need to flush relevant cache
- * out to memory so we don't get anything corrupted, some of these flushes
- * are performed out of an overabundance of caution as interrupts are not
- * disabled yet and we can switch cores
- */
-static void mpc85xx_smp_flush_dcache_kexec(struct kimage *image)
-{
-       kimage_entry_t *ptr, entry;
-       unsigned long paddr;
-       int i;
-
-       if (image->type == KEXEC_TYPE_DEFAULT) {
-               /* normal kexec images are stored in temporary pages */
-               for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
-                    ptr = (entry & IND_INDIRECTION) ?
-                               phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
-                       if (!(entry & IND_DESTINATION)) {
-                               map_and_flush(entry);
-                       }
-               }
-               /* flush out last IND_DONE page */
-               map_and_flush(entry);
-       } else {
-               /* crash type kexec images are copied to the crash region */
-               for (i = 0; i < image->nr_segments; i++) {
-                       struct kexec_segment *seg = &image->segment[i];
-                       for (paddr = seg->mem; paddr < seg->mem + seg->memsz;
-                            paddr += PAGE_SIZE) {
-                               map_and_flush(paddr);
-                       }
-               }
-       }
-
-       /* also flush the kimage struct to be passed in as well */
-       flush_dcache_range((unsigned long)image,
-                          (unsigned long)image + sizeof(*image));
-}
-
 static void mpc85xx_smp_machine_kexec(struct kimage *image)
 {
        int timeout = INT_MAX;
        int i, num_cpus = num_present_cpus();
 
-       mpc85xx_smp_flush_dcache_kexec(image);
-
        if (image->type == KEXEC_TYPE_DEFAULT)
                smp_call_function(mpc85xx_smp_kexec_down, NULL, 0);
 
index 1eadb6d0dc6406acbe24a5b946fb4a62b104b83e..30e002f4648c81a1b7f1d4a6d5a0d7c08d4ea2fb 100644 (file)
@@ -79,7 +79,7 @@ static void __init twr_p1025_setup_arch(void)
        mpc85xx_qe_init();
        mpc85xx_qe_par_io_init();
 
-#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
+#if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE)
        if (machine_is(twr_p1025)) {
                struct ccsr_guts __iomem *guts;
 
@@ -101,7 +101,7 @@ static void __init twr_p1025_setup_arch(void)
                                        MPC85xx_PMUXCR_QE(12));
                        iounmap(guts);
 
-#if defined(CONFIG_SERIAL_QE)
+#if IS_ENABLED(CONFIG_SERIAL_QE)
                        /* On P1025TWR board, the UCC7 acted as UART port.
                         * However, The UCC7's CTS pin is low level in default,
                         * it will impact the transmission in full duplex
index 7264e91190be928e125d01fe22be89a2b544c597..c140e94c7c72b466483fbf05be8eb82e084d346b 100644 (file)
@@ -405,6 +405,16 @@ config PPC_DOORBELL
 
 endmenu
 
+config VDSO32
+       def_bool y
+       depends on PPC32 || CPU_BIG_ENDIAN
+       help
+         This symbol controls whether we build the 32-bit VDSO. We obviously
+         want to do that if we're building a 32-bit kernel. If we're building
+         a 64-bit kernel then we only want a 32-bit VDSO if we're building for
+         big endian. That is because the only little endian configuration we
+         support is ppc64le which is 64-bit only.
+
 choice
        prompt "Endianness selection"
        default CPU_BIG_ENDIAN
@@ -421,6 +431,7 @@ config CPU_BIG_ENDIAN
 
 config CPU_LITTLE_ENDIAN
        bool "Build little endian kernel"
+       depends on PPC_BOOK3S_64
        select PPC64_BOOT_WRAPPER
        help
          Build a little endian kernel.
index 623bd961465ad501eeb1294fae6095105e081d4a..fe51de4fcf135a1d1b355efcd8c20915c3a1b80e 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/machdep.h>
 #include <asm/prom.h>
 
+#include "cell.h"
 
 /*
  * MSIC registers, specified as offsets from dcr_base
@@ -95,7 +96,7 @@ static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
 static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
 {
        struct irq_chip *chip = irq_desc_get_chip(desc);
-       struct axon_msic *msic = irq_get_handler_data(irq);
+       struct axon_msic *msic = irq_desc_get_handler_data(desc);
        u32 write_offset, msi;
        int idx;
        int retry = 0;
@@ -406,8 +407,8 @@ static int axon_msi_probe(struct platform_device *device)
 
        dev_set_drvdata(&device->dev, msic);
 
-       ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs;
-       ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
+       cell_pci_controller_ops.setup_msi_irqs = axon_msi_setup_msi_irqs;
+       cell_pci_controller_ops.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
 
        axon_msi_debug_setup(dn, msic);
 
index 21b502398bf379063a000012c1a7d2df4cf2c2b9..14a582b2127458c31a4865753bf6acc97755a4f5 100644 (file)
@@ -466,6 +466,11 @@ static inline u32 cell_iommu_get_ioid(struct device_node *np)
        return *ioid;
 }
 
+static struct iommu_table_ops cell_iommu_ops = {
+       .set = tce_build_cell,
+       .clear = tce_free_cell
+};
+
 static struct iommu_window * __init
 cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
                        unsigned long offset, unsigned long size,
@@ -492,6 +497,7 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
        window->table.it_offset =
                (offset >> window->table.it_page_shift) + pte_offset;
        window->table.it_size = size >> window->table.it_page_shift;
+       window->table.it_ops = &cell_iommu_ops;
 
        iommu_init_table(&window->table, iommu->nid);
 
@@ -1201,8 +1207,6 @@ static int __init cell_iommu_init(void)
        /* Setup various callbacks */
        cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
        ppc_md.dma_get_required_mask = cell_dma_get_required_mask;
-       ppc_md.tce_build = tce_build_cell;
-       ppc_md.tce_free = tce_free_cell;
 
        if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0)
                goto bail;
index c269caee58f9486f6e260cf67367cb501820642c..9dd154d6f89a9205aa33eff33930e2cb0efc3397 100644 (file)
@@ -124,7 +124,7 @@ static void hlwd_pic_irq_cascade(unsigned int cascade_virq,
                                      struct irq_desc *desc)
 {
        struct irq_chip *chip = irq_desc_get_chip(desc);
-       struct irq_domain *irq_domain = irq_get_handler_data(cascade_virq);
+       struct irq_domain *irq_domain = irq_desc_get_handler_data(desc);
        unsigned int virq;
 
        raw_spin_lock(&desc->lock);
index 8e8d4cae5ebe731ff5b1c45723ec11ba4c13c68f..60b4e0fd9808aed53e9284bee352b862b053e770 100644 (file)
@@ -1,2 +1,3 @@
 obj-y  += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o
 obj-$(CONFIG_PPC_PASEMI_MDIO)  += gpio_mdio.o
+obj-$(CONFIG_PCI_MSI)          += msi.o
index b8f567b2ea1921eddef2f5687714904b83e75080..c929644e74a6dd1ca90430d54c4097dc95f382cf 100644 (file)
@@ -134,6 +134,10 @@ static void iobmap_free(struct iommu_table *tbl, long index,
        }
 }
 
+static struct iommu_table_ops iommu_table_iobmap_ops = {
+       .set = iobmap_build,
+       .clear  = iobmap_free
+};
 
 static void iommu_table_iobmap_setup(void)
 {
@@ -153,6 +157,7 @@ static void iommu_table_iobmap_setup(void)
         * Should probably be 8 (64 bytes)
         */
        iommu_table_iobmap.it_blocksize = 4;
+       iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops;
        iommu_init_table(&iommu_table_iobmap, 0);
        pr_debug(" <- %s\n", __func__);
 }
@@ -252,8 +257,6 @@ void __init iommu_init_early_pasemi(void)
 
        pasemi_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pasemi;
        pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi;
-       ppc_md.tce_build = iobmap_build;
-       ppc_md.tce_free  = iobmap_free;
        set_pci_dma_ops(&dma_iommu_ops);
 }
 
diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c
new file mode 100644 (file)
index 0000000..27f2b18
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2007, Olof Johansson, PA Semi
+ *
+ * Based on arch/powerpc/sysdev/mpic_u3msi.c:
+ *
+ * Copyright 2006, Segher Boessenkool, IBM Corporation.
+ * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ */
+
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <asm/mpic.h>
+#include <asm/prom.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/msi_bitmap.h>
+
+#include <sysdev/mpic.h>
+
+/* Allocate 16 interrupts per device, to give an alignment of 16,
+ * since that's the size of the grouping w.r.t. affinity. If someone
+ * needs more than 32 MSI's down the road we'll have to rethink this,
+ * but it should be OK for now.
+ */
+#define ALLOC_CHUNK 16
+
+#define PASEMI_MSI_ADDR 0xfc080000
+
+/* A bit ugly, can we get this from the pci_dev somehow? */
+static struct mpic *msi_mpic;
+
+
+static void mpic_pasemi_msi_mask_irq(struct irq_data *data)
+{
+       pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq);
+       pci_msi_mask_irq(data);
+       mpic_mask_irq(data);
+}
+
+static void mpic_pasemi_msi_unmask_irq(struct irq_data *data)
+{
+       pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq);
+       mpic_unmask_irq(data);
+       pci_msi_unmask_irq(data);
+}
+
+static struct irq_chip mpic_pasemi_msi_chip = {
+       .irq_shutdown           = mpic_pasemi_msi_mask_irq,
+       .irq_mask               = mpic_pasemi_msi_mask_irq,
+       .irq_unmask             = mpic_pasemi_msi_unmask_irq,
+       .irq_eoi                = mpic_end_irq,
+       .irq_set_type           = mpic_set_irq_type,
+       .irq_set_affinity       = mpic_set_affinity,
+       .name                   = "PASEMI-MSI",
+};
+
+static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev)
+{
+       struct msi_desc *entry;
+
+       pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev);
+
+       list_for_each_entry(entry, &pdev->msi_list, list) {
+               if (entry->irq == NO_IRQ)
+                       continue;
+
+               irq_set_msi_desc(entry->irq, NULL);
+               msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap,
+                                      virq_to_hw(entry->irq), ALLOC_CHUNK);
+               irq_dispose_mapping(entry->irq);
+       }
+
+       return;
+}
+
+static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+       unsigned int virq;
+       struct msi_desc *entry;
+       struct msi_msg msg;
+       int hwirq;
+
+       if (type == PCI_CAP_ID_MSIX)
+               pr_debug("pasemi_msi: MSI-X untested, trying anyway\n");
+       pr_debug("pasemi_msi_setup_msi_irqs, pdev %p nvec %d type %d\n",
+                pdev, nvec, type);
+
+       msg.address_hi = 0;
+       msg.address_lo = PASEMI_MSI_ADDR;
+
+       list_for_each_entry(entry, &pdev->msi_list, list) {
+               /* Allocate 16 interrupts for now, since that's the grouping for
+                * affinity. This can be changed later if it turns out 32 is too
+                * few MSIs for someone, but restrictions will apply to how the
+                * sources can be changed independently.
+                */
+               hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap,
+                                               ALLOC_CHUNK);
+               if (hwirq < 0) {
+                       pr_debug("pasemi_msi: failed allocating hwirq\n");
+                       return hwirq;
+               }
+
+               virq = irq_create_mapping(msi_mpic->irqhost, hwirq);
+               if (virq == NO_IRQ) {
+                       pr_debug("pasemi_msi: failed mapping hwirq 0x%x\n",
+                                 hwirq);
+                       msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq,
+                                              ALLOC_CHUNK);
+                       return -ENOSPC;
+               }
+
+               /* Vector on MSI is really an offset, the hardware adds
+                * it to the value written at the magic address. So set
+                * it to 0 to remain sane.
+                */
+               mpic_set_vector(virq, 0);
+
+               irq_set_msi_desc(virq, entry);
+               irq_set_chip(virq, &mpic_pasemi_msi_chip);
+               irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING);
+
+               pr_debug("pasemi_msi: allocated virq 0x%x (hw 0x%x) " \
+                        "addr 0x%x\n", virq, hwirq, msg.address_lo);
+
+               /* Likewise, the device writes [0...511] into the target
+                * register to generate MSI [512...1023]
+                */
+               msg.data = hwirq-0x200;
+               pci_write_msi_msg(virq, &msg);
+       }
+
+       return 0;
+}
+
+int mpic_pasemi_msi_init(struct mpic *mpic)
+{
+       int rc;
+       struct pci_controller *phb;
+
+       if (!mpic->irqhost->of_node ||
+           !of_device_is_compatible(mpic->irqhost->of_node,
+                                    "pasemi,pwrficient-openpic"))
+               return -ENODEV;
+
+       rc = mpic_msi_init_allocator(mpic);
+       if (rc) {
+               pr_debug("pasemi_msi: Error allocating bitmap!\n");
+               return rc;
+       }
+
+       pr_debug("pasemi_msi: Registering PA Semi MPIC MSI callbacks\n");
+
+       msi_mpic = mpic;
+       list_for_each_entry(phb, &hose_list, list_node) {
+               WARN_ON(phb->controller_ops.setup_msi_irqs);
+               phb->controller_ops.setup_msi_irqs = pasemi_msi_setup_msi_irqs;
+               phb->controller_ops.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs;
+       }
+
+       return 0;
+}
index 4b044d8cb49a36fefcdc335a88bf5fa35743bd28..604190cab5227c6fd8957baa00aa3ae6ca87a790 100644 (file)
@@ -19,3 +19,10 @@ config PPC_POWERNV
        select CPU_FREQ_GOV_CONSERVATIVE
        select PPC_DOORBELL
        default y
+
+config OPAL_PRD
+       tristate 'OPAL PRD driver'
+       depends on PPC_POWERNV
+       help
+         This enables the opal-prd driver, a facility to run processor
+         recovery diagnostics on OpenPower machines.
index 33e44f37212f9fe785db5c554d274e051cb025d4..1c8cdb6250e7c15fb0b26620e0ba43756008cfbc 100644 (file)
@@ -1,7 +1,7 @@
-obj-y                  += setup.o opal-wrappers.o opal.o opal-async.o
+obj-y                  += setup.o opal-wrappers.o opal.o opal-async.o idle.o
 obj-y                  += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
 obj-y                  += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
-obj-y                  += opal-msglog.o opal-hmi.o opal-power.o
+obj-y                  += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
 
 obj-$(CONFIG_SMP)      += smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)      += pci.o pci-p5ioc2.o pci-ioda.o
@@ -9,3 +9,4 @@ obj-$(CONFIG_EEH)       += eeh-powernv.o
 obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE)   += opal-memory-errors.o
 obj-$(CONFIG_TRACEPOINTS)      += opal-tracepoints.o
+obj-$(CONFIG_OPAL_PRD) += opal-prd.o
index ce738ab3d5a9f6b93dbcc4b10201f68624fce15e..5cf5e6ea213baaeee1b6e017636eae95aba6b0c6 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/list.h>
 #include <linux/msi.h>
 #include <linux/of.h>
@@ -40,6 +41,7 @@
 #include "pci.h"
 
 static bool pnv_eeh_nb_init = false;
+static int eeh_event_irq = -EINVAL;
 
 /**
  * pnv_eeh_init - EEH platform dependent initialization
@@ -88,34 +90,22 @@ static int pnv_eeh_init(void)
        return 0;
 }
 
-static int pnv_eeh_event(struct notifier_block *nb,
-                        unsigned long events, void *change)
+static irqreturn_t pnv_eeh_event(int irq, void *data)
 {
-       uint64_t changed_evts = (uint64_t)change;
-
        /*
-        * We simply send special EEH event if EEH has
-        * been enabled, or clear pending events in
-        * case that we enable EEH soon
+        * We simply send a special EEH event if EEH has been
+        * enabled. We don't care about EEH events until we've
+        * finished processing the outstanding ones. Event processing
+        * gets unmasked in next_error() if EEH is enabled.
         */
-       if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
-           !(events & OPAL_EVENT_PCI_ERROR))
-               return 0;
+       disable_irq_nosync(irq);
 
        if (eeh_enabled())
                eeh_send_failure_event(NULL);
-       else
-               opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
 
-       return 0;
+       return IRQ_HANDLED;
 }
 
-static struct notifier_block pnv_eeh_nb = {
-       .notifier_call  = pnv_eeh_event,
-       .next           = NULL,
-       .priority       = 0
-};
-
 #ifdef CONFIG_DEBUG_FS
 static ssize_t pnv_eeh_ei_write(struct file *filp,
                                const char __user *user_buf,
@@ -237,16 +227,28 @@ static int pnv_eeh_post_init(void)
 
        /* Register OPAL event notifier */
        if (!pnv_eeh_nb_init) {
-               ret = opal_notifier_register(&pnv_eeh_nb);
-               if (ret) {
-                       pr_warn("%s: Can't register OPAL event notifier (%d)\n",
-                               __func__, ret);
+               eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
+               if (eeh_event_irq < 0) {
+                       pr_err("%s: Can't register OPAL event interrupt (%d)\n",
+                              __func__, eeh_event_irq);
+                       return eeh_event_irq;
+               }
+
+               ret = request_irq(eeh_event_irq, pnv_eeh_event,
+                               IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
+               if (ret < 0) {
+                       irq_dispose_mapping(eeh_event_irq);
+                       pr_err("%s: Can't request OPAL event interrupt (%d)\n",
+                              __func__, eeh_event_irq);
                        return ret;
                }
 
                pnv_eeh_nb_init = true;
        }
 
+       if (!eeh_enabled())
+               disable_irq(eeh_event_irq);
+
        list_for_each_entry(hose, &hose_list, list_node) {
                phb = hose->private_data;
 
@@ -979,7 +981,7 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
 /**
  * pnv_eeh_wait_state - Wait for PE state
  * @pe: EEH PE
- * @max_wait: maximal period in microsecond
+ * @max_wait: maximal period in millisecond
  *
  * Wait for the state of associated PE. It might take some time
  * to retrieve the PE's state.
@@ -1000,13 +1002,13 @@ static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
                if (ret != EEH_STATE_UNAVAILABLE)
                        return ret;
 
-               max_wait -= mwait;
                if (max_wait <= 0) {
                        pr_warn("%s: Timeout getting PE#%x's state (%d)\n",
                                __func__, pe->addr, max_wait);
                        return EEH_STATE_NOT_SUPPORT;
                }
 
+               max_wait -= mwait;
                msleep(mwait);
        }
 
@@ -1303,12 +1305,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
        int state, ret = EEH_NEXT_ERR_NONE;
 
        /*
-        * While running here, it's safe to purge the event queue.
-        * And we should keep the cached OPAL notifier event sychronized
-        * between the kernel and firmware.
+        * While running here, it's safe to purge the event queue. The
+        * event should still be masked.
         */
        eeh_remove_event(NULL, false);
-       opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
 
        list_for_each_entry(hose, &hose_list, list_node) {
                /*
@@ -1477,6 +1477,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
                        break;
        }
 
+       /* Unmask the event */
+       if (eeh_enabled())
+               enable_irq(eeh_event_irq);
+
        return ret;
 }
 
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
new file mode 100644 (file)
index 0000000..59d735d
--- /dev/null
@@ -0,0 +1,293 @@
+/*
+ * PowerNV cpuidle code
+ *
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/code-patching.h>
+#include <asm/smp.h>
+
+#include "powernv.h"
+#include "subcore.h"
+
+static u32 supported_cpuidle_states;
+
+int pnv_save_sprs_for_winkle(void)
+{
+       int cpu;
+       int rc;
+
+       /*
+        * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
+        * all cpus at boot. Get these reg values of current cpu and use the
+        * same across all cpus.
+        */
+       uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
+       uint64_t hid0_val = mfspr(SPRN_HID0);
+       uint64_t hid1_val = mfspr(SPRN_HID1);
+       uint64_t hid4_val = mfspr(SPRN_HID4);
+       uint64_t hid5_val = mfspr(SPRN_HID5);
+       uint64_t hmeer_val = mfspr(SPRN_HMEER);
+
+       for_each_possible_cpu(cpu) {
+               uint64_t pir = get_hard_smp_processor_id(cpu);
+               uint64_t hsprg0_val = (uint64_t)&paca[cpu];
+
+               /*
+                * HSPRG0 is used to store the cpu's pointer to paca. Hence last
+                * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0
+                * with 63rd bit set, so that when a thread wakes up at 0x100 we
+                * can use this bit to distinguish between fastsleep and
+                * deep winkle.
+                */
+               hsprg0_val |= 1;
+
+               rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
+               if (rc != 0)
+                       return rc;
+
+               rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+               if (rc != 0)
+                       return rc;
+
+               /* HIDs are per core registers */
+               if (cpu_thread_in_core(cpu) == 0) {
+
+                       rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
+                       if (rc != 0)
+                               return rc;
+
+                       rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
+                       if (rc != 0)
+                               return rc;
+
+                       rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
+                       if (rc != 0)
+                               return rc;
+
+                       rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
+                       if (rc != 0)
+                               return rc;
+
+                       rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
+                       if (rc != 0)
+                               return rc;
+               }
+       }
+
+       return 0;
+}
+
+static void pnv_alloc_idle_core_states(void)
+{
+       int i, j;
+       int nr_cores = cpu_nr_cores();
+       u32 *core_idle_state;
+
+       /*
+        * core_idle_state - First 8 bits track the idle state of each thread
+        * of the core. The 8th bit is the lock bit. Initially all thread bits
+        * are set. They are cleared when the thread enters deep idle state
+        * like sleep and winkle. Initially the lock bit is cleared.
+        * The lock bit has 2 purposes
+        * a. While the first thread is restoring core state, it prevents
+        * other threads in the core from switching to process context.
+        * b. While the last thread in the core is saving the core state, it
+        * prevents a different thread from waking up.
+        */
+       for (i = 0; i < nr_cores; i++) {
+               int first_cpu = i * threads_per_core;
+               int node = cpu_to_node(first_cpu);
+
+               core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
+               *core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
+
+               for (j = 0; j < threads_per_core; j++) {
+                       int cpu = first_cpu + j;
+
+                       paca[cpu].core_idle_state_ptr = core_idle_state;
+                       paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
+                       paca[cpu].thread_mask = 1 << j;
+               }
+       }
+
+       update_subcore_sibling_mask();
+
+       if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
+               pnv_save_sprs_for_winkle();
+}
+
+u32 pnv_get_supported_cpuidle_states(void)
+{
+       return supported_cpuidle_states;
+}
+EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
+
+
+static void pnv_fastsleep_workaround_apply(void *info)
+
+{
+       int rc;
+       int *err = info;
+
+       rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
+                                       OPAL_CONFIG_IDLE_APPLY);
+       if (rc)
+               *err = 1;
+}
+
+/*
+ * Used to store fastsleep workaround state
+ * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
+ * 1 - Workaround applied once, never undone.
+ */
+static u8 fastsleep_workaround_applyonce;
+
+static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
+}
+
+static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
+               struct device_attribute *attr, const char *buf,
+               size_t count)
+{
+       cpumask_t primary_thread_mask;
+       int err;
+       u8 val;
+
+       if (kstrtou8(buf, 0, &val) || val != 1)
+               return -EINVAL;
+
+       if (fastsleep_workaround_applyonce == 1)
+               return count;
+
+       /*
+        * fastsleep_workaround_applyonce = 1 implies
+        * fastsleep workaround needs to be left in 'applied' state on all
+        * the cores. Do this by:
+        * 1. Patching out the call to 'undo' workaround in fastsleep exit path
+        * 2. Sending ipi to all the cores which have at least one online thread
+        * 3. Patching out the call to 'apply' workaround in fastsleep entry
+        * path
+        * There is no need to send ipi to cores which have all threads
+        * offlined, as last thread of the core entering fastsleep or deeper
+        * state would have applied workaround.
+        */
+       err = patch_instruction(
+               (unsigned int *)pnv_fastsleep_workaround_at_exit,
+               PPC_INST_NOP);
+       if (err) {
+               pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
+               goto fail;
+       }
+
+       get_online_cpus();
+       primary_thread_mask = cpu_online_cores_map();
+       on_each_cpu_mask(&primary_thread_mask,
+                               pnv_fastsleep_workaround_apply,
+                               &err, 1);
+       put_online_cpus();
+       if (err) {
+               pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
+               goto fail;
+       }
+
+       err = patch_instruction(
+               (unsigned int *)pnv_fastsleep_workaround_at_entry,
+               PPC_INST_NOP);
+       if (err) {
+               pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
+               goto fail;
+       }
+
+       fastsleep_workaround_applyonce = 1;
+
+       return count;
+fail:
+       return -EIO;
+}
+
+static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
+                       show_fastsleep_workaround_applyonce,
+                       store_fastsleep_workaround_applyonce);
+
+static int __init pnv_init_idle_states(void)
+{
+       struct device_node *power_mgt;
+       int dt_idle_states;
+       u32 *flags;
+       int i;
+
+       supported_cpuidle_states = 0;
+
+       if (cpuidle_disable != IDLE_NO_OVERRIDE)
+               goto out;
+
+       if (!firmware_has_feature(FW_FEATURE_OPALv3))
+               goto out;
+
+       power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
+       if (!power_mgt) {
+               pr_warn("opal: PowerMgmt Node not found\n");
+               goto out;
+       }
+       dt_idle_states = of_property_count_u32_elems(power_mgt,
+                       "ibm,cpu-idle-state-flags");
+       if (dt_idle_states < 0) {
+               pr_warn("cpuidle-powernv: no idle states found in the DT\n");
+               goto out;
+       }
+
+       flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL);
+       if (of_property_read_u32_array(power_mgt,
+                       "ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
+               pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
+               goto out_free;
+       }
+
+       for (i = 0; i < dt_idle_states; i++)
+               supported_cpuidle_states |= flags[i];
+
+       if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+               patch_instruction(
+                       (unsigned int *)pnv_fastsleep_workaround_at_entry,
+                       PPC_INST_NOP);
+               patch_instruction(
+                       (unsigned int *)pnv_fastsleep_workaround_at_exit,
+                       PPC_INST_NOP);
+       } else {
+               /*
+                * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
+                * workaround is needed to use fastsleep. Provide sysfs
+                * control to choose how this workaround has to be applied.
+                */
+               device_create_file(cpu_subsys.dev_root,
+                               &dev_attr_fastsleep_workaround_applyonce);
+       }
+
+       pnv_alloc_idle_core_states();
+out_free:
+       kfree(flags);
+out:
+       return 0;
+}
+machine_subsys_initcall(powernv, pnv_init_idle_states);
index 693b6cdac691b63057890e6b1152f87de5bf4b13..bdc8c0c71d156483619f17db2f81e50722c0be7c 100644 (file)
@@ -151,7 +151,7 @@ static struct notifier_block opal_async_comp_nb = {
                .priority       = 0,
 };
 
-static int __init opal_async_comp_init(void)
+int __init opal_async_comp_init(void)
 {
        struct device_node *opal_node;
        const __be32 *async;
@@ -205,4 +205,3 @@ out_opal_node:
 out:
        return err;
 }
-machine_subsys_initcall(powernv, opal_async_comp_init);
index 5aa9c1ce4de3eabd8476211d7323622dd7c60254..2ee96431f7360e1d8d63ece45520416860c62428 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
 #include <linux/delay.h>
+#include <linux/interrupt.h>
 
 #include <asm/opal.h>
 
@@ -60,7 +61,7 @@ static ssize_t dump_type_show(struct dump_obj *dump_obj,
                              struct dump_attribute *attr,
                              char *buf)
 {
-       
+
        return sprintf(buf, "0x%x %s\n", dump_obj->type,
                       dump_type_to_string(dump_obj->type));
 }
@@ -363,7 +364,7 @@ static struct dump_obj *create_dump_obj(uint32_t id, size_t size,
        return dump;
 }
 
-static int process_dump(void)
+static irqreturn_t process_dump(int irq, void *data)
 {
        int rc;
        uint32_t dump_id, dump_size, dump_type;
@@ -387,45 +388,13 @@ static int process_dump(void)
        if (!dump)
                return -1;
 
-       return 0;
-}
-
-static void dump_work_fn(struct work_struct *work)
-{
-       process_dump();
+       return IRQ_HANDLED;
 }
 
-static DECLARE_WORK(dump_work, dump_work_fn);
-
-static void schedule_process_dump(void)
-{
-       schedule_work(&dump_work);
-}
-
-/*
- * New dump available notification
- *
- * Once we get notification, we add sysfs entries for it.
- * We only fetch the dump on demand, and create sysfs asynchronously.
- */
-static int dump_event(struct notifier_block *nb,
-                     unsigned long events, void *change)
-{
-       if (events & OPAL_EVENT_DUMP_AVAIL)
-               schedule_process_dump();
-
-       return 0;
-}
-
-static struct notifier_block dump_nb = {
-       .notifier_call  = dump_event,
-       .next           = NULL,
-       .priority       = 0
-};
-
 void __init opal_platform_dump_init(void)
 {
        int rc;
+       int dump_irq;
 
        /* ELOG not supported by firmware */
        if (!opal_check_token(OPAL_DUMP_READ))
@@ -445,10 +414,19 @@ void __init opal_platform_dump_init(void)
                return;
        }
 
-       rc = opal_notifier_register(&dump_nb);
+       dump_irq = opal_event_request(ilog2(OPAL_EVENT_DUMP_AVAIL));
+       if (!dump_irq) {
+               pr_err("%s: Can't register OPAL event irq (%d)\n",
+                      __func__, dump_irq);
+               return;
+       }
+
+       rc = request_threaded_irq(dump_irq, NULL, process_dump,
+                               IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+                               "opal-dump", NULL);
        if (rc) {
-               pr_warn("%s: Can't register OPAL event notifier (%d)\n",
-                       __func__, rc);
+               pr_err("%s: Can't request OPAL event irq (%d)\n",
+                      __func__, rc);
                return;
        }
 
index 38ce757e5e2af59354ace706976e292b62d3d1d9..4949ef0d94004e0f315f90e37cc2a50ce3582584 100644 (file)
@@ -10,6 +10,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/sysfs.h>
@@ -276,24 +277,15 @@ static void elog_work_fn(struct work_struct *work)
 
 static DECLARE_WORK(elog_work, elog_work_fn);
 
-static int elog_event(struct notifier_block *nb,
-                               unsigned long events, void *change)
+static irqreturn_t elog_event(int irq, void *data)
 {
-       /* check for error log event */
-       if (events & OPAL_EVENT_ERROR_LOG_AVAIL)
-               schedule_work(&elog_work);
-       return 0;
+       schedule_work(&elog_work);
+       return IRQ_HANDLED;
 }
 
-static struct notifier_block elog_nb = {
-       .notifier_call  = elog_event,
-       .next           = NULL,
-       .priority       = 0
-};
-
 int __init opal_elog_init(void)
 {
-       int rc = 0;
+       int rc = 0, irq;
 
        /* ELOG not supported by firmware */
        if (!opal_check_token(OPAL_ELOG_READ))
@@ -305,10 +297,18 @@ int __init opal_elog_init(void)
                return -1;
        }
 
-       rc = opal_notifier_register(&elog_nb);
+       irq = opal_event_request(ilog2(OPAL_EVENT_ERROR_LOG_AVAIL));
+       if (!irq) {
+               pr_err("%s: Can't register OPAL event irq (%d)\n",
+                      __func__, irq);
+               return irq;
+       }
+
+       rc = request_irq(irq, elog_event,
+                       IRQ_TYPE_LEVEL_HIGH, "opal-elog", NULL);
        if (rc) {
-               pr_err("%s: Can't register OPAL event notifier (%d)\n",
-               __func__, rc);
+               pr_err("%s: Can't request OPAL event irq (%d)\n",
+                      __func__, rc);
                return rc;
        }
 
index b322bfb51343f65fdfe76d265cdcb76928011d21..a8f49d380449bf172a9f9361389b9beae52bc2e2 100644 (file)
@@ -170,7 +170,7 @@ static struct notifier_block opal_hmi_handler_nb = {
        .priority       = 0,
 };
 
-static int __init opal_hmi_handler_init(void)
+int __init opal_hmi_handler_init(void)
 {
        int ret;
 
@@ -186,4 +186,3 @@ static int __init opal_hmi_handler_init(void)
        }
        return 0;
 }
-machine_subsys_initcall(powernv, opal_hmi_handler_init);
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
new file mode 100644 (file)
index 0000000..e2e7d75
--- /dev/null
@@ -0,0 +1,253 @@
+/*
+ * This file implements an irqchip for OPAL events. Whenever there is
+ * an interrupt that is handled by OPAL we get passed a list of events
+ * that Linux needs to do something about. These basically look like
+ * interrupts to Linux so we implement an irqchip to handle them.
+ *
+ * Copyright Alistair Popple, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/irq_work.h>
+
+#include <asm/machdep.h>
+#include <asm/opal.h>
+
+#include "powernv.h"
+
+/* Maximum number of events supported by OPAL firmware */
+#define MAX_NUM_EVENTS 64
+
+struct opal_event_irqchip {
+       struct irq_chip irqchip;
+       struct irq_domain *domain;
+       unsigned long mask;
+};
+static struct opal_event_irqchip opal_event_irqchip;
+
+static unsigned int opal_irq_count;
+static unsigned int *opal_irqs;
+
+static void opal_handle_irq_work(struct irq_work *work);
+static __be64 last_outstanding_events;
+static struct irq_work opal_event_irq_work = {
+       .func = opal_handle_irq_work,
+};
+
+static void opal_event_mask(struct irq_data *d)
+{
+       clear_bit(d->hwirq, &opal_event_irqchip.mask);
+}
+
+static void opal_event_unmask(struct irq_data *d)
+{
+       set_bit(d->hwirq, &opal_event_irqchip.mask);
+
+       opal_poll_events(&last_outstanding_events);
+       if (last_outstanding_events & opal_event_irqchip.mask)
+               /* Need to retrigger the interrupt */
+               irq_work_queue(&opal_event_irq_work);
+}
+
+static int opal_event_set_type(struct irq_data *d, unsigned int flow_type)
+{
+       /*
+        * For now we only support level triggered events. The irq
+        * handler will be called continuously until the event has
+        * been cleared in OPAL.
+        */
+       if (flow_type != IRQ_TYPE_LEVEL_HIGH)
+               return -EINVAL;
+
+       return 0;
+}
+
+static struct opal_event_irqchip opal_event_irqchip = {
+       .irqchip = {
+               .name = "OPAL EVT",
+               .irq_mask = opal_event_mask,
+               .irq_unmask = opal_event_unmask,
+               .irq_set_type = opal_event_set_type,
+       },
+       .mask = 0,
+};
+
+static int opal_event_map(struct irq_domain *d, unsigned int irq,
+                       irq_hw_number_t hwirq)
+{
+       irq_set_chip_data(irq, &opal_event_irqchip);
+       irq_set_chip_and_handler(irq, &opal_event_irqchip.irqchip,
+                               handle_level_irq);
+
+       return 0;
+}
+
+void opal_handle_events(uint64_t events)
+{
+       int virq, hwirq = 0;
+       u64 mask = opal_event_irqchip.mask;
+
+       if (!in_irq() && (events & mask)) {
+               last_outstanding_events = events;
+               irq_work_queue(&opal_event_irq_work);
+               return;
+       }
+
+       while (events & mask) {
+               hwirq = fls64(events) - 1;
+               if (BIT_ULL(hwirq) & mask) {
+                       virq = irq_find_mapping(opal_event_irqchip.domain,
+                                               hwirq);
+                       if (virq)
+                               generic_handle_irq(virq);
+               }
+               events &= ~BIT_ULL(hwirq);
+       }
+}
+
+static irqreturn_t opal_interrupt(int irq, void *data)
+{
+       __be64 events;
+
+       opal_handle_interrupt(virq_to_hw(irq), &events);
+       opal_handle_events(be64_to_cpu(events));
+
+       return IRQ_HANDLED;
+}
+
+static void opal_handle_irq_work(struct irq_work *work)
+{
+       opal_handle_events(be64_to_cpu(last_outstanding_events));
+}
+
+static int opal_event_match(struct irq_domain *h, struct device_node *node)
+{
+       return h->of_node == node;
+}
+
+static int opal_event_xlate(struct irq_domain *h, struct device_node *np,
+                          const u32 *intspec, unsigned int intsize,
+                          irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+       *out_hwirq = intspec[0];
+       *out_flags = IRQ_TYPE_LEVEL_HIGH;
+
+       return 0;
+}
+
+static const struct irq_domain_ops opal_event_domain_ops = {
+       .match  = opal_event_match,
+       .map    = opal_event_map,
+       .xlate  = opal_event_xlate,
+};
+
+void opal_event_shutdown(void)
+{
+       unsigned int i;
+
+       /* First free interrupts, which will also mask them */
+       for (i = 0; i < opal_irq_count; i++) {
+               if (opal_irqs[i])
+                       free_irq(opal_irqs[i], NULL);
+               opal_irqs[i] = 0;
+       }
+}
+
+int __init opal_event_init(void)
+{
+       struct device_node *dn, *opal_node;
+       const __be32 *irqs;
+       int i, irqlen, rc = 0;
+
+       opal_node = of_find_node_by_path("/ibm,opal");
+       if (!opal_node) {
+               pr_warn("opal: Node not found\n");
+               return -ENODEV;
+       }
+
+       /* If dn is NULL it means the domain won't be linked to a DT
+        * node so therefore irq_of_parse_and_map(...) won't work. But
+        * that shouldn't be a problem because if we're running a
+        * version of skiboot that doesn't have the dn then the
+        * devices won't have the correct properties and will have to
+        * fall back to the legacy method (opal_event_request(...))
+        * anyway. */
+       dn = of_find_compatible_node(NULL, NULL, "ibm,opal-event");
+       opal_event_irqchip.domain = irq_domain_add_linear(dn, MAX_NUM_EVENTS,
+                               &opal_event_domain_ops, &opal_event_irqchip);
+       of_node_put(dn);
+       if (!opal_event_irqchip.domain) {
+               pr_warn("opal: Unable to create irq domain\n");
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       /* Get interrupt property */
+       irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
+       opal_irq_count = irqs ? (irqlen / 4) : 0;
+       pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count);
+
+       /* Install interrupt handlers */
+       opal_irqs = kcalloc(opal_irq_count, sizeof(*opal_irqs), GFP_KERNEL);
+       for (i = 0; irqs && i < opal_irq_count; i++, irqs++) {
+               unsigned int irq, virq;
+
+               /* Get hardware and virtual IRQ */
+               irq = be32_to_cpup(irqs);
+               virq = irq_create_mapping(NULL, irq);
+               if (virq == NO_IRQ) {
+                       pr_warn("Failed to map irq 0x%x\n", irq);
+                       continue;
+               }
+
+               /* Install interrupt handler */
+               rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
+               if (rc) {
+                       irq_dispose_mapping(virq);
+                       pr_warn("Error %d requesting irq %d (0x%x)\n",
+                                rc, virq, irq);
+                       continue;
+               }
+
+               /* Cache IRQ */
+               opal_irqs[i] = virq;
+       }
+
+out:
+       of_node_put(opal_node);
+       return rc;
+}
+machine_arch_initcall(powernv, opal_event_init);
+
+/**
+ * opal_event_request(unsigned int opal_event_nr) - Request an event
+ * @opal_event_nr: the opal event number to request
+ *
+ * This routine can be used to find the linux virq number which can
+ * then be passed to request_irq to assign a handler for a particular
+ * opal event. This should only be used by legacy devices which don't
+ * have proper device tree bindings. Most devices should use
+ * irq_of_parse_and_map() instead.
+ */
+int opal_event_request(unsigned int opal_event_nr)
+{
+       if (WARN_ON_ONCE(!opal_event_irqchip.domain))
+               return NO_IRQ;
+
+       return irq_create_mapping(opal_event_irqchip.domain, opal_event_nr);
+}
+EXPORT_SYMBOL(opal_event_request);
index 43db2136dbff8f67101d085b8e5ecef7f75dacdb..00a29432be39fb60f6209a625c98ee8b54504bb0 100644 (file)
@@ -144,4 +144,4 @@ static int __init opal_mem_err_init(void)
        }
        return 0;
 }
-machine_subsys_initcall(powernv, opal_mem_err_init);
+machine_device_initcall(powernv, opal_mem_err_init);
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
new file mode 100644 (file)
index 0000000..46cb3fe
--- /dev/null
@@ -0,0 +1,449 @@
+/*
+ * OPAL Runtime Diagnostics interface driver
+ * Supported on POWERNV platform
+ *
+ * Copyright IBM Corporation 2015
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "opal-prd: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/poll.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/opal-prd.h>
+#include <asm/opal.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+
+/**
+ * The msg member must be at the end of the struct, as it's followed by the
+ * message data.
+ */
+struct opal_prd_msg_queue_item {
+       struct list_head                list;
+       struct opal_prd_msg_header      msg;
+};
+
+static struct device_node *prd_node;
+static LIST_HEAD(opal_prd_msg_queue);
+static DEFINE_SPINLOCK(opal_prd_msg_queue_lock);
+static DECLARE_WAIT_QUEUE_HEAD(opal_prd_msg_wait);
+static atomic_t prd_usage;
+
+static bool opal_prd_range_is_valid(uint64_t addr, uint64_t size)
+{
+       struct device_node *parent, *node;
+       bool found;
+
+       if (addr + size < addr)
+               return false;
+
+       parent = of_find_node_by_path("/reserved-memory");
+       if (!parent)
+               return false;
+
+       found = false;
+
+       for_each_child_of_node(parent, node) {
+               uint64_t range_addr, range_size, range_end;
+               const __be32 *addrp;
+               const char *label;
+
+               addrp = of_get_address(node, 0, &range_size, NULL);
+
+               range_addr = of_read_number(addrp, 2);
+               range_end = range_addr + range_size;
+
+               label = of_get_property(node, "ibm,prd-label", NULL);
+
+               /* PRD ranges need a label */
+               if (!label)
+                       continue;
+
+               if (range_end <= range_addr)
+                       continue;
+
+               if (addr >= range_addr && addr + size <= range_end) {
+                       found = true;
+                       of_node_put(node);
+                       break;
+               }
+       }
+
+       of_node_put(parent);
+       return found;
+}
+
+static int opal_prd_open(struct inode *inode, struct file *file)
+{
+       /*
+        * Prevent multiple (separate) processes from concurrent interactions
+        * with the FW PRD channel
+        */
+       if (atomic_xchg(&prd_usage, 1) == 1)
+               return -EBUSY;
+
+       return 0;
+}
+
+/*
+ * opal_prd_mmap - maps firmware-provided ranges into userspace
+ * @file: file structure for the device
+ * @vma: VMA to map the registers into
+ */
+
+static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       size_t addr, size;
+       int rc;
+
+       pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n",
+                       vma->vm_start, vma->vm_end, vma->vm_pgoff,
+                       vma->vm_flags);
+
+       addr = vma->vm_pgoff << PAGE_SHIFT;
+       size = vma->vm_end - vma->vm_start;
+
+       /* ensure we're mapping within one of the allowable ranges */
+       if (!opal_prd_range_is_valid(addr, size))
+               return -EINVAL;
+
+       vma->vm_page_prot = __pgprot(pgprot_val(phys_mem_access_prot(file,
+                                               vma->vm_pgoff,
+                                                size, vma->vm_page_prot))
+                                       | _PAGE_SPECIAL);
+
+       rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size,
+                       vma->vm_page_prot);
+
+       return rc;
+}
+
+static bool opal_msg_queue_empty(void)
+{
+       unsigned long flags;
+       bool ret;
+
+       spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+       ret = list_empty(&opal_prd_msg_queue);
+       spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+
+       return ret;
+}
+
+static unsigned int opal_prd_poll(struct file *file,
+               struct poll_table_struct *wait)
+{
+       poll_wait(file, &opal_prd_msg_wait, wait);
+
+       if (!opal_msg_queue_empty())
+               return POLLIN | POLLRDNORM;
+
+       return 0;
+}
+
+static ssize_t opal_prd_read(struct file *file, char __user *buf,
+               size_t count, loff_t *ppos)
+{
+       struct opal_prd_msg_queue_item *item;
+       unsigned long flags;
+       ssize_t size, err;
+       int rc;
+
+       /* we need at least a header's worth of data */
+       if (count < sizeof(item->msg))
+               return -EINVAL;
+
+       if (*ppos)
+               return -ESPIPE;
+
+       item = NULL;
+
+       for (;;) {
+
+               spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+               if (!list_empty(&opal_prd_msg_queue)) {
+                       item = list_first_entry(&opal_prd_msg_queue,
+                                       struct opal_prd_msg_queue_item, list);
+                       list_del(&item->list);
+               }
+               spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+
+               if (item)
+                       break;
+
+               if (file->f_flags & O_NONBLOCK)
+                       return -EAGAIN;
+
+               rc = wait_event_interruptible(opal_prd_msg_wait,
+                               !opal_msg_queue_empty());
+               if (rc)
+                       return -EINTR;
+       }
+
+       size = be16_to_cpu(item->msg.size);
+       if (size > count) {
+               err = -EINVAL;
+               goto err_requeue;
+       }
+
+       rc = copy_to_user(buf, &item->msg, size);
+       if (rc) {
+               err = -EFAULT;
+               goto err_requeue;
+       }
+
+       kfree(item);
+
+       return size;
+
+err_requeue:
+       /* eep! re-queue at the head of the list */
+       spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+       list_add(&item->list, &opal_prd_msg_queue);
+       spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+       return err;
+}
+
+static ssize_t opal_prd_write(struct file *file, const char __user *buf,
+               size_t count, loff_t *ppos)
+{
+       struct opal_prd_msg_header hdr;
+       ssize_t size;
+       void *msg;
+       int rc;
+
+       size = sizeof(hdr);
+
+       if (count < size)
+               return -EINVAL;
+
+       /* grab the header */
+       rc = copy_from_user(&hdr, buf, sizeof(hdr));
+       if (rc)
+               return -EFAULT;
+
+       size = be16_to_cpu(hdr.size);
+
+       msg = kmalloc(size, GFP_KERNEL);
+       if (!msg)
+               return -ENOMEM;
+
+       rc = copy_from_user(msg, buf, size);
+       if (rc) {
+               size = -EFAULT;
+               goto out_free;
+       }
+
+       rc = opal_prd_msg(msg);
+       if (rc) {
+               pr_warn("write: opal_prd_msg returned %d\n", rc);
+               size = -EIO;
+       }
+
+out_free:
+       kfree(msg);
+
+       return size;
+}
+
+static int opal_prd_release(struct inode *inode, struct file *file)
+{
+       struct opal_prd_msg_header msg;
+
+       msg.size = cpu_to_be16(sizeof(msg));
+       msg.type = OPAL_PRD_MSG_TYPE_FINI;
+
+       opal_prd_msg((struct opal_prd_msg *)&msg);
+
+       atomic_xchg(&prd_usage, 0);
+
+       return 0;
+}
+
+static long opal_prd_ioctl(struct file *file, unsigned int cmd,
+               unsigned long param)
+{
+       struct opal_prd_info info;
+       struct opal_prd_scom scom;
+       int rc = 0;
+
+       switch (cmd) {
+       case OPAL_PRD_GET_INFO:
+               memset(&info, 0, sizeof(info));
+               info.version = OPAL_PRD_KERNEL_VERSION;
+               rc = copy_to_user((void __user *)param, &info, sizeof(info));
+               if (rc)
+                       return -EFAULT;
+               break;
+
+       case OPAL_PRD_SCOM_READ:
+               rc = copy_from_user(&scom, (void __user *)param, sizeof(scom));
+               if (rc)
+                       return -EFAULT;
+
+               scom.rc = opal_xscom_read(scom.chip, scom.addr,
+                               (__be64 *)&scom.data);
+               scom.data = be64_to_cpu(scom.data);
+               pr_devel("ioctl SCOM_READ: chip %llx addr %016llx data %016llx rc %lld\n",
+                               scom.chip, scom.addr, scom.data, scom.rc);
+
+               rc = copy_to_user((void __user *)param, &scom, sizeof(scom));
+               if (rc)
+                       return -EFAULT;
+               break;
+
+       case OPAL_PRD_SCOM_WRITE:
+               rc = copy_from_user(&scom, (void __user *)param, sizeof(scom));
+               if (rc)
+                       return -EFAULT;
+
+               scom.rc = opal_xscom_write(scom.chip, scom.addr, scom.data);
+               pr_devel("ioctl SCOM_WRITE: chip %llx addr %016llx data %016llx rc %lld\n",
+                               scom.chip, scom.addr, scom.data, scom.rc);
+
+               rc = copy_to_user((void __user *)param, &scom, sizeof(scom));
+               if (rc)
+                       return -EFAULT;
+               break;
+
+       default:
+               rc = -EINVAL;
+       }
+
+       return rc;
+}
+
+static const struct file_operations opal_prd_fops = {
+       .open           = opal_prd_open,
+       .mmap           = opal_prd_mmap,
+       .poll           = opal_prd_poll,
+       .read           = opal_prd_read,
+       .write          = opal_prd_write,
+       .unlocked_ioctl = opal_prd_ioctl,
+       .release        = opal_prd_release,
+       .owner          = THIS_MODULE,
+};
+
+static struct miscdevice opal_prd_dev = {
+       .minor          = MISC_DYNAMIC_MINOR,
+       .name           = "opal-prd",
+       .fops           = &opal_prd_fops,
+};
+
+/* opal interface */
+static int opal_prd_msg_notifier(struct notifier_block *nb,
+               unsigned long msg_type, void *_msg)
+{
+       struct opal_prd_msg_queue_item *item;
+       struct opal_prd_msg_header *hdr;
+       struct opal_msg *msg = _msg;
+       int msg_size, item_size;
+       unsigned long flags;
+
+       if (msg_type != OPAL_MSG_PRD)
+               return 0;
+
+       /* Calculate total size of the message and item we need to store. The
+        * 'size' field in the header includes the header itself. */
+       hdr = (void *)msg->params;
+       msg_size = be16_to_cpu(hdr->size);
+       item_size = msg_size + sizeof(*item) - sizeof(item->msg);
+
+       item = kzalloc(item_size, GFP_ATOMIC);
+       if (!item)
+               return -ENOMEM;
+
+       memcpy(&item->msg, msg->params, msg_size);
+
+       spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+       list_add_tail(&item->list, &opal_prd_msg_queue);
+       spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+
+       wake_up_interruptible(&opal_prd_msg_wait);
+
+       return 0;
+}
+
+static struct notifier_block opal_prd_event_nb = {
+       .notifier_call  = opal_prd_msg_notifier,
+       .next           = NULL,
+       .priority       = 0,
+};
+
+static int opal_prd_probe(struct platform_device *pdev)
+{
+       int rc;
+
+       if (!pdev || !pdev->dev.of_node)
+               return -ENODEV;
+
+       /* We should only have one prd driver instance per machine; ensure
+        * that we only get a valid probe on a single OF node.
+        */
+       if (prd_node)
+               return -EBUSY;
+
+       prd_node = pdev->dev.of_node;
+
+       rc = opal_message_notifier_register(OPAL_MSG_PRD, &opal_prd_event_nb);
+       if (rc) {
+               pr_err("Couldn't register event notifier\n");
+               return rc;
+       }
+
+       rc = misc_register(&opal_prd_dev);
+       if (rc) {
+               pr_err("failed to register miscdev\n");
+               opal_message_notifier_unregister(OPAL_MSG_PRD,
+                               &opal_prd_event_nb);
+               return rc;
+       }
+
+       return 0;
+}
+
+static int opal_prd_remove(struct platform_device *pdev)
+{
+       misc_deregister(&opal_prd_dev);
+       opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb);
+       return 0;
+}
+
+static const struct of_device_id opal_prd_match[] = {
+       { .compatible = "ibm,opal-prd" },
+       { },
+};
+
+static struct platform_driver opal_prd_driver = {
+       .driver = {
+               .name           = "opal-prd",
+               .owner          = THIS_MODULE,
+               .of_match_table = opal_prd_match,
+       },
+       .probe  = opal_prd_probe,
+       .remove = opal_prd_remove,
+};
+
+module_platform_driver(opal_prd_driver);
+
+MODULE_DEVICE_TABLE(of, opal_prd_match);
+MODULE_DESCRIPTION("PowerNV OPAL runtime diagnostic driver");
+MODULE_LICENSE("GPL");
index 655250499d18cd84e59a8217d5f102ffd15ddab0..a06059df9239202d660a5f7579034b715f9ee18a 100644 (file)
@@ -77,7 +77,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(opal_get_sensor_data);
 
-static __init int opal_sensor_init(void)
+int __init opal_sensor_init(void)
 {
        struct platform_device *pdev;
        struct device_node *sensor;
@@ -93,4 +93,3 @@ static __init int opal_sensor_init(void)
 
        return PTR_ERR_OR_ZERO(pdev);
 }
-machine_subsys_initcall(powernv, opal_sensor_init);
index 9d1acf22a099dfc1bf474f6244af11d1153742f6..afe66c576a385f4204860766fa69b9d6fd3e590f 100644 (file)
@@ -55,8 +55,10 @@ static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer)
        }
 
        ret = opal_get_param(token, param_id, (u64)buffer, length);
-       if (ret != OPAL_ASYNC_COMPLETION)
+       if (ret != OPAL_ASYNC_COMPLETION) {
+               ret = opal_error_code(ret);
                goto out_token;
+       }
 
        ret = opal_async_wait_response(token, &msg);
        if (ret) {
@@ -65,7 +67,7 @@ static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer)
                goto out_token;
        }
 
-       ret = be64_to_cpu(msg.params[1]);
+       ret = opal_error_code(be64_to_cpu(msg.params[1]));
 
 out_token:
        opal_async_release_token(token);
@@ -89,8 +91,10 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer)
 
        ret = opal_set_param(token, param_id, (u64)buffer, length);
 
-       if (ret != OPAL_ASYNC_COMPLETION)
+       if (ret != OPAL_ASYNC_COMPLETION) {
+               ret = opal_error_code(ret);
                goto out_token;
+       }
 
        ret = opal_async_wait_response(token, &msg);
        if (ret) {
@@ -99,7 +103,7 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer)
                goto out_token;
        }
 
-       ret = be64_to_cpu(msg.params[1]);
+       ret = opal_error_code(be64_to_cpu(msg.params[1]));
 
 out_token:
        opal_async_release_token(token);
@@ -162,10 +166,20 @@ void __init opal_sys_param_init(void)
                goto out;
        }
 
+       /* Some systems do not use sysparams; this is not an error */
+       sysparam = of_find_node_by_path("/ibm,opal/sysparams");
+       if (!sysparam)
+               goto out;
+
+       if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) {
+               pr_err("SYSPARAM: Opal sysparam node not compatible\n");
+               goto out_node_put;
+       }
+
        sysparam_kobj = kobject_create_and_add("sysparams", opal_kobj);
        if (!sysparam_kobj) {
                pr_err("SYSPARAM: Failed to create sysparam kobject\n");
-               goto out;
+               goto out_node_put;
        }
 
        /* Allocate big enough buffer for any get/set transactions */
@@ -176,30 +190,19 @@ void __init opal_sys_param_init(void)
                goto out_kobj_put;
        }
 
-       sysparam = of_find_node_by_path("/ibm,opal/sysparams");
-       if (!sysparam) {
-               pr_err("SYSPARAM: Opal sysparam node not found\n");
-               goto out_param_buf;
-       }
-
-       if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) {
-               pr_err("SYSPARAM: Opal sysparam node not compatible\n");
-               goto out_node_put;
-       }
-
        /* Number of parameters exposed through DT */
        count = of_property_count_strings(sysparam, "param-name");
        if (count < 0) {
                pr_err("SYSPARAM: No string found of property param-name in "
                                "the node %s\n", sysparam->name);
-               goto out_node_put;
+               goto out_param_buf;
        }
 
        id = kzalloc(sizeof(*id) * count, GFP_KERNEL);
        if (!id) {
                pr_err("SYSPARAM: Failed to allocate memory to read parameter "
                                "id\n");
-               goto out_node_put;
+               goto out_param_buf;
        }
 
        size = kzalloc(sizeof(*size) * count, GFP_KERNEL);
@@ -293,12 +296,12 @@ out_free_size:
        kfree(size);
 out_free_id:
        kfree(id);
-out_node_put:
-       of_node_put(sysparam);
 out_param_buf:
        kfree(param_data_buf);
 out_kobj_put:
        kobject_put(sysparam_kobj);
+out_node_put:
+       of_node_put(sysparam);
 out:
        return;
 }
index a7ade94cdf87bb75be61616f8a6279bd94bb2377..d6a7b8252e4da205edb33d8de53d90eb329f9a94 100644 (file)
@@ -283,6 +283,7 @@ OPAL_CALL(opal_sensor_read,                 OPAL_SENSOR_READ);
 OPAL_CALL(opal_get_param,                      OPAL_GET_PARAM);
 OPAL_CALL(opal_set_param,                      OPAL_SET_PARAM);
 OPAL_CALL(opal_handle_hmi,                     OPAL_HANDLE_HMI);
+OPAL_CALL(opal_config_cpu_idle_state,          OPAL_CONFIG_CPU_IDLE_STATE);
 OPAL_CALL(opal_slw_set_reg,                    OPAL_SLW_SET_REG);
 OPAL_CALL(opal_register_dump_region,           OPAL_REGISTER_DUMP_REGION);
 OPAL_CALL(opal_unregister_dump_region,         OPAL_UNREGISTER_DUMP_REGION);
@@ -295,3 +296,4 @@ OPAL_CALL(opal_i2c_request,                 OPAL_I2C_REQUEST);
 OPAL_CALL(opal_flash_read,                     OPAL_FLASH_READ);
 OPAL_CALL(opal_flash_write,                    OPAL_FLASH_WRITE);
 OPAL_CALL(opal_flash_erase,                    OPAL_FLASH_ERASE);
+OPAL_CALL(opal_prd_msg,                                OPAL_PRD_MSG);
index 2241565b0739ff3bc6dc84f9bc6c63c70edf8b8c..f084afa0e3baeb776ec3c653f093000509da1ee2 100644 (file)
@@ -53,13 +53,7 @@ static int mc_recoverable_range_len;
 
 struct device_node *opal_node;
 static DEFINE_SPINLOCK(opal_write_lock);
-static unsigned int *opal_irqs;
-static unsigned int opal_irq_count;
-static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
 static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
-static DEFINE_SPINLOCK(opal_notifier_lock);
-static uint64_t last_notified_mask = 0x0ul;
-static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
 static uint32_t opal_heartbeat;
 
 static void opal_reinit_cores(void)
@@ -225,82 +219,6 @@ static int __init opal_register_exception_handlers(void)
 }
 machine_early_initcall(powernv, opal_register_exception_handlers);
 
-int opal_notifier_register(struct notifier_block *nb)
-{
-       if (!nb) {
-               pr_warning("%s: Invalid argument (%p)\n",
-                          __func__, nb);
-               return -EINVAL;
-       }
-
-       atomic_notifier_chain_register(&opal_notifier_head, nb);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(opal_notifier_register);
-
-int opal_notifier_unregister(struct notifier_block *nb)
-{
-       if (!nb) {
-               pr_warning("%s: Invalid argument (%p)\n",
-                          __func__, nb);
-               return -EINVAL;
-       }
-
-       atomic_notifier_chain_unregister(&opal_notifier_head, nb);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(opal_notifier_unregister);
-
-static void opal_do_notifier(uint64_t events)
-{
-       unsigned long flags;
-       uint64_t changed_mask;
-
-       if (atomic_read(&opal_notifier_hold))
-               return;
-
-       spin_lock_irqsave(&opal_notifier_lock, flags);
-       changed_mask = last_notified_mask ^ events;
-       last_notified_mask = events;
-       spin_unlock_irqrestore(&opal_notifier_lock, flags);
-
-       /*
-        * We feed with the event bits and changed bits for
-        * enough information to the callback.
-        */
-       atomic_notifier_call_chain(&opal_notifier_head,
-                                  events, (void *)changed_mask);
-}
-
-void opal_notifier_update_evt(uint64_t evt_mask,
-                             uint64_t evt_val)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&opal_notifier_lock, flags);
-       last_notified_mask &= ~evt_mask;
-       last_notified_mask |= evt_val;
-       spin_unlock_irqrestore(&opal_notifier_lock, flags);
-}
-
-void opal_notifier_enable(void)
-{
-       int64_t rc;
-       __be64 evt = 0;
-
-       atomic_set(&opal_notifier_hold, 0);
-
-       /* Process pending events */
-       rc = opal_poll_events(&evt);
-       if (rc == OPAL_SUCCESS && evt)
-               opal_do_notifier(be64_to_cpu(evt));
-}
-
-void opal_notifier_disable(void)
-{
-       atomic_set(&opal_notifier_hold, 1);
-}
-
 /*
  * Opal message notifier based on message type. Allow subscribers to get
  * notified for specific message type.
@@ -317,6 +235,7 @@ int opal_message_notifier_register(enum opal_msg_type msg_type,
        return atomic_notifier_chain_register(
                                &opal_msg_notifier_head[msg_type], nb);
 }
+EXPORT_SYMBOL_GPL(opal_message_notifier_register);
 
 int opal_message_notifier_unregister(enum opal_msg_type msg_type,
                                     struct notifier_block *nb)
@@ -324,6 +243,7 @@ int opal_message_notifier_unregister(enum opal_msg_type msg_type,
        return atomic_notifier_chain_unregister(
                        &opal_msg_notifier_head[msg_type], nb);
 }
+EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
 
 static void opal_message_do_notify(uint32_t msg_type, void *msg)
 {
@@ -364,36 +284,36 @@ static void opal_handle_message(void)
        opal_message_do_notify(type, (void *)&msg);
 }
 
-static int opal_message_notify(struct notifier_block *nb,
-                         unsigned long events, void *change)
+static irqreturn_t opal_message_notify(int irq, void *data)
 {
-       if (events & OPAL_EVENT_MSG_PENDING)
-               opal_handle_message();
-       return 0;
+       opal_handle_message();
+       return IRQ_HANDLED;
 }
 
-static struct notifier_block opal_message_nb = {
-       .notifier_call  = opal_message_notify,
-       .next           = NULL,
-       .priority       = 0,
-};
-
 static int __init opal_message_init(void)
 {
-       int ret, i;
+       int ret, i, irq;
 
        for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
                ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
 
-       ret = opal_notifier_register(&opal_message_nb);
+       irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING));
+       if (!irq) {
+               pr_err("%s: Can't register OPAL event irq (%d)\n",
+                      __func__, irq);
+               return irq;
+       }
+
+       ret = request_irq(irq, opal_message_notify,
+                       IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL);
        if (ret) {
-               pr_err("%s: Can't register OPAL event notifier (%d)\n",
+               pr_err("%s: Can't request OPAL event irq (%d)\n",
                       __func__, ret);
                return ret;
        }
+
        return 0;
 }
-machine_early_initcall(powernv, opal_message_init);
 
 int opal_get_chars(uint32_t vtermno, char *buf, int count)
 {
@@ -573,7 +493,7 @@ int opal_handle_hmi_exception(struct pt_regs *regs)
        local_paca->hmi_event_available = 0;
        rc = opal_poll_events(&evt);
        if (rc == OPAL_SUCCESS && evt)
-               opal_do_notifier(be64_to_cpu(evt));
+               opal_handle_events(be64_to_cpu(evt));
 
        return 1;
 }
@@ -610,17 +530,6 @@ out:
        return !!recover_addr;
 }
 
-static irqreturn_t opal_interrupt(int irq, void *data)
-{
-       __be64 events;
-
-       opal_handle_interrupt(virq_to_hw(irq), &events);
-
-       opal_do_notifier(be64_to_cpu(events));
-
-       return IRQ_HANDLED;
-}
-
 static int opal_sysfs_init(void)
 {
        opal_kobj = kobject_create_and_add("opal", firmware_kobj);
@@ -693,21 +602,13 @@ static void __init opal_dump_region_init(void)
                        "rc = %d\n", rc);
 }
 
-static void opal_flash_init(struct device_node *opal_node)
-{
-       struct device_node *np;
-
-       for_each_child_of_node(opal_node, np)
-               if (of_device_is_compatible(np, "ibm,opal-flash"))
-                       of_platform_device_create(np, NULL, NULL);
-}
-
-static void opal_ipmi_init(struct device_node *opal_node)
+static void opal_pdev_init(struct device_node *opal_node,
+               const char *compatible)
 {
        struct device_node *np;
 
        for_each_child_of_node(opal_node, np)
-               if (of_device_is_compatible(np, "ibm,opal-ipmi"))
+               if (of_device_is_compatible(np, compatible))
                        of_platform_device_create(np, NULL, NULL);
 }
 
@@ -719,52 +620,15 @@ static void opal_i2c_create_devs(void)
                of_platform_device_create(np, NULL, NULL);
 }
 
-static void __init opal_irq_init(struct device_node *dn)
-{
-       const __be32 *irqs;
-       int i, irqlen;
-
-       /* Get interrupt property */
-       irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
-       opal_irq_count = irqs ? (irqlen / 4) : 0;
-       pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count);
-       if (!opal_irq_count)
-               return;
-
-       /* Install interrupt handlers */
-       opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
-       for (i = 0; irqs && i < opal_irq_count; i++, irqs++) {
-               unsigned int irq, virq;
-               int rc;
-
-               /* Get hardware and virtual IRQ */
-               irq = be32_to_cpup(irqs);
-               virq = irq_create_mapping(NULL, irq);
-               if (virq == NO_IRQ) {
-                       pr_warn("Failed to map irq 0x%x\n", irq);
-                       continue;
-               }
-
-               /* Install interrupt handler */
-               rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
-               if (rc) {
-                       irq_dispose_mapping(virq);
-                       pr_warn("Error %d requesting irq %d (0x%x)\n",
-                                rc, virq, irq);
-                       continue;
-               }
-
-               /* Cache IRQ */
-               opal_irqs[i] = virq;
-       }
-}
-
 static int kopald(void *unused)
 {
+       __be64 events;
+
        set_freezable();
        do {
                try_to_freeze();
-               opal_poll_events(NULL);
+               opal_poll_events(&events);
+               opal_handle_events(be64_to_cpu(events));
                msleep_interruptible(opal_heartbeat);
        } while (!kthread_should_stop());
 
@@ -807,15 +671,24 @@ static int __init opal_init(void)
                of_node_put(consoles);
        }
 
+       /* Initialise OPAL messaging system */
+       opal_message_init();
+
+       /* Initialise OPAL asynchronous completion interface */
+       opal_async_comp_init();
+
+       /* Initialise OPAL sensor interface */
+       opal_sensor_init();
+
+       /* Initialise OPAL hypervisor maintenance interrupt handling */
+       opal_hmi_handler_init();
+
        /* Create i2c platform devices */
        opal_i2c_create_devs();
 
        /* Set up a heartbeat thread if requested by OPAL */
        opal_init_heartbeat();
 
-       /* Find all OPAL interrupts and request them */
-       opal_irq_init(opal_node);
-
        /* Create "opal" kobject under /sys/firmware */
        rc = opal_sysfs_init();
        if (rc == 0) {
@@ -835,10 +708,10 @@ static int __init opal_init(void)
                opal_msglog_init();
        }
 
-       /* Initialize OPAL IPMI backend */
-       opal_ipmi_init(opal_node);
-
-       opal_flash_init(opal_node);
+       /* Initialize platform devices: IPMI backend, PRD & flash interface */
+       opal_pdev_init(opal_node, "ibm,opal-ipmi");
+       opal_pdev_init(opal_node, "ibm,opal-flash");
+       opal_pdev_init(opal_node, "ibm,opal-prd");
 
        return 0;
 }
@@ -846,15 +719,9 @@ machine_subsys_initcall(powernv, opal_init);
 
 void opal_shutdown(void)
 {
-       unsigned int i;
        long rc = OPAL_BUSY;
 
-       /* First free interrupts, which will also mask them */
-       for (i = 0; i < opal_irq_count; i++) {
-               if (opal_irqs[i])
-                       free_irq(opal_irqs[i], NULL);
-               opal_irqs[i] = 0;
-       }
+       opal_event_shutdown();
 
        /*
         * Then sync with OPAL which ensure anything that can
@@ -876,11 +743,14 @@ void opal_shutdown(void)
 
 /* Export this so that test modules can use it */
 EXPORT_SYMBOL_GPL(opal_invalid_call);
+EXPORT_SYMBOL_GPL(opal_xscom_read);
+EXPORT_SYMBOL_GPL(opal_xscom_write);
 EXPORT_SYMBOL_GPL(opal_ipmi_send);
 EXPORT_SYMBOL_GPL(opal_ipmi_recv);
 EXPORT_SYMBOL_GPL(opal_flash_read);
 EXPORT_SYMBOL_GPL(opal_flash_write);
 EXPORT_SYMBOL_GPL(opal_flash_erase);
+EXPORT_SYMBOL_GPL(opal_prd_msg);
 
 /* Convert a region of vmalloc memory to an opal sg list */
 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
@@ -954,6 +824,7 @@ int opal_error_code(int rc)
        case OPAL_ASYNC_COMPLETION:     return -EINPROGRESS;
        case OPAL_BUSY_EVENT:           return -EBUSY;
        case OPAL_NO_MEM:               return -ENOMEM;
+       case OPAL_PERMISSION:           return -EPERM;
 
        case OPAL_UNSUPPORTED:          return -EIO;
        case OPAL_HARDWARE:             return -EIO;
index f8bc950efcae39a63f213290b65d36fec1667b60..5738d315248b202b4a26aff084b07e819a80855c 100644 (file)
@@ -23,6 +23,9 @@
 #include <linux/io.h>
 #include <linux/msi.h>
 #include <linux/memblock.h>
+#include <linux/iommu.h>
+#include <linux/rculist.h>
+#include <linux/sizes.h>
 
 #include <asm/sections.h>
 #include <asm/io.h>
@@ -38,8 +41,9 @@
 #include <asm/debug.h>
 #include <asm/firmware.h>
 #include <asm/pnv-pci.h>
+#include <asm/mmzone.h>
 
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 
 #include "powernv.h"
 #include "pci.h"
 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
 #define TCE32_TABLE_SIZE       ((0x10000000 / 0x1000) * 8)
 
+#define POWERNV_IOMMU_DEFAULT_LEVELS   1
+#define POWERNV_IOMMU_MAX_LEVELS       5
+
+static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
+
 static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
                            const char *fmt, ...)
 {
@@ -1086,10 +1095,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
                return;
        }
 
-       pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
-                       GFP_KERNEL, hose->node);
-       pe->tce32_table->data = pe;
-
        /* Associate it with all child devices */
        pnv_ioda_setup_same_PE(bus, pe);
 
@@ -1283,36 +1288,27 @@ m64_failed:
        return -EBUSY;
 }
 
+static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
+               int num);
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+
 static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
 {
-       struct pci_bus        *bus;
-       struct pci_controller *hose;
-       struct pnv_phb        *phb;
        struct iommu_table    *tbl;
-       unsigned long         addr;
        int64_t               rc;
 
-       bus = dev->bus;
-       hose = pci_bus_to_host(bus);
-       phb = hose->private_data;
-       tbl = pe->tce32_table;
-       addr = tbl->it_base;
-
-       opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
-                                  pe->pe_number << 1, 1, __pa(addr),
-                                  0, 0x1000);
-
-       rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
-                                       pe->pe_number,
-                                       (pe->pe_number << 1) + 1,
-                                       pe->tce_bypass_base,
-                                       0);
+       tbl = pe->table_group.tables[0];
+       rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
        if (rc)
                pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
 
+       pnv_pci_ioda2_set_bypass(pe, false);
+       if (pe->table_group.group) {
+               iommu_group_put(pe->table_group.group);
+               BUG_ON(pe->table_group.group);
+       }
+       pnv_pci_ioda2_table_free_pages(tbl);
        iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
-       free_pages(addr, get_order(TCE32_TABLE_SIZE));
-       pe->tce32_table = NULL;
 }
 
 static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
@@ -1460,10 +1456,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
                        continue;
                }
 
-               pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
-                               GFP_KERNEL, hose->node);
-               pe->tce32_table->data = pe;
-
                /* Put PE to the list */
                mutex_lock(&phb->ioda.pe_list_mutex);
                list_add_tail(&pe->list, &phb->ioda.pe_list);
@@ -1598,12 +1590,19 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
 
        pe = &phb->ioda.pe_array[pdn->pe_number];
        WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
-       set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table);
+       set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
+       /*
+        * Note: iommu_add_device() will fail here as
+        * for physical PE: the device is already added by now;
+        * for virtual PE: sysfs entries are not ready yet and
+        * tce_iommu_bus_notifier will add the device to a group later.
+        */
 }
 
-static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
-                                    struct pci_dev *pdev, u64 dma_mask)
+static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
 {
+       struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+       struct pnv_phb *phb = hose->private_data;
        struct pci_dn *pdn = pci_get_pdn(pdev);
        struct pnv_ioda_pe *pe;
        uint64_t top;
@@ -1625,7 +1624,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
        } else {
                dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
                set_dma_ops(&pdev->dev, &dma_iommu_ops);
-               set_iommu_table_base(&pdev->dev, pe->tce32_table);
+               set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
        }
        *pdev->dev.dma_mask = dma_mask;
        return 0;
@@ -1654,36 +1653,36 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb,
 }
 
 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
-                                  struct pci_bus *bus,
-                                  bool add_to_iommu_group)
+                                  struct pci_bus *bus)
 {
        struct pci_dev *dev;
 
        list_for_each_entry(dev, &bus->devices, bus_list) {
-               if (add_to_iommu_group)
-                       set_iommu_table_base_and_group(&dev->dev,
-                                                      pe->tce32_table);
-               else
-                       set_iommu_table_base(&dev->dev, pe->tce32_table);
+               set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
+               iommu_add_device(&dev->dev);
 
-               if (dev->subordinate)
-                       pnv_ioda_setup_bus_dma(pe, dev->subordinate,
-                                              add_to_iommu_group);
+               if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
+                       pnv_ioda_setup_bus_dma(pe, dev->subordinate);
        }
 }
 
-static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
-                                        struct iommu_table *tbl,
-                                        __be64 *startp, __be64 *endp, bool rm)
+static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
+               unsigned long index, unsigned long npages, bool rm)
 {
+       struct iommu_table_group_link *tgl = list_first_entry_or_null(
+                       &tbl->it_group_list, struct iommu_table_group_link,
+                       next);
+       struct pnv_ioda_pe *pe = container_of(tgl->table_group,
+                       struct pnv_ioda_pe, table_group);
        __be64 __iomem *invalidate = rm ?
-               (__be64 __iomem *)pe->tce_inval_reg_phys :
-               (__be64 __iomem *)tbl->it_index;
+               (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
+               pe->phb->ioda.tce_inval_reg;
        unsigned long start, end, inc;
        const unsigned shift = tbl->it_page_shift;
 
-       start = __pa(startp);
-       end = __pa(endp);
+       start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset);
+       end = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset +
+                       npages - 1);
 
        /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
        if (tbl->it_busno) {
@@ -1719,26 +1718,79 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
         */
 }
 
-static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
-                                        struct iommu_table *tbl,
-                                        __be64 *startp, __be64 *endp, bool rm)
+static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
+               long npages, unsigned long uaddr,
+               enum dma_data_direction direction,
+               struct dma_attrs *attrs)
+{
+       int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
+                       attrs);
+
+       if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE))
+               pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
+
+       return ret;
+}
+
+#ifdef CONFIG_IOMMU_API
+static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
+               unsigned long *hpa, enum dma_data_direction *direction)
+{
+       long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+
+       if (!ret && (tbl->it_type &
+                       (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE)))
+               pnv_pci_ioda1_tce_invalidate(tbl, index, 1, false);
+
+       return ret;
+}
+#endif
+
+static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
+               long npages)
+{
+       pnv_tce_free(tbl, index, npages);
+
+       if (tbl->it_type & TCE_PCI_SWINV_FREE)
+               pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
+}
+
+static struct iommu_table_ops pnv_ioda1_iommu_ops = {
+       .set = pnv_ioda1_tce_build,
+#ifdef CONFIG_IOMMU_API
+       .exchange = pnv_ioda1_tce_xchg,
+#endif
+       .clear = pnv_ioda1_tce_free,
+       .get = pnv_tce_get,
+};
+
+static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
+{
+       /* 01xb - invalidate TCEs that match the specified PE# */
+       unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF);
+       struct pnv_phb *phb = pe->phb;
+
+       if (!phb->ioda.tce_inval_reg)
+               return;
+
+       mb(); /* Ensure above stores are visible */
+       __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
+}
+
+static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm,
+               __be64 __iomem *invalidate, unsigned shift,
+               unsigned long index, unsigned long npages)
 {
        unsigned long start, end, inc;
-       __be64 __iomem *invalidate = rm ?
-               (__be64 __iomem *)pe->tce_inval_reg_phys :
-               (__be64 __iomem *)tbl->it_index;
-       const unsigned shift = tbl->it_page_shift;
 
        /* We'll invalidate DMA address in PE scope */
        start = 0x2ull << 60;
-       start |= (pe->pe_number & 0xFF);
+       start |= (pe_number & 0xFF);
        end = start;
 
        /* Figure out the start, end and step */
-       inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
-       start |= (inc << shift);
-       inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
-       end |= (inc << shift);
+       start |= (index << shift);
+       end |= ((index + npages - 1) << shift);
        inc = (0x1ull << shift);
        mb();
 
@@ -1751,25 +1803,83 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
        }
 }
 
-void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
-                                __be64 *startp, __be64 *endp, bool rm)
+static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
+               unsigned long index, unsigned long npages, bool rm)
 {
-       struct pnv_ioda_pe *pe = tbl->data;
-       struct pnv_phb *phb = pe->phb;
+       struct iommu_table_group_link *tgl;
 
-       if (phb->type == PNV_PHB_IODA1)
-               pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
-       else
-               pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
+       list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+               struct pnv_ioda_pe *pe = container_of(tgl->table_group,
+                               struct pnv_ioda_pe, table_group);
+               __be64 __iomem *invalidate = rm ?
+                       (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
+                       pe->phb->ioda.tce_inval_reg;
+
+               pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm,
+                       invalidate, tbl->it_page_shift,
+                       index, npages);
+       }
+}
+
+static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
+               long npages, unsigned long uaddr,
+               enum dma_data_direction direction,
+               struct dma_attrs *attrs)
+{
+       int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
+                       attrs);
+
+       if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE))
+               pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+
+       return ret;
+}
+
+#ifdef CONFIG_IOMMU_API
+static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
+               unsigned long *hpa, enum dma_data_direction *direction)
+{
+       long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+
+       if (!ret && (tbl->it_type &
+                       (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE)))
+               pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false);
+
+       return ret;
+}
+#endif
+
+static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
+               long npages)
+{
+       pnv_tce_free(tbl, index, npages);
+
+       if (tbl->it_type & TCE_PCI_SWINV_FREE)
+               pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+}
+
+static void pnv_ioda2_table_free(struct iommu_table *tbl)
+{
+       pnv_pci_ioda2_table_free_pages(tbl);
+       iommu_free_table(tbl, "pnv");
 }
 
+static struct iommu_table_ops pnv_ioda2_iommu_ops = {
+       .set = pnv_ioda2_tce_build,
+#ifdef CONFIG_IOMMU_API
+       .exchange = pnv_ioda2_tce_xchg,
+#endif
+       .clear = pnv_ioda2_tce_free,
+       .get = pnv_tce_get,
+       .free = pnv_ioda2_table_free,
+};
+
 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
                                      struct pnv_ioda_pe *pe, unsigned int base,
                                      unsigned int segs)
 {
 
        struct page *tce_mem = NULL;
-       const __be64 *swinvp;
        struct iommu_table *tbl;
        unsigned int i;
        int64_t rc;
@@ -1783,6 +1893,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
        if (WARN_ON(pe->tce32_seg >= 0))
                return;
 
+       tbl = pnv_pci_table_alloc(phb->hose->node);
+       iommu_register_group(&pe->table_group, phb->hose->global_number,
+                       pe->pe_number);
+       pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
+
        /* Grab a 32-bit TCE table */
        pe->tce32_seg = base;
        pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
@@ -1817,39 +1932,30 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
        }
 
        /* Setup linux iommu table */
-       tbl = pe->tce32_table;
        pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
                                  base << 28, IOMMU_PAGE_SHIFT_4K);
 
        /* OPAL variant of P7IOC SW invalidated TCEs */
-       swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
-       if (swinvp) {
-               /* We need a couple more fields -- an address and a data
-                * to or.  Since the bus is only printed out on table free
-                * errors, and on the first pass the data will be a relative
-                * bus number, print that out instead.
-                */
-               pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
-               tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
-                               8);
+       if (phb->ioda.tce_inval_reg)
                tbl->it_type |= (TCE_PCI_SWINV_CREATE |
                                 TCE_PCI_SWINV_FREE   |
                                 TCE_PCI_SWINV_PAIR);
-       }
+
+       tbl->it_ops = &pnv_ioda1_iommu_ops;
+       pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift;
+       pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
        iommu_init_table(tbl, phb->hose->node);
 
        if (pe->flags & PNV_IODA_PE_DEV) {
-               iommu_register_group(tbl, phb->hose->global_number,
-                                    pe->pe_number);
-               set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
-       } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
-               iommu_register_group(tbl, phb->hose->global_number,
-                                    pe->pe_number);
-               pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
-       } else if (pe->flags & PNV_IODA_PE_VF) {
-               iommu_register_group(tbl, phb->hose->global_number,
-                                    pe->pe_number);
-       }
+               /*
+                * Setting table base here only for carrying iommu_group
+                * further down to let iommu_add_device() do the job.
+                * pnv_pci_ioda_dma_dev_setup will override it later anyway.
+                */
+               set_iommu_table_base(&pe->pdev->dev, tbl);
+               iommu_add_device(&pe->pdev->dev);
+       } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+               pnv_ioda_setup_bus_dma(pe, pe->pbus);
 
        return;
  fail:
@@ -1858,11 +1964,53 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
                pe->tce32_seg = -1;
        if (tce_mem)
                __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
+       if (tbl) {
+               pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
+               iommu_free_table(tbl, "pnv");
+       }
 }
 
-static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
+               int num, struct iommu_table *tbl)
+{
+       struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+                       table_group);
+       struct pnv_phb *phb = pe->phb;
+       int64_t rc;
+       const unsigned long size = tbl->it_indirect_levels ?
+                       tbl->it_level_size : tbl->it_size;
+       const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
+       const __u64 win_size = tbl->it_size << tbl->it_page_shift;
+
+       pe_info(pe, "Setting up window#%d %llx..%llx pg=%x\n", num,
+                       start_addr, start_addr + win_size - 1,
+                       IOMMU_PAGE_SIZE(tbl));
+
+       /*
+        * Map TCE table through TVT. The TVE index is the PE number
+        * shifted by 1 bit for 32-bits DMA space.
+        */
+       rc = opal_pci_map_pe_dma_window(phb->opal_id,
+                       pe->pe_number,
+                       (pe->pe_number << 1) + num,
+                       tbl->it_indirect_levels + 1,
+                       __pa(tbl->it_base),
+                       size << 3,
+                       IOMMU_PAGE_SIZE(tbl));
+       if (rc) {
+               pe_err(pe, "Failed to configure TCE table, err %ld\n", rc);
+               return rc;
+       }
+
+       pnv_pci_link_table_and_group(phb->hose->node, num,
+                       tbl, &pe->table_group);
+       pnv_pci_ioda2_tce_invalidate_entire(pe);
+
+       return 0;
+}
+
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
 {
-       struct pnv_ioda_pe *pe = tbl->data;
        uint16_t window_id = (pe->pe_number << 1 ) + 1;
        int64_t rc;
 
@@ -1882,17 +2030,6 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
                                                     window_id,
                                                     pe->tce_bypass_base,
                                                     0);
-
-               /*
-                * EEH needs the mapping between IOMMU table and group
-                * of those VFIO/KVM pass-through devices. We can postpone
-                * resetting DMA ops until the DMA mask is configured in
-                * host side.
-                */
-               if (pe->pdev)
-                       set_iommu_table_base(&pe->pdev->dev, tbl);
-               else
-                       pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
        }
        if (rc)
                pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
@@ -1900,106 +2037,363 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
                pe->tce_bypass_enabled = enable;
 }
 
-static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
-                                         struct pnv_ioda_pe *pe)
+static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+               __u32 page_shift, __u64 window_size, __u32 levels,
+               struct iommu_table *tbl);
+
+static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
+               int num, __u32 page_shift, __u64 window_size, __u32 levels,
+               struct iommu_table **ptbl)
 {
-       /* TVE #1 is selected by PCI address bit 59 */
-       pe->tce_bypass_base = 1ull << 59;
+       struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+                       table_group);
+       int nid = pe->phb->hose->node;
+       __u64 bus_offset = num ? pe->tce_bypass_base : table_group->tce32_start;
+       long ret;
+       struct iommu_table *tbl;
 
-       /* Install set_bypass callback for VFIO */
-       pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass;
+       tbl = pnv_pci_table_alloc(nid);
+       if (!tbl)
+               return -ENOMEM;
 
-       /* Enable bypass by default */
-       pnv_pci_ioda2_set_bypass(pe->tce32_table, true);
+       ret = pnv_pci_ioda2_table_alloc_pages(nid,
+                       bus_offset, page_shift, window_size,
+                       levels, tbl);
+       if (ret) {
+               iommu_free_table(tbl, "pnv");
+               return ret;
+       }
+
+       tbl->it_ops = &pnv_ioda2_iommu_ops;
+       if (pe->phb->ioda.tce_inval_reg)
+               tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
+
+       *ptbl = tbl;
+
+       return 0;
 }
 
-static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
-                                      struct pnv_ioda_pe *pe)
+static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
+{
+       struct iommu_table *tbl = NULL;
+       long rc;
+
+       rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
+                       IOMMU_PAGE_SHIFT_4K,
+                       pe->table_group.tce32_size,
+                       POWERNV_IOMMU_DEFAULT_LEVELS, &tbl);
+       if (rc) {
+               pe_err(pe, "Failed to create 32-bit TCE table, err %ld",
+                               rc);
+               return rc;
+       }
+
+       iommu_init_table(tbl, pe->phb->hose->node);
+
+       rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
+       if (rc) {
+               pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
+                               rc);
+               pnv_ioda2_table_free(tbl);
+               return rc;
+       }
+
+       if (!pnv_iommu_bypass_disabled)
+               pnv_pci_ioda2_set_bypass(pe, true);
+
+       /* OPAL variant of PHB3 invalidated TCEs */
+       if (pe->phb->ioda.tce_inval_reg)
+               tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
+
+       /*
+        * Setting table base here only for carrying iommu_group
+        * further down to let iommu_add_device() do the job.
+        * pnv_pci_ioda_dma_dev_setup will override it later anyway.
+        */
+       if (pe->flags & PNV_IODA_PE_DEV)
+               set_iommu_table_base(&pe->pdev->dev, tbl);
+
+       return 0;
+}
+
+#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV)
+static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
+               int num)
+{
+       struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+                       table_group);
+       struct pnv_phb *phb = pe->phb;
+       long ret;
+
+       pe_info(pe, "Removing DMA window #%d\n", num);
+
+       ret = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
+                       (pe->pe_number << 1) + num,
+                       0/* levels */, 0/* table address */,
+                       0/* table size */, 0/* page size */);
+       if (ret)
+               pe_warn(pe, "Unmapping failed, ret = %ld\n", ret);
+       else
+               pnv_pci_ioda2_tce_invalidate_entire(pe);
+
+       pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
+
+       return ret;
+}
+#endif
+
+#ifdef CONFIG_IOMMU_API
+static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+               __u64 window_size, __u32 levels)
+{
+       unsigned long bytes = 0;
+       const unsigned window_shift = ilog2(window_size);
+       unsigned entries_shift = window_shift - page_shift;
+       unsigned table_shift = entries_shift + 3;
+       unsigned long tce_table_size = max(0x1000UL, 1UL << table_shift);
+       unsigned long direct_table_size;
+
+       if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) ||
+                       (window_size > memory_hotplug_max()) ||
+                       !is_power_of_2(window_size))
+               return 0;
+
+       /* Calculate a direct table size from window_size and levels */
+       entries_shift = (entries_shift + levels - 1) / levels;
+       table_shift = entries_shift + 3;
+       table_shift = max_t(unsigned, table_shift, PAGE_SHIFT);
+       direct_table_size =  1UL << table_shift;
+
+       for ( ; levels; --levels) {
+               bytes += _ALIGN_UP(tce_table_size, direct_table_size);
+
+               tce_table_size /= direct_table_size;
+               tce_table_size <<= 3;
+               tce_table_size = _ALIGN_UP(tce_table_size, direct_table_size);
+       }
+
+       return bytes;
+}
+
+static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
+{
+       struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+                                               table_group);
+       /* Store @tbl as pnv_pci_ioda2_unset_window() resets it */
+       struct iommu_table *tbl = pe->table_group.tables[0];
+
+       pnv_pci_ioda2_set_bypass(pe, false);
+       pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+       pnv_ioda2_table_free(tbl);
+}
+
+static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
+{
+       struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+                                               table_group);
+
+       pnv_pci_ioda2_setup_default_config(pe);
+}
+
+static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
+       .get_table_size = pnv_pci_ioda2_get_table_size,
+       .create_table = pnv_pci_ioda2_create_table,
+       .set_window = pnv_pci_ioda2_set_window,
+       .unset_window = pnv_pci_ioda2_unset_window,
+       .take_ownership = pnv_ioda2_take_ownership,
+       .release_ownership = pnv_ioda2_release_ownership,
+};
+#endif
+
+static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb)
 {
-       struct page *tce_mem = NULL;
-       void *addr;
        const __be64 *swinvp;
-       struct iommu_table *tbl;
-       unsigned int tce_table_size, end;
-       int64_t rc;
 
-       /* We shouldn't already have a 32-bit DMA associated */
-       if (WARN_ON(pe->tce32_seg >= 0))
+       /* OPAL variant of PHB3 invalidated TCEs */
+       swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
+       if (!swinvp)
                return;
 
-       /* The PE will reserve all possible 32-bits space */
-       pe->tce32_seg = 0;
-       end = (1 << ilog2(phb->ioda.m32_pci_base));
-       tce_table_size = (end / 0x1000) * 8;
-       pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
-               end);
+       phb->ioda.tce_inval_reg_phys = be64_to_cpup(swinvp);
+       phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8);
+}
 
-       /* Allocate TCE table */
-       tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
-                                  get_order(tce_table_size));
+static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift,
+               unsigned levels, unsigned long limit,
+               unsigned long *current_offset)
+{
+       struct page *tce_mem = NULL;
+       __be64 *addr, *tmp;
+       unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT;
+       unsigned long allocated = 1UL << (order + PAGE_SHIFT);
+       unsigned entries = 1UL << (shift - 3);
+       long i;
+
+       tce_mem = alloc_pages_node(nid, GFP_KERNEL, order);
        if (!tce_mem) {
-               pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
-               goto fail;
+               pr_err("Failed to allocate a TCE memory, order=%d\n", order);
+               return NULL;
        }
        addr = page_address(tce_mem);
-       memset(addr, 0, tce_table_size);
+       memset(addr, 0, allocated);
+
+       --levels;
+       if (!levels) {
+               *current_offset += allocated;
+               return addr;
+       }
+
+       for (i = 0; i < entries; ++i) {
+               tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
+                               levels, limit, current_offset);
+               if (!tmp)
+                       break;
+
+               addr[i] = cpu_to_be64(__pa(tmp) |
+                               TCE_PCI_READ | TCE_PCI_WRITE);
+
+               if (*current_offset >= limit)
+                       break;
+       }
+
+       return addr;
+}
+
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+               unsigned long size, unsigned level);
+
+static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+               __u32 page_shift, __u64 window_size, __u32 levels,
+               struct iommu_table *tbl)
+{
+       void *addr;
+       unsigned long offset = 0, level_shift;
+       const unsigned window_shift = ilog2(window_size);
+       unsigned entries_shift = window_shift - page_shift;
+       unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT);
+       const unsigned long tce_table_size = 1UL << table_shift;
+
+       if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
+               return -EINVAL;
+
+       if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size))
+               return -EINVAL;
+
+       /* Adjust direct table size from window_size and levels */
+       entries_shift = (entries_shift + levels - 1) / levels;
+       level_shift = entries_shift + 3;
+       level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
+
+       /* Allocate TCE table */
+       addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+                       levels, tce_table_size, &offset);
+
+       /* addr==NULL means that the first level allocation failed */
+       if (!addr)
+               return -ENOMEM;
 
        /*
-        * Map TCE table through TVT. The TVE index is the PE number
-        * shifted by 1 bit for 32-bits DMA space.
+        * First level was allocated but some lower level failed as
+        * we did not allocate as much as we wanted,
+        * release partially allocated table.
         */
-       rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
-                                       pe->pe_number << 1, 1, __pa(addr),
-                                       tce_table_size, 0x1000);
-       if (rc) {
-               pe_err(pe, "Failed to configure 32-bit TCE table,"
-                      " err %ld\n", rc);
-               goto fail;
+       if (offset < tce_table_size) {
+               pnv_pci_ioda2_table_do_free_pages(addr,
+                               1ULL << (level_shift - 3), levels - 1);
+               return -ENOMEM;
        }
 
        /* Setup linux iommu table */
-       tbl = pe->tce32_table;
-       pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
-                       IOMMU_PAGE_SHIFT_4K);
+       pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
+                       page_shift);
+       tbl->it_level_size = 1ULL << (level_shift - 3);
+       tbl->it_indirect_levels = levels - 1;
+       tbl->it_allocated_size = offset;
 
-       /* OPAL variant of PHB3 invalidated TCEs */
-       swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
-       if (swinvp) {
-               /* We need a couple more fields -- an address and a data
-                * to or.  Since the bus is only printed out on table free
-                * errors, and on the first pass the data will be a relative
-                * bus number, print that out instead.
-                */
-               pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
-               tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
-                               8);
-               tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
+       pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n",
+                       window_size, tce_table_size, bus_offset);
+
+       return 0;
+}
+
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+               unsigned long size, unsigned level)
+{
+       const unsigned long addr_ul = (unsigned long) addr &
+                       ~(TCE_PCI_READ | TCE_PCI_WRITE);
+
+       if (level) {
+               long i;
+               u64 *tmp = (u64 *) addr_ul;
+
+               for (i = 0; i < size; ++i) {
+                       unsigned long hpa = be64_to_cpu(tmp[i]);
+
+                       if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
+                               continue;
+
+                       pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
+                                       level - 1);
+               }
        }
-       iommu_init_table(tbl, phb->hose->node);
 
-       if (pe->flags & PNV_IODA_PE_DEV) {
-               iommu_register_group(tbl, phb->hose->global_number,
-                                    pe->pe_number);
-               set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
-       } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
-               iommu_register_group(tbl, phb->hose->global_number,
-                                    pe->pe_number);
-               pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
-       } else if (pe->flags & PNV_IODA_PE_VF) {
-               iommu_register_group(tbl, phb->hose->global_number,
-                                    pe->pe_number);
-       }
-
-       /* Also create a bypass window */
-       if (!pnv_iommu_bypass_disabled)
-               pnv_pci_ioda2_setup_bypass_pe(phb, pe);
+       free_pages(addr_ul, get_order(size << 3));
+}
 
-       return;
-fail:
-       if (pe->tce32_seg >= 0)
-               pe->tce32_seg = -1;
-       if (tce_mem)
-               __free_pages(tce_mem, get_order(tce_table_size));
+static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
+{
+       const unsigned long size = tbl->it_indirect_levels ?
+                       tbl->it_level_size : tbl->it_size;
+
+       if (!tbl->it_size)
+               return;
+
+       pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
+                       tbl->it_indirect_levels);
+}
+
+static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+                                      struct pnv_ioda_pe *pe)
+{
+       int64_t rc;
+
+       /* We shouldn't already have a 32-bit DMA associated */
+       if (WARN_ON(pe->tce32_seg >= 0))
+               return;
+
+       /* TVE #1 is selected by PCI address bit 59 */
+       pe->tce_bypass_base = 1ull << 59;
+
+       iommu_register_group(&pe->table_group, phb->hose->global_number,
+                       pe->pe_number);
+
+       /* The PE will reserve all possible 32-bits space */
+       pe->tce32_seg = 0;
+       pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
+               phb->ioda.m32_pci_base);
+
+       /* Setup linux iommu table */
+       pe->table_group.tce32_start = 0;
+       pe->table_group.tce32_size = phb->ioda.m32_pci_base;
+       pe->table_group.max_dynamic_windows_supported =
+                       IOMMU_TABLE_GROUP_MAX_TABLES;
+       pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS;
+       pe->table_group.pgsizes = SZ_4K | SZ_64K | SZ_16M;
+#ifdef CONFIG_IOMMU_API
+       pe->table_group.ops = &pnv_pci_ioda2_ops;
+#endif
+
+       rc = pnv_pci_ioda2_setup_default_config(pe);
+       if (rc) {
+               if (pe->tce32_seg >= 0)
+                       pe->tce32_seg = -1;
+               return;
+       }
+
+       if (pe->flags & PNV_IODA_PE_DEV)
+               iommu_add_device(&pe->pdev->dev);
+       else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+               pnv_ioda_setup_bus_dma(pe, pe->pbus);
 }
 
 static void pnv_ioda_setup_dma(struct pnv_phb *phb)
@@ -2024,6 +2418,8 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
        pr_info("PCI: %d PE# for a total weight of %d\n",
                phb->ioda.dma_pe_count, phb->ioda.dma_weight);
 
+       pnv_pci_ioda_setup_opal_tce_kill(phb);
+
        /* Walk our PE list and configure their DMA segments, hand them
         * out one base segment plus any residual segments based on
         * weight
@@ -2642,12 +3038,27 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
        return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
 }
 
-static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
+static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
 {
+       struct pnv_phb *phb = hose->private_data;
+
        opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE,
                       OPAL_ASSERT_RESET);
 }
 
+static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
+       .dma_dev_setup = pnv_pci_dma_dev_setup,
+#ifdef CONFIG_PCI_MSI
+       .setup_msi_irqs = pnv_setup_msi_irqs,
+       .teardown_msi_irqs = pnv_teardown_msi_irqs,
+#endif
+       .enable_device_hook = pnv_pci_enable_device_hook,
+       .window_alignment = pnv_pci_window_alignment,
+       .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+       .dma_set_mask = pnv_pci_ioda_dma_set_mask,
+       .shutdown = pnv_pci_ioda_shutdown,
+};
+
 static void __init pnv_pci_init_ioda_phb(struct device_node *np,
                                         u64 hub_id, int ioda_type)
 {
@@ -2791,12 +3202,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 
        /* Setup TCEs */
        phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
-       phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
        phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask;
 
-       /* Setup shutdown function for kexec */
-       phb->shutdown = pnv_pci_ioda_shutdown;
-
        /* Setup MSI support */
        pnv_pci_init_ioda_msis(phb);
 
@@ -2808,10 +3215,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
         * the child P2P bridges) can form individual PE.
         */
        ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
-       pnv_pci_controller_ops.enable_device_hook = pnv_pci_enable_device_hook;
-       pnv_pci_controller_ops.window_alignment = pnv_pci_window_alignment;
-       pnv_pci_controller_ops.reset_secondary_bus = pnv_pci_reset_secondary_bus;
-       hose->controller_ops = pnv_pci_controller_ops;
+       hose->controller_ops = pnv_pci_ioda_controller_ops;
 
 #ifdef CONFIG_PCI_IOV
        ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
index 4729ca793813cfe908e0d5818766ac130cfdafd6..f2bdfea3b68d067cb28a91d259e84ec126783001 100644 (file)
@@ -83,18 +83,42 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb)
 static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
 #endif /* CONFIG_PCI_MSI */
 
+static struct iommu_table_ops pnv_p5ioc2_iommu_ops = {
+       .set = pnv_tce_build,
+#ifdef CONFIG_IOMMU_API
+       .exchange = pnv_tce_xchg,
+#endif
+       .clear = pnv_tce_free,
+       .get = pnv_tce_get,
+};
+
 static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
                                         struct pci_dev *pdev)
 {
-       if (phb->p5ioc2.iommu_table.it_map == NULL) {
-               iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node);
-               iommu_register_group(&phb->p5ioc2.iommu_table,
+       struct iommu_table *tbl = phb->p5ioc2.table_group.tables[0];
+
+       if (!tbl->it_map) {
+               tbl->it_ops = &pnv_p5ioc2_iommu_ops;
+               iommu_init_table(tbl, phb->hose->node);
+               iommu_register_group(&phb->p5ioc2.table_group,
                                pci_domain_nr(phb->hose->bus), phb->opal_id);
+               INIT_LIST_HEAD_RCU(&tbl->it_group_list);
+               pnv_pci_link_table_and_group(phb->hose->node, 0,
+                               tbl, &phb->p5ioc2.table_group);
        }
 
-       set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table);
+       set_iommu_table_base(&pdev->dev, tbl);
+       iommu_add_device(&pdev->dev);
 }
 
+static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = {
+       .dma_dev_setup = pnv_pci_dma_dev_setup,
+#ifdef CONFIG_PCI_MSI
+       .setup_msi_irqs = pnv_setup_msi_irqs,
+       .teardown_msi_irqs = pnv_teardown_msi_irqs,
+#endif
+};
+
 static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
                                           void *tce_mem, u64 tce_size)
 {
@@ -103,6 +127,8 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
        u64 phb_id;
        int64_t rc;
        static int primary = 1;
+       struct iommu_table_group *table_group;
+       struct iommu_table *tbl;
 
        pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name);
 
@@ -133,7 +159,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
        phb->hose->first_busno = 0;
        phb->hose->last_busno = 0xff;
        phb->hose->private_data = phb;
-       phb->hose->controller_ops = pnv_pci_controller_ops;
+       phb->hose->controller_ops = pnv_pci_p5ioc2_controller_ops;
        phb->hub_id = hub_id;
        phb->opal_id = phb_id;
        phb->type = PNV_PHB_P5IOC2;
@@ -172,6 +198,15 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
        pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table,
                                  tce_mem, tce_size, 0,
                                  IOMMU_PAGE_SHIFT_4K);
+       /*
+        * We do not allocate iommu_table as we do not support
+        * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table()
+        * should not be called for phb->p5ioc2.table_group.tables[0] ever.
+        */
+       tbl = phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table;
+       table_group = &phb->p5ioc2.table_group;
+       table_group->tce32_start = tbl->it_offset << tbl->it_page_shift;
+       table_group->tce32_size = tbl->it_size << tbl->it_page_shift;
 }
 
 void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
index bca2aeb6e4b6a4f179622c4786d79e7f65062069..765d8ed558d0e16a6dddc16dc7d67d6b6b44b2ce 100644 (file)
@@ -45,7 +45,7 @@
 //#define cfg_dbg(fmt...)      printk(fmt)
 
 #ifdef CONFIG_PCI_MSI
-static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 {
        struct pci_controller *hose = pci_bus_to_host(pdev->bus);
        struct pnv_phb *phb = hose->private_data;
@@ -94,7 +94,7 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
        return 0;
 }
 
-static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
+void pnv_teardown_msi_irqs(struct pci_dev *pdev)
 {
        struct pci_controller *hose = pci_bus_to_host(pdev->bus);
        struct pnv_phb *phb = hose->private_data;
@@ -572,80 +572,152 @@ struct pci_ops pnv_pci_ops = {
        .write = pnv_pci_write_config,
 };
 
-static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
-                        unsigned long uaddr, enum dma_data_direction direction,
-                        struct dma_attrs *attrs, bool rm)
+static __be64 *pnv_tce(struct iommu_table *tbl, long idx)
 {
-       u64 proto_tce;
-       __be64 *tcep, *tces;
-       u64 rpn;
-
-       proto_tce = TCE_PCI_READ; // Read allowed
+       __be64 *tmp = ((__be64 *)tbl->it_base);
+       int  level = tbl->it_indirect_levels;
+       const long shift = ilog2(tbl->it_level_size);
+       unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
+
+       while (level) {
+               int n = (idx & mask) >> (level * shift);
+               unsigned long tce = be64_to_cpu(tmp[n]);
+
+               tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
+               idx &= ~mask;
+               mask >>= shift;
+               --level;
+       }
 
-       if (direction != DMA_TO_DEVICE)
-               proto_tce |= TCE_PCI_WRITE;
+       return tmp + idx;
+}
 
-       tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
-       rpn = __pa(uaddr) >> tbl->it_page_shift;
+int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+               unsigned long uaddr, enum dma_data_direction direction,
+               struct dma_attrs *attrs)
+{
+       u64 proto_tce = iommu_direction_to_tce_perm(direction);
+       u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
+       long i;
 
-       while (npages--)
-               *(tcep++) = cpu_to_be64(proto_tce |
-                               (rpn++ << tbl->it_page_shift));
+       for (i = 0; i < npages; i++) {
+               unsigned long newtce = proto_tce |
+                       ((rpn + i) << tbl->it_page_shift);
+               unsigned long idx = index - tbl->it_offset + i;
 
-       /* Some implementations won't cache invalid TCEs and thus may not
-        * need that flush. We'll probably turn it_type into a bit mask
-        * of flags if that becomes the case
-        */
-       if (tbl->it_type & TCE_PCI_SWINV_CREATE)
-               pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
+               *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce);
+       }
 
        return 0;
 }
 
-static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages,
-                           unsigned long uaddr,
-                           enum dma_data_direction direction,
-                           struct dma_attrs *attrs)
+#ifdef CONFIG_IOMMU_API
+int pnv_tce_xchg(struct iommu_table *tbl, long index,
+               unsigned long *hpa, enum dma_data_direction *direction)
 {
-       return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs,
-                       false);
+       u64 proto_tce = iommu_direction_to_tce_perm(*direction);
+       unsigned long newtce = *hpa | proto_tce, oldtce;
+       unsigned long idx = index - tbl->it_offset;
+
+       BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
+
+       oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce));
+       *hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+       *direction = iommu_tce_direction(oldtce);
+
+       return 0;
 }
+#endif
 
-static void pnv_tce_free(struct iommu_table *tbl, long index, long npages,
-               bool rm)
+void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
 {
-       __be64 *tcep, *tces;
+       long i;
 
-       tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
+       for (i = 0; i < npages; i++) {
+               unsigned long idx = index - tbl->it_offset + i;
 
-       while (npages--)
-               *(tcep++) = cpu_to_be64(0);
+               *(pnv_tce(tbl, idx)) = cpu_to_be64(0);
+       }
+}
 
-       if (tbl->it_type & TCE_PCI_SWINV_FREE)
-               pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
+unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+{
+       return *(pnv_tce(tbl, index - tbl->it_offset));
 }
 
-static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages)
+struct iommu_table *pnv_pci_table_alloc(int nid)
 {
-       pnv_tce_free(tbl, index, npages, false);
+       struct iommu_table *tbl;
+
+       tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid);
+       INIT_LIST_HEAD_RCU(&tbl->it_group_list);
+
+       return tbl;
 }
 
-static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+long pnv_pci_link_table_and_group(int node, int num,
+               struct iommu_table *tbl,
+               struct iommu_table_group *table_group)
 {
-       return ((u64 *)tbl->it_base)[index - tbl->it_offset];
+       struct iommu_table_group_link *tgl = NULL;
+
+       if (WARN_ON(!tbl || !table_group))
+               return -EINVAL;
+
+       tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
+                       node);
+       if (!tgl)
+               return -ENOMEM;
+
+       tgl->table_group = table_group;
+       list_add_rcu(&tgl->next, &tbl->it_group_list);
+
+       table_group->tables[num] = tbl;
+
+       return 0;
 }
 
-static int pnv_tce_build_rm(struct iommu_table *tbl, long index, long npages,
-                           unsigned long uaddr,
-                           enum dma_data_direction direction,
-                           struct dma_attrs *attrs)
+static void pnv_iommu_table_group_link_free(struct rcu_head *head)
 {
-       return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, true);
+       struct iommu_table_group_link *tgl = container_of(head,
+                       struct iommu_table_group_link, rcu);
+
+       kfree(tgl);
 }
 
-static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages)
+void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+               struct iommu_table_group *table_group)
 {
-       pnv_tce_free(tbl, index, npages, true);
+       long i;
+       bool found;
+       struct iommu_table_group_link *tgl;
+
+       if (!tbl || !table_group)
+               return;
+
+       /* Remove link to a group from table's list of attached groups */
+       found = false;
+       list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+               if (tgl->table_group == table_group) {
+                       list_del_rcu(&tgl->next);
+                       call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
+                       found = true;
+                       break;
+               }
+       }
+       if (WARN_ON(!found))
+               return;
+
+       /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
+       found = false;
+       for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+               if (table_group->tables[i] == tbl) {
+                       table_group->tables[i] = NULL;
+                       found = true;
+                       break;
+               }
+       }
+       WARN_ON(!found);
 }
 
 void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
@@ -662,7 +734,7 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
        tbl->it_type = TCE_PCI;
 }
 
-static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
+void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
 {
        struct pci_controller *hose = pci_bus_to_host(pdev->bus);
        struct pnv_phb *phb = hose->private_data;
@@ -689,16 +761,6 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
                phb->dma_dev_setup(phb, pdev);
 }
 
-int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
-{
-       struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-       struct pnv_phb *phb = hose->private_data;
-
-       if (phb && phb->dma_set_mask)
-               return phb->dma_set_mask(phb, pdev, dma_mask);
-       return __dma_set_mask(&pdev->dev, dma_mask);
-}
-
 u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
 {
        struct pci_controller *hose = pci_bus_to_host(pdev->bus);
@@ -714,12 +776,9 @@ void pnv_pci_shutdown(void)
 {
        struct pci_controller *hose;
 
-       list_for_each_entry(hose, &hose_list, list_node) {
-               struct pnv_phb *phb = hose->private_data;
-
-               if (phb && phb->shutdown)
-                       phb->shutdown(phb);
-       }
+       list_for_each_entry(hose, &hose_list, list_node)
+               if (hose->controller_ops.shutdown)
+                       hose->controller_ops.shutdown(hose);
 }
 
 /* Fixup wrong class code in p7ioc and p8 root complex */
@@ -762,22 +821,7 @@ void __init pnv_pci_init(void)
        pci_devs_phb_init();
 
        /* Configure IOMMU DMA hooks */
-       ppc_md.tce_build = pnv_tce_build_vm;
-       ppc_md.tce_free = pnv_tce_free_vm;
-       ppc_md.tce_build_rm = pnv_tce_build_rm;
-       ppc_md.tce_free_rm = pnv_tce_free_rm;
-       ppc_md.tce_get = pnv_tce_get;
        set_pci_dma_ops(&dma_iommu_ops);
-
-       /* Configure MSIs */
-#ifdef CONFIG_PCI_MSI
-       ppc_md.setup_msi_irqs = pnv_setup_msi_irqs;
-       ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs;
-#endif
 }
 
 machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init);
-
-struct pci_controller_ops pnv_pci_controller_ops = {
-       .dma_dev_setup = pnv_pci_dma_dev_setup,
-};
index 070ee888fc95cef223337e3bd945726ffc7ac7cc..8ef2d28aded0f6ebda3a0d9f767726927817dac5 100644 (file)
@@ -57,8 +57,7 @@ struct pnv_ioda_pe {
        /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
        int                     tce32_seg;
        int                     tce32_segcount;
-       struct iommu_table      *tce32_table;
-       phys_addr_t             tce_inval_reg_phys;
+       struct iommu_table_group table_group;
 
        /* 64-bit TCE bypass region */
        bool                    tce_bypass_enabled;
@@ -106,13 +105,10 @@ struct pnv_phb {
                         unsigned int hwirq, unsigned int virq,
                         unsigned int is_64, struct msi_msg *msg);
        void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
-       int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev,
-                           u64 dma_mask);
        u64 (*dma_get_required_mask)(struct pnv_phb *phb,
                                     struct pci_dev *pdev);
        void (*fixup_phb)(struct pci_controller *hose);
        u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
-       void (*shutdown)(struct pnv_phb *phb);
        int (*init_m64)(struct pnv_phb *phb);
        void (*reserve_m64_pe)(struct pnv_phb *phb);
        int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all);
@@ -123,6 +119,7 @@ struct pnv_phb {
        union {
                struct {
                        struct iommu_table iommu_table;
+                       struct iommu_table_group table_group;
                } p5ioc2;
 
                struct {
@@ -186,6 +183,12 @@ struct pnv_phb {
                         * boot for resource allocation purposes
                         */
                        struct list_head        pe_dma_list;
+
+                       /* TCE cache invalidate registers (physical and
+                        * remapped)
+                        */
+                       phys_addr_t             tce_inval_reg_phys;
+                       __be64 __iomem          *tce_inval_reg;
                } ioda;
        };
 
@@ -200,6 +203,13 @@ struct pnv_phb {
 };
 
 extern struct pci_ops pnv_pci_ops;
+extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+               unsigned long uaddr, enum dma_data_direction direction,
+               struct dma_attrs *attrs);
+extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
+extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
+               unsigned long *hpa, enum dma_data_direction *direction);
+extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
 
 void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
                                unsigned char *log_buff);
@@ -207,6 +217,13 @@ int pnv_pci_cfg_read(struct pci_dn *pdn,
                     int where, int size, u32 *val);
 int pnv_pci_cfg_write(struct pci_dn *pdn,
                      int where, int size, u32 val);
+extern struct iommu_table *pnv_pci_table_alloc(int nid);
+
+extern long pnv_pci_link_table_and_group(int node, int num,
+               struct iommu_table *tbl,
+               struct iommu_table_group *table_group);
+extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+               struct iommu_table_group *table_group);
 extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
                                      void *tce_mem, u64 tce_size,
                                      u64 dma_offset, unsigned page_shift);
@@ -218,4 +235,8 @@ extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
 extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
 extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
 
+extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev);
+extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
+extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
+
 #endif /* __POWERNV_PCI_H */
index 826d2c9bea5693864c44143f2e3fb7e5a66cb609..9269e30e4ca0e75efcf476910809c9508a5b0698 100644 (file)
@@ -12,29 +12,24 @@ struct pci_dev;
 #ifdef CONFIG_PCI
 extern void pnv_pci_init(void);
 extern void pnv_pci_shutdown(void);
-extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask);
 extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev);
 #else
 static inline void pnv_pci_init(void) { }
 static inline void pnv_pci_shutdown(void) { }
 
-static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
-{
-       return -ENODEV;
-}
-
 static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
 {
        return 0;
 }
 #endif
 
-extern struct pci_controller_ops pnv_pci_controller_ops;
-
 extern u32 pnv_get_supported_cpuidle_states(void);
 
 extern void pnv_lpc_init(void);
 
+extern void opal_handle_events(uint64_t events);
+extern void opal_event_shutdown(void);
+
 bool cpu_core_split_required(void);
 
 #endif /* _POWERNV_H */
index 16fdcb23f4c3ab12f4d392b6324e5fd79ddb3b14..53737e019ae362dba4e1c52f30c55279dd4623fe 100644 (file)
 #include <asm/opal.h>
 #include <asm/kexec.h>
 #include <asm/smp.h>
-#include <asm/cputhreads.h>
-#include <asm/cpuidle.h>
-#include <asm/code-patching.h>
 
 #include "powernv.h"
-#include "subcore.h"
 
 static void __init pnv_setup_arch(void)
 {
@@ -111,7 +107,7 @@ static void pnv_prepare_going_down(void)
         * Disable all notifiers from OPAL, we can't
         * service interrupts anymore anyway
         */
-       opal_notifier_disable();
+       opal_event_shutdown();
 
        /* Soft disable interrupts */
        local_irq_disable();
@@ -169,13 +165,6 @@ static void pnv_progress(char *s, unsigned short hex)
 {
 }
 
-static int pnv_dma_set_mask(struct device *dev, u64 dma_mask)
-{
-       if (dev_is_pci(dev))
-               return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask);
-       return __dma_set_mask(dev, dma_mask);
-}
-
 static u64 pnv_dma_get_required_mask(struct device *dev)
 {
        if (dev_is_pci(dev))
@@ -277,173 +266,6 @@ static void __init pnv_setup_machdep_opal(void)
        ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
 }
 
-static u32 supported_cpuidle_states;
-
-int pnv_save_sprs_for_winkle(void)
-{
-       int cpu;
-       int rc;
-
-       /*
-        * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross
-        * all cpus at boot. Get these reg values of current cpu and use the
-        * same accross all cpus.
-        */
-       uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
-       uint64_t hid0_val = mfspr(SPRN_HID0);
-       uint64_t hid1_val = mfspr(SPRN_HID1);
-       uint64_t hid4_val = mfspr(SPRN_HID4);
-       uint64_t hid5_val = mfspr(SPRN_HID5);
-       uint64_t hmeer_val = mfspr(SPRN_HMEER);
-
-       for_each_possible_cpu(cpu) {
-               uint64_t pir = get_hard_smp_processor_id(cpu);
-               uint64_t hsprg0_val = (uint64_t)&paca[cpu];
-
-               /*
-                * HSPRG0 is used to store the cpu's pointer to paca. Hence last
-                * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0
-                * with 63rd bit set, so that when a thread wakes up at 0x100 we
-                * can use this bit to distinguish between fastsleep and
-                * deep winkle.
-                */
-               hsprg0_val |= 1;
-
-               rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
-               if (rc != 0)
-                       return rc;
-
-               rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
-               if (rc != 0)
-                       return rc;
-
-               /* HIDs are per core registers */
-               if (cpu_thread_in_core(cpu) == 0) {
-
-                       rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
-                       if (rc != 0)
-                               return rc;
-
-                       rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
-                       if (rc != 0)
-                               return rc;
-
-                       rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
-                       if (rc != 0)
-                               return rc;
-
-                       rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
-                       if (rc != 0)
-                               return rc;
-
-                       rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
-                       if (rc != 0)
-                               return rc;
-               }
-       }
-
-       return 0;
-}
-
-static void pnv_alloc_idle_core_states(void)
-{
-       int i, j;
-       int nr_cores = cpu_nr_cores();
-       u32 *core_idle_state;
-
-       /*
-        * core_idle_state - First 8 bits track the idle state of each thread
-        * of the core. The 8th bit is the lock bit. Initially all thread bits
-        * are set. They are cleared when the thread enters deep idle state
-        * like sleep and winkle. Initially the lock bit is cleared.
-        * The lock bit has 2 purposes
-        * a. While the first thread is restoring core state, it prevents
-        * other threads in the core from switching to process context.
-        * b. While the last thread in the core is saving the core state, it
-        * prevents a different thread from waking up.
-        */
-       for (i = 0; i < nr_cores; i++) {
-               int first_cpu = i * threads_per_core;
-               int node = cpu_to_node(first_cpu);
-
-               core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
-               *core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
-
-               for (j = 0; j < threads_per_core; j++) {
-                       int cpu = first_cpu + j;
-
-                       paca[cpu].core_idle_state_ptr = core_idle_state;
-                       paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
-                       paca[cpu].thread_mask = 1 << j;
-               }
-       }
-
-       update_subcore_sibling_mask();
-
-       if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
-               pnv_save_sprs_for_winkle();
-}
-
-u32 pnv_get_supported_cpuidle_states(void)
-{
-       return supported_cpuidle_states;
-}
-EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
-
-static int __init pnv_init_idle_states(void)
-{
-       struct device_node *power_mgt;
-       int dt_idle_states;
-       u32 *flags;
-       int i;
-
-       supported_cpuidle_states = 0;
-
-       if (cpuidle_disable != IDLE_NO_OVERRIDE)
-               goto out;
-
-       if (!firmware_has_feature(FW_FEATURE_OPALv3))
-               goto out;
-
-       power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
-       if (!power_mgt) {
-               pr_warn("opal: PowerMgmt Node not found\n");
-               goto out;
-       }
-       dt_idle_states = of_property_count_u32_elems(power_mgt,
-                       "ibm,cpu-idle-state-flags");
-       if (dt_idle_states < 0) {
-               pr_warn("cpuidle-powernv: no idle states found in the DT\n");
-               goto out;
-       }
-
-       flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL);
-       if (of_property_read_u32_array(power_mgt,
-                       "ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
-               pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
-               goto out_free;
-       }
-
-       for (i = 0; i < dt_idle_states; i++)
-               supported_cpuidle_states |= flags[i];
-
-       if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
-               patch_instruction(
-                       (unsigned int *)pnv_fastsleep_workaround_at_entry,
-                       PPC_INST_NOP);
-               patch_instruction(
-                       (unsigned int *)pnv_fastsleep_workaround_at_exit,
-                       PPC_INST_NOP);
-       }
-       pnv_alloc_idle_core_states();
-out_free:
-       kfree(flags);
-out:
-       return 0;
-}
-
-subsys_initcall(pnv_init_idle_states);
-
 static int __init pnv_probe(void)
 {
        unsigned long root = of_get_flat_dt_root();
@@ -492,7 +314,6 @@ define_machine(powernv) {
        .machine_shutdown       = pnv_shutdown,
        .power_save             = power7_idle,
        .calibrate_decr         = generic_calibrate_decr,
-       .dma_set_mask           = pnv_dma_set_mask,
        .dma_get_required_mask  = pnv_dma_get_required_mask,
 #ifdef CONFIG_KEXEC
        .kexec_cpu_down         = pnv_kexec_cpu_down,
index 019d34aaf054bc843d6a2d5ac531e9207e6c42ab..47d9cebe7159edb2d5d72a3167a89d928dbaa5dc 100644 (file)
@@ -421,11 +421,10 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
                return -ENODEV;
 
        dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
+       of_node_put(parent);
        if (!dn)
                return -EINVAL;
 
-       of_node_put(parent);
-
        rc = dlpar_attach_node(dn);
        if (rc) {
                dlpar_release_drc(drc_index);
index 2039397cc75d5f6230dc91c945fc7a79dc0ecc68..1ba55d0bb449ddd79cbe6a644a265d09c730a1a5 100644 (file)
@@ -519,7 +519,7 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option)
 /**
  * pseries_eeh_wait_state - Wait for PE state
  * @pe: EEH PE
- * @max_wait: maximal period in microsecond
+ * @max_wait: maximal period in millisecond
  *
  * Wait for the state of associated PE. It might take some time
  * to retrieve the PE's state.
index 61d5a17f45c0b5dff8265dc8b1606ef7c08b2ce5..10510dea16b31a30be6382a0b9a8c5c9c4e1affd 100644 (file)
@@ -36,6 +36,8 @@
 #include <linux/crash_dump.h>
 #include <linux/memory.h>
 #include <linux/of.h>
+#include <linux/iommu.h>
+#include <linux/rculist.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
 
 #include "pseries.h"
 
+static struct iommu_table_group *iommu_pseries_alloc_group(int node)
+{
+       struct iommu_table_group *table_group = NULL;
+       struct iommu_table *tbl = NULL;
+       struct iommu_table_group_link *tgl = NULL;
+
+       table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
+                          node);
+       if (!table_group)
+               goto fail_exit;
+
+       tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
+       if (!tbl)
+               goto fail_exit;
+
+       tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
+                       node);
+       if (!tgl)
+               goto fail_exit;
+
+       INIT_LIST_HEAD_RCU(&tbl->it_group_list);
+       tgl->table_group = table_group;
+       list_add_rcu(&tgl->next, &tbl->it_group_list);
+
+       table_group->tables[0] = tbl;
+
+       return table_group;
+
+fail_exit:
+       kfree(tgl);
+       kfree(table_group);
+       kfree(tbl);
+
+       return NULL;
+}
+
+static void iommu_pseries_free_group(struct iommu_table_group *table_group,
+               const char *node_name)
+{
+       struct iommu_table *tbl;
+#ifdef CONFIG_IOMMU_API
+       struct iommu_table_group_link *tgl;
+#endif
+
+       if (!table_group)
+               return;
+
+       tbl = table_group->tables[0];
+#ifdef CONFIG_IOMMU_API
+       tgl = list_first_entry_or_null(&tbl->it_group_list,
+                       struct iommu_table_group_link, next);
+
+       WARN_ON_ONCE(!tgl);
+       if (tgl) {
+               list_del_rcu(&tgl->next);
+               kfree(tgl);
+       }
+       if (table_group->group) {
+               iommu_group_put(table_group->group);
+               BUG_ON(table_group->group);
+       }
+#endif
+       iommu_free_table(tbl, node_name);
+
+       kfree(table_group);
+}
+
 static void tce_invalidate_pSeries_sw(struct iommu_table *tbl,
                                      __be64 *startp, __be64 *endp)
 {
@@ -193,7 +262,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
        int ret = 0;
        unsigned long flags;
 
-       if (npages == 1) {
+       if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
                return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
                                           direction, attrs);
        }
@@ -285,6 +354,9 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
 {
        u64 rc;
 
+       if (!firmware_has_feature(FW_FEATURE_MULTITCE))
+               return tce_free_pSeriesLP(tbl, tcenum, npages);
+
        rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
 
        if (rc && printk_ratelimit()) {
@@ -460,7 +532,6 @@ static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
        return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
 }
 
-
 #ifdef CONFIG_PCI
 static void iommu_table_setparms(struct pci_controller *phb,
                                 struct device_node *dn,
@@ -546,6 +617,12 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb,
        tbl->it_size = size >> tbl->it_page_shift;
 }
 
+struct iommu_table_ops iommu_table_pseries_ops = {
+       .set = tce_build_pSeries,
+       .clear = tce_free_pSeries,
+       .get = tce_get_pseries
+};
+
 static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
 {
        struct device_node *dn;
@@ -610,12 +687,13 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
        pci->phb->dma_window_size = 0x8000000ul;
        pci->phb->dma_window_base_cur = 0x8000000ul;
 
-       tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
-                          pci->phb->node);
+       pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
+       tbl = pci->table_group->tables[0];
 
        iommu_table_setparms(pci->phb, dn, tbl);
-       pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
-       iommu_register_group(tbl, pci_domain_nr(bus), 0);
+       tbl->it_ops = &iommu_table_pseries_ops;
+       iommu_init_table(tbl, pci->phb->node);
+       iommu_register_group(pci->table_group, pci_domain_nr(bus), 0);
 
        /* Divide the rest (1.75GB) among the children */
        pci->phb->dma_window_size = 0x80000000ul;
@@ -625,6 +703,11 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
        pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size);
 }
 
+struct iommu_table_ops iommu_table_lpar_multi_ops = {
+       .set = tce_buildmulti_pSeriesLP,
+       .clear = tce_freemulti_pSeriesLP,
+       .get = tce_get_pSeriesLP
+};
 
 static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 {
@@ -653,15 +736,17 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
        ppci = PCI_DN(pdn);
 
        pr_debug("  parent is %s, iommu_table: 0x%p\n",
-                pdn->full_name, ppci->iommu_table);
+                pdn->full_name, ppci->table_group);
 
-       if (!ppci->iommu_table) {
-               tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
-                                  ppci->phb->node);
+       if (!ppci->table_group) {
+               ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
+               tbl = ppci->table_group->tables[0];
                iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
-               ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
-               iommu_register_group(tbl, pci_domain_nr(bus), 0);
-               pr_debug("  created table: %p\n", ppci->iommu_table);
+               tbl->it_ops = &iommu_table_lpar_multi_ops;
+               iommu_init_table(tbl, ppci->phb->node);
+               iommu_register_group(ppci->table_group,
+                               pci_domain_nr(bus), 0);
+               pr_debug("  created table: %p\n", ppci->table_group);
        }
 }
 
@@ -683,13 +768,15 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
                struct pci_controller *phb = PCI_DN(dn)->phb;
 
                pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
-               tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
-                                  phb->node);
+               PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node);
+               tbl = PCI_DN(dn)->table_group->tables[0];
                iommu_table_setparms(phb, dn, tbl);
-               PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
-               iommu_register_group(tbl, pci_domain_nr(phb->bus), 0);
-               set_iommu_table_base_and_group(&dev->dev,
-                                              PCI_DN(dn)->iommu_table);
+               tbl->it_ops = &iommu_table_pseries_ops;
+               iommu_init_table(tbl, phb->node);
+               iommu_register_group(PCI_DN(dn)->table_group,
+                               pci_domain_nr(phb->bus), 0);
+               set_iommu_table_base(&dev->dev, tbl);
+               iommu_add_device(&dev->dev);
                return;
        }
 
@@ -697,13 +784,14 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
         * an already allocated iommu table is found and use that.
         */
 
-       while (dn && PCI_DN(dn) && PCI_DN(dn)->iommu_table == NULL)
+       while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
                dn = dn->parent;
 
-       if (dn && PCI_DN(dn))
-               set_iommu_table_base_and_group(&dev->dev,
-                                              PCI_DN(dn)->iommu_table);
-       else
+       if (dn && PCI_DN(dn)) {
+               set_iommu_table_base(&dev->dev,
+                               PCI_DN(dn)->table_group->tables[0]);
+               iommu_add_device(&dev->dev);
+       } else
                printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
                       pci_name(dev));
 }
@@ -1088,7 +1176,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
        dn = pci_device_to_OF_node(dev);
        pr_debug("  node is %s\n", dn->full_name);
 
-       for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
+       for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
             pdn = pdn->parent) {
                dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
                if (dma_window)
@@ -1104,18 +1192,21 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
        pr_debug("  parent is %s\n", pdn->full_name);
 
        pci = PCI_DN(pdn);
-       if (!pci->iommu_table) {
-               tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
-                                  pci->phb->node);
+       if (!pci->table_group) {
+               pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
+               tbl = pci->table_group->tables[0];
                iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
-               pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
-               iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0);
-               pr_debug("  created table: %p\n", pci->iommu_table);
+               tbl->it_ops = &iommu_table_lpar_multi_ops;
+               iommu_init_table(tbl, pci->phb->node);
+               iommu_register_group(pci->table_group,
+                               pci_domain_nr(pci->phb->bus), 0);
+               pr_debug("  created table: %p\n", pci->table_group);
        } else {
-               pr_debug("  found DMA window, table: %p\n", pci->iommu_table);
+               pr_debug("  found DMA window, table: %p\n", pci->table_group);
        }
 
-       set_iommu_table_base_and_group(&dev->dev, pci->iommu_table);
+       set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
+       iommu_add_device(&dev->dev);
 }
 
 static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
@@ -1145,7 +1236,7 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
                 * search upwards in the tree until we either hit a dma-window
                 * property, OR find a parent with a table already allocated.
                 */
-               for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
+               for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
                                pdn = pdn->parent) {
                        dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
                        if (dma_window)
@@ -1189,7 +1280,7 @@ static u64 dma_get_required_mask_pSeriesLP(struct device *dev)
                dn = pci_device_to_OF_node(pdev);
 
                /* search upwards for ibm,dma-window */
-               for (; dn && PCI_DN(dn) && !PCI_DN(dn)->iommu_table;
+               for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group;
                                dn = dn->parent)
                        if (of_get_property(dn, "ibm,dma-window", NULL))
                                break;
@@ -1269,8 +1360,9 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
                 * the device node.
                 */
                remove_ddw(np, false);
-               if (pci && pci->iommu_table)
-                       iommu_free_table(pci->iommu_table, np->full_name);
+               if (pci && pci->table_group)
+                       iommu_pseries_free_group(pci->table_group,
+                                       np->full_name);
 
                spin_lock(&direct_window_list_lock);
                list_for_each_entry(window, &direct_window_list, list) {
@@ -1300,22 +1392,11 @@ void iommu_init_early_pSeries(void)
                return;
 
        if (firmware_has_feature(FW_FEATURE_LPAR)) {
-               if (firmware_has_feature(FW_FEATURE_MULTITCE)) {
-                       ppc_md.tce_build = tce_buildmulti_pSeriesLP;
-                       ppc_md.tce_free  = tce_freemulti_pSeriesLP;
-               } else {
-                       ppc_md.tce_build = tce_build_pSeriesLP;
-                       ppc_md.tce_free  = tce_free_pSeriesLP;
-               }
-               ppc_md.tce_get   = tce_get_pSeriesLP;
                pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
                pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
                ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
                ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP;
        } else {
-               ppc_md.tce_build = tce_build_pSeries;
-               ppc_md.tce_free  = tce_free_pSeries;
-               ppc_md.tce_get   = tce_get_pseries;
                pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
                pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
        }
@@ -1333,8 +1414,6 @@ static int __init disable_multitce(char *str)
            firmware_has_feature(FW_FEATURE_LPAR) &&
            firmware_has_feature(FW_FEATURE_MULTITCE)) {
                printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
-               ppc_md.tce_build = tce_build_pSeriesLP;
-               ppc_md.tce_free  = tce_free_pSeriesLP;
                powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
        }
        return 1;
index c8d24f9a69481d007fe4bcccb65bad5b2f032e5c..c22bb647cce678cdc9eac5121d1efddeaf00e37e 100644 (file)
@@ -18,6 +18,8 @@
 #include <asm/ppc-pci.h>
 #include <asm/machdep.h>
 
+#include "pseries.h"
+
 static int query_token, change_token;
 
 #define RTAS_QUERY_FN          0
@@ -505,6 +507,8 @@ static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev)
 
 static int rtas_msi_init(void)
 {
+       struct pci_controller *phb;
+
        query_token  = rtas_token("ibm,query-interrupt-source-number");
        change_token = rtas_token("ibm,change-msi");
 
@@ -516,9 +520,15 @@ static int rtas_msi_init(void)
 
        pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n");
 
-       WARN_ON(ppc_md.setup_msi_irqs);
-       ppc_md.setup_msi_irqs = rtas_setup_msi_irqs;
-       ppc_md.teardown_msi_irqs = rtas_teardown_msi_irqs;
+       WARN_ON(pseries_pci_controller_ops.setup_msi_irqs);
+       pseries_pci_controller_ops.setup_msi_irqs = rtas_setup_msi_irqs;
+       pseries_pci_controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs;
+
+       list_for_each_entry(phb, &hose_list, list_node) {
+               WARN_ON(phb->controller_ops.setup_msi_irqs);
+               phb->controller_ops.setup_msi_irqs = rtas_setup_msi_irqs;
+               phb->controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs;
+       }
 
        WARN_ON(ppc_md.pci_irq_fixup);
        ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup;
index f7cb2a1b01fa053ddbd7df7aca8a423fc84af56f..5b492a6438ffa8723ca9e78feaa17d92bff57da2 100644 (file)
@@ -2,7 +2,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 ccflags-$(CONFIG_PPC64)                := $(NO_MINIMAL_TOC)
 
-mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o mpic_pasemi_msi.o
+mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o
 obj-$(CONFIG_MPIC)             += mpic.o $(mpic-msi-obj-y)
 obj-$(CONFIG_MPIC_TIMER)        += mpic_timer.o
 obj-$(CONFIG_FSL_MPIC_TIMER_WAKEUP)    += fsl_mpic_timer_wakeup.o
index d00a5663e312d33c6e0e01c6871ba9247a59cfc6..90bcdfeedf4829fd9b8658e340f1694f97d68102 100644 (file)
@@ -286,6 +286,12 @@ static int __init dart_init(struct device_node *dart_node)
        return 0;
 }
 
+static struct iommu_table_ops iommu_dart_ops = {
+       .set = dart_build,
+       .clear = dart_free,
+       .flush = dart_flush,
+};
+
 static void iommu_table_dart_setup(void)
 {
        iommu_table_dart.it_busno = 0;
@@ -298,6 +304,7 @@ static void iommu_table_dart_setup(void)
        iommu_table_dart.it_base = (unsigned long)dart_vbase;
        iommu_table_dart.it_index = 0;
        iommu_table_dart.it_blocksize = 1;
+       iommu_table_dart.it_ops = &iommu_dart_ops;
        iommu_init_table(&iommu_table_dart, -1);
 
        /* Reserve the last page of the DART to avoid possible prefetch
@@ -386,11 +393,6 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
        if (dart_init(dn) != 0)
                goto bail;
 
-       /* Setup low level TCE operations for the core IOMMU code */
-       ppc_md.tce_build = dart_build;
-       ppc_md.tce_free  = dart_free;
-       ppc_md.tce_flush = dart_flush;
-
        /* Setup bypass if supported */
        if (dart_is_u4)
                ppc_md.dma_set_mask = dart_dma_set_mask;
index f086c6f22dc963dcc008033d01dd4f10247be7b1..5236e5427c38c2c922a2266a16ea4d172db3ef21 100644 (file)
@@ -405,6 +405,7 @@ static int fsl_of_msi_probe(struct platform_device *dev)
        const struct fsl_msi_feature *features;
        int len;
        u32 offset;
+       struct pci_controller *phb;
 
        match = of_match_device(fsl_of_msi_ids, &dev->dev);
        if (!match)
@@ -541,14 +542,20 @@ static int fsl_of_msi_probe(struct platform_device *dev)
 
        list_add_tail(&msi->list, &msi_head);
 
-       /* The multiple setting ppc_md.setup_msi_irqs will not harm things */
-       if (!ppc_md.setup_msi_irqs) {
-               ppc_md.setup_msi_irqs = fsl_setup_msi_irqs;
-               ppc_md.teardown_msi_irqs = fsl_teardown_msi_irqs;
-       } else if (ppc_md.setup_msi_irqs != fsl_setup_msi_irqs) {
-               dev_err(&dev->dev, "Different MSI driver already installed!\n");
-               err = -ENODEV;
-               goto error_out;
+       /*
+        * Apply the MSI ops to all the controllers.
+        * It doesn't hurt to reassign the same ops,
+        * but bail out if we find another MSI driver.
+        */
+       list_for_each_entry(phb, &hose_list, list_node) {
+               if (!phb->controller_ops.setup_msi_irqs) {
+                       phb->controller_ops.setup_msi_irqs = fsl_setup_msi_irqs;
+                       phb->controller_ops.teardown_msi_irqs = fsl_teardown_msi_irqs;
+               } else if (phb->controller_ops.setup_msi_irqs != fsl_setup_msi_irqs) {
+                       dev_err(&dev->dev, "Different MSI driver already installed!\n");
+                       err = -ENODEV;
+                       goto error_out;
+               }
        }
        return 0;
 error_out:
index 45598da0b3214221feb8b8d66200fe8146b3ef2f..31c33475c7b7042e2224a1094c9070c558096990 100644 (file)
@@ -204,7 +204,7 @@ static int i8259_host_xlate(struct irq_domain *h, struct device_node *ct,
        return 0;
 }
 
-static struct irq_domain_ops i8259_host_ops = {
+static const struct irq_domain_ops i8259_host_ops = {
        .match = i8259_host_match,
        .map = i8259_host_map,
        .xlate = i8259_host_xlate,
index b28733727ed3cc705b2fa2361aed4e38f1b61962..d78f1364b639d8ab0f208d37b0ef8662b3d7568e 100644 (file)
@@ -691,7 +691,7 @@ static int ipic_host_map(struct irq_domain *h, unsigned int virq,
        return 0;
 }
 
-static struct irq_domain_ops ipic_host_ops = {
+static const struct irq_domain_ops ipic_host_ops = {
        .match  = ipic_host_match,
        .map    = ipic_host_map,
        .xlate  = irq_domain_xlate_onetwocell,
index c4828c0be5bd861e734577dc982071a305e8fbea..d93a78be43469cfe5fe7f481bdf5ceb99114e11b 100644 (file)
@@ -120,7 +120,7 @@ static int mpc8xx_pic_host_xlate(struct irq_domain *h, struct device_node *ct,
 }
 
 
-static struct irq_domain_ops mpc8xx_pic_host_ops = {
+static const struct irq_domain_ops mpc8xx_pic_host_ops = {
        .map = mpc8xx_pic_host_map,
        .xlate = mpc8xx_pic_host_xlate,
 };
index b2b8447a227a34a73d7f3746a3d9e74a5e5d2616..c8e73332eaad5ab1d08ef3036be34b4b484d8ef6 100644 (file)
@@ -1195,7 +1195,7 @@ static void mpic_cascade(unsigned int irq, struct irq_desc *desc)
        chip->irq_eoi(&desc->irq_data);
 }
 
-static struct irq_domain_ops mpic_host_ops = {
+static const struct irq_domain_ops mpic_host_ops = {
        .match = mpic_host_match,
        .map = mpic_host_map,
        .xlate = mpic_host_xlate,
index 24bf07a63924e3c28745b6ab3eb264dbf93975bd..32971a41853ba4316338dff84993ebcce742250a 100644 (file)
@@ -15,7 +15,6 @@
 extern void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq);
 extern int mpic_msi_init_allocator(struct mpic *mpic);
 extern int mpic_u3msi_init(struct mpic *mpic);
-extern int mpic_pasemi_msi_init(struct mpic *mpic);
 #else
 static inline void mpic_msi_reserve_hwirq(struct mpic *mpic,
                                          irq_hw_number_t hwirq)
@@ -27,11 +26,12 @@ static inline int mpic_u3msi_init(struct mpic *mpic)
 {
        return -1;
 }
+#endif
 
-static inline int mpic_pasemi_msi_init(struct mpic *mpic)
-{
-       return -1;
-}
+#if defined(CONFIG_PCI_MSI) && defined(CONFIG_PPC_PASEMI)
+int mpic_pasemi_msi_init(struct mpic *mpic);
+#else
+static inline int mpic_pasemi_msi_init(struct mpic *mpic) { return -1; }
 #endif
 
 extern int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type);
diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c
deleted file mode 100644 (file)
index a3f660e..0000000
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright 2007, Olof Johansson, PA Semi
- *
- * Based on arch/powerpc/sysdev/mpic_u3msi.c:
- *
- * Copyright 2006, Segher Boessenkool, IBM Corporation.
- * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2 of the
- * License.
- *
- */
-
-#undef DEBUG
-
-#include <linux/irq.h>
-#include <linux/msi.h>
-#include <asm/mpic.h>
-#include <asm/prom.h>
-#include <asm/hw_irq.h>
-#include <asm/ppc-pci.h>
-#include <asm/msi_bitmap.h>
-
-#include "mpic.h"
-
-/* Allocate 16 interrupts per device, to give an alignment of 16,
- * since that's the size of the grouping w.r.t. affinity. If someone
- * needs more than 32 MSI's down the road we'll have to rethink this,
- * but it should be OK for now.
- */
-#define ALLOC_CHUNK 16
-
-#define PASEMI_MSI_ADDR 0xfc080000
-
-/* A bit ugly, can we get this from the pci_dev somehow? */
-static struct mpic *msi_mpic;
-
-
-static void mpic_pasemi_msi_mask_irq(struct irq_data *data)
-{
-       pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq);
-       pci_msi_mask_irq(data);
-       mpic_mask_irq(data);
-}
-
-static void mpic_pasemi_msi_unmask_irq(struct irq_data *data)
-{
-       pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq);
-       mpic_unmask_irq(data);
-       pci_msi_unmask_irq(data);
-}
-
-static struct irq_chip mpic_pasemi_msi_chip = {
-       .irq_shutdown           = mpic_pasemi_msi_mask_irq,
-       .irq_mask               = mpic_pasemi_msi_mask_irq,
-       .irq_unmask             = mpic_pasemi_msi_unmask_irq,
-       .irq_eoi                = mpic_end_irq,
-       .irq_set_type           = mpic_set_irq_type,
-       .irq_set_affinity       = mpic_set_affinity,
-       .name                   = "PASEMI-MSI",
-};
-
-static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev)
-{
-       struct msi_desc *entry;
-
-       pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev);
-
-       list_for_each_entry(entry, &pdev->msi_list, list) {
-               if (entry->irq == NO_IRQ)
-                       continue;
-
-               irq_set_msi_desc(entry->irq, NULL);
-               msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap,
-                                      virq_to_hw(entry->irq), ALLOC_CHUNK);
-               irq_dispose_mapping(entry->irq);
-       }
-
-       return;
-}
-
-static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
-{
-       unsigned int virq;
-       struct msi_desc *entry;
-       struct msi_msg msg;
-       int hwirq;
-
-       if (type == PCI_CAP_ID_MSIX)
-               pr_debug("pasemi_msi: MSI-X untested, trying anyway\n");
-       pr_debug("pasemi_msi_setup_msi_irqs, pdev %p nvec %d type %d\n",
-                pdev, nvec, type);
-
-       msg.address_hi = 0;
-       msg.address_lo = PASEMI_MSI_ADDR;
-
-       list_for_each_entry(entry, &pdev->msi_list, list) {
-               /* Allocate 16 interrupts for now, since that's the grouping for
-                * affinity. This can be changed later if it turns out 32 is too
-                * few MSIs for someone, but restrictions will apply to how the
-                * sources can be changed independently.
-                */
-               hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap,
-                                               ALLOC_CHUNK);
-               if (hwirq < 0) {
-                       pr_debug("pasemi_msi: failed allocating hwirq\n");
-                       return hwirq;
-               }
-
-               virq = irq_create_mapping(msi_mpic->irqhost, hwirq);
-               if (virq == NO_IRQ) {
-                       pr_debug("pasemi_msi: failed mapping hwirq 0x%x\n",
-                                 hwirq);
-                       msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq,
-                                              ALLOC_CHUNK);
-                       return -ENOSPC;
-               }
-
-               /* Vector on MSI is really an offset, the hardware adds
-                * it to the value written at the magic address. So set
-                * it to 0 to remain sane.
-                */
-               mpic_set_vector(virq, 0);
-
-               irq_set_msi_desc(virq, entry);
-               irq_set_chip(virq, &mpic_pasemi_msi_chip);
-               irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING);
-
-               pr_debug("pasemi_msi: allocated virq 0x%x (hw 0x%x) " \
-                        "addr 0x%x\n", virq, hwirq, msg.address_lo);
-
-               /* Likewise, the device writes [0...511] into the target
-                * register to generate MSI [512...1023]
-                */
-               msg.data = hwirq-0x200;
-               pci_write_msi_msg(virq, &msg);
-       }
-
-       return 0;
-}
-
-int mpic_pasemi_msi_init(struct mpic *mpic)
-{
-       int rc;
-
-       if (!mpic->irqhost->of_node ||
-           !of_device_is_compatible(mpic->irqhost->of_node,
-                                    "pasemi,pwrficient-openpic"))
-               return -ENODEV;
-
-       rc = mpic_msi_init_allocator(mpic);
-       if (rc) {
-               pr_debug("pasemi_msi: Error allocating bitmap!\n");
-               return rc;
-       }
-
-       pr_debug("pasemi_msi: Registering PA Semi MPIC MSI callbacks\n");
-
-       msi_mpic = mpic;
-       WARN_ON(ppc_md.setup_msi_irqs);
-       ppc_md.setup_msi_irqs = pasemi_msi_setup_msi_irqs;
-       ppc_md.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs;
-
-       return 0;
-}
index b2cef18093893c9323dbea08f9aaf1ed0dab328a..fc46ef3b816eb3ee6b0796775c4eb77b4e9bcbd5 100644 (file)
@@ -181,6 +181,7 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 int mpic_u3msi_init(struct mpic *mpic)
 {
        int rc;
+       struct pci_controller *phb;
 
        rc = mpic_msi_init_allocator(mpic);
        if (rc) {
@@ -193,9 +194,11 @@ int mpic_u3msi_init(struct mpic *mpic)
        BUG_ON(msi_mpic);
        msi_mpic = mpic;
 
-       WARN_ON(ppc_md.setup_msi_irqs);
-       ppc_md.setup_msi_irqs = u3msi_setup_msi_irqs;
-       ppc_md.teardown_msi_irqs = u3msi_teardown_msi_irqs;
+       list_for_each_entry(phb, &hose_list, list_node) {
+               WARN_ON(phb->controller_ops.setup_msi_irqs);
+               phb->controller_ops.setup_msi_irqs = u3msi_setup_msi_irqs;
+               phb->controller_ops.teardown_msi_irqs = u3msi_teardown_msi_irqs;
+       }
 
        return 0;
 }
index 8848e99a83f21a27779c0ea56e94876bfdfda400..0f842dd16bcdd3c9463c7de27a31a40635ce7880 100644 (file)
@@ -223,7 +223,7 @@ static int mv64x60_host_map(struct irq_domain *h, unsigned int virq,
        return 0;
 }
 
-static struct irq_domain_ops mv64x60_host_ops = {
+static const struct irq_domain_ops mv64x60_host_ops = {
        .map   = mv64x60_host_map,
 };
 
index f366d2d4c0790653fd3a7eec3fcca86e864e2451..2bc33674ebfc11d2525700191552389fafc1a23d 100644 (file)
@@ -128,6 +128,7 @@ static int hsta_msi_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct resource *mem;
        int irq, ret, irq_count;
+       struct pci_controller *phb;
 
        mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        if (IS_ERR(mem)) {
@@ -171,8 +172,10 @@ static int hsta_msi_probe(struct platform_device *pdev)
                }
        }
 
-       ppc_md.setup_msi_irqs = hsta_setup_msi_irqs;
-       ppc_md.teardown_msi_irqs = hsta_teardown_msi_irqs;
+       list_for_each_entry(phb, &hose_list, list_node) {
+               phb->controller_ops.setup_msi_irqs = hsta_setup_msi_irqs;
+               phb->controller_ops.teardown_msi_irqs = hsta_teardown_msi_irqs;
+       }
        return 0;
 
 out2:
index 6e2e6aa378bbe36f6d38a7ff6b3c8c6e21c4d063..6eb21f2ea5857295d33bc5f9e8da87b7f0d9071f 100644 (file)
@@ -218,6 +218,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
        struct ppc4xx_msi *msi;
        struct resource res;
        int err = 0;
+       struct pci_controller *phb;
 
        dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n");
 
@@ -250,8 +251,10 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
        }
        ppc4xx_msi = *msi;
 
-       ppc_md.setup_msi_irqs = ppc4xx_setup_msi_irqs;
-       ppc_md.teardown_msi_irqs = ppc4xx_teardown_msi_irqs;
+       list_for_each_entry(phb, &hose_list, list_node) {
+               phb->controller_ops.setup_msi_irqs = ppc4xx_setup_msi_irqs;
+               phb->controller_ops.teardown_msi_irqs = ppc4xx_teardown_msi_irqs;
+       }
        return err;
 
 error_out:
index 543765e1ef14e6f61c6b97b1cf0fdc9eabba99ec..6512cd8caa517d92496110e8119af46ae067d242 100644 (file)
@@ -271,7 +271,7 @@ static int qe_ic_host_map(struct irq_domain *h, unsigned int virq,
        return 0;
 }
 
-static struct irq_domain_ops qe_ic_host_ops = {
+static const struct irq_domain_ops qe_ic_host_ops = {
        .match = qe_ic_host_match,
        .map = qe_ic_host_map,
        .xlate = irq_domain_xlate_onetwocell,
index 188012c58f7f4e49806d1b3bc5888cf4839daaa1..57b54476e74721eabccf3f15f130c6e2f40c81eb 100644 (file)
@@ -397,7 +397,7 @@ static int pci_irq_host_map(struct irq_domain *h, unsigned int virq,
        return 0;
 }
 
-static struct irq_domain_ops pci_irq_domain_ops = {
+static const struct irq_domain_ops pci_irq_domain_ops = {
        .map = pci_irq_host_map,
        .xlate = pci_irq_host_xlate,
 };
index 7c37157d4c24c5bfe234fb1eeda24924e65a1e8d..d77345338671c1d63b5a98d3258885d9d3aa2b98 100644 (file)
@@ -189,7 +189,7 @@ static int uic_host_map(struct irq_domain *h, unsigned int virq,
        return 0;
 }
 
-static struct irq_domain_ops uic_host_ops = {
+static const struct irq_domain_ops uic_host_ops = {
        .map    = uic_host_map,
        .xlate  = irq_domain_xlate_twocell,
 };
@@ -198,7 +198,7 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc)
 {
        struct irq_chip *chip = irq_desc_get_chip(desc);
        struct irq_data *idata = irq_desc_get_irq_data(desc);
-       struct uic *uic = irq_get_handler_data(virq);
+       struct uic *uic = irq_desc_get_handler_data(desc);
        u32 msr;
        int src;
        int subvirq;
index 2fc4cf1b75575f9916e26fcf330cfce4959f39a5..eae32654bdf225c5a271b713bb70520395f43a56 100644 (file)
@@ -147,12 +147,16 @@ static void icp_native_cause_ipi(int cpu, unsigned long data)
 {
        kvmppc_set_host_ipi(cpu, 1);
 #ifdef CONFIG_PPC_DOORBELL
-       if (cpu_has_feature(CPU_FTR_DBELL) &&
-           (cpumask_test_cpu(cpu, cpu_sibling_mask(smp_processor_id()))))
-               doorbell_cause_ipi(cpu, data);
-       else
+       if (cpu_has_feature(CPU_FTR_DBELL)) {
+               if (cpumask_test_cpu(cpu, cpu_sibling_mask(get_cpu()))) {
+                       doorbell_cause_ipi(cpu, data);
+                       put_cpu();
+                       return;
+               }
+               put_cpu();
+       }
 #endif
-               icp_native_set_qirr(cpu, IPI_PRIORITY);
+       icp_native_set_qirr(cpu, IPI_PRIORITY);
 }
 
 /*
index 878a54036a25cbff2f49c6036b3651a8173e348b..08c248eb491bad53e82afe28a73b510d4aa87f86 100644 (file)
@@ -227,7 +227,7 @@ void xics_migrate_irqs_away(void)
 
                /* Locate interrupt server */
                server = -1;
-               ics = irq_get_chip_data(virq);
+               ics = irq_desc_get_chip_data(desc);
                if (ics)
                        server = ics->get_server(ics, irq);
                if (server < 0) {
@@ -360,7 +360,7 @@ static int xics_host_xlate(struct irq_domain *h, struct device_node *ct,
        return 0;
 }
 
-static struct irq_domain_ops xics_host_ops = {
+static const struct irq_domain_ops xics_host_ops = {
        .match = xics_host_match,
        .map = xics_host_map,
        .xlate = xics_host_xlate,
index 56f0524e47a6f84f723d26a3bf90a2c688b4d051..43b8b275bc5c50e778e79f9695f9ffb85a266328 100644 (file)
@@ -179,7 +179,7 @@ static int xilinx_intc_map(struct irq_domain *h, unsigned int virq,
        return 0;
 }
 
-static struct irq_domain_ops xilinx_intc_ops = {
+static const struct irq_domain_ops xilinx_intc_ops = {
        .map = xilinx_intc_map,
        .xlate = xilinx_intc_xlate,
 };
index 8753b0f6a317790562cc1126805a11eea3a25a5d..9b409c0f14f7ccff38fa13f964c72e3d336666a1 100644 (file)
@@ -15,6 +15,8 @@
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/interrupt.h>
 
 #include <asm/opal.h>
 
@@ -23,8 +25,7 @@ struct ipmi_smi_powernv {
        u64                     interface_id;
        struct ipmi_device_id   ipmi_id;
        ipmi_smi_t              intf;
-       u64                     event;
-       struct notifier_block   event_nb;
+       unsigned int            irq;
 
        /**
         * We assume that there can only be one outstanding request, so
@@ -197,15 +198,12 @@ static struct ipmi_smi_handlers ipmi_powernv_smi_handlers = {
        .poll                   = ipmi_powernv_poll,
 };
 
-static int ipmi_opal_event(struct notifier_block *nb,
-                         unsigned long events, void *change)
+static irqreturn_t ipmi_opal_event(int irq, void *data)
 {
-       struct ipmi_smi_powernv *smi = container_of(nb,
-                                       struct ipmi_smi_powernv, event_nb);
+       struct ipmi_smi_powernv *smi = data;
 
-       if (events & smi->event)
-               ipmi_powernv_recv(smi);
-       return 0;
+       ipmi_powernv_recv(smi);
+       return IRQ_HANDLED;
 }
 
 static int ipmi_powernv_probe(struct platform_device *pdev)
@@ -240,13 +238,16 @@ static int ipmi_powernv_probe(struct platform_device *pdev)
                goto err_free;
        }
 
-       ipmi->event = 1ull << prop;
-       ipmi->event_nb.notifier_call = ipmi_opal_event;
+       ipmi->irq = irq_of_parse_and_map(dev->of_node, 0);
+       if (!ipmi->irq) {
+               dev_info(dev, "Unable to map irq from device tree\n");
+               ipmi->irq = opal_event_request(prop);
+       }
 
-       rc = opal_notifier_register(&ipmi->event_nb);
-       if (rc) {
-               dev_warn(dev, "OPAL notifier registration failed (%d)\n", rc);
-               goto err_free;
+       if (request_irq(ipmi->irq, ipmi_opal_event, IRQ_TYPE_LEVEL_HIGH,
+                               "opal-ipmi", ipmi)) {
+               dev_warn(dev, "Unable to request irq\n");
+               goto err_dispose;
        }
 
        ipmi->opal_msg = devm_kmalloc(dev,
@@ -271,7 +272,9 @@ static int ipmi_powernv_probe(struct platform_device *pdev)
 err_free_msg:
        devm_kfree(dev, ipmi->opal_msg);
 err_unregister:
-       opal_notifier_unregister(&ipmi->event_nb);
+       free_irq(ipmi->irq, ipmi);
+err_dispose:
+       irq_dispose_mapping(ipmi->irq);
 err_free:
        devm_kfree(dev, ipmi);
        return rc;
@@ -282,7 +285,9 @@ static int ipmi_powernv_remove(struct platform_device *pdev)
        struct ipmi_smi_powernv *smi = dev_get_drvdata(&pdev->dev);
 
        ipmi_unregister_smi(smi->intf);
-       opal_notifier_unregister(&smi->event_nb);
+       free_irq(smi->irq, smi);
+       irq_dispose_mapping(smi->irq);
+
        return 0;
 }
 
diff --git a/drivers/macintosh/nvram.c b/drivers/macintosh/nvram.c
deleted file mode 100644 (file)
index f0e03e7..0000000
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * /dev/nvram driver for Power Macintosh.
- */
-
-#define NVRAM_VERSION "1.0"
-
-#include <linux/module.h>
-
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/miscdevice.h>
-#include <linux/fcntl.h>
-#include <linux/nvram.h>
-#include <linux/init.h>
-#include <asm/uaccess.h>
-#include <asm/nvram.h>
-
-#define NVRAM_SIZE     8192
-
-static loff_t nvram_llseek(struct file *file, loff_t offset, int origin)
-{
-       switch (origin) {
-       case 0:
-               break;
-       case 1:
-               offset += file->f_pos;
-               break;
-       case 2:
-               offset += NVRAM_SIZE;
-               break;
-       default:
-               offset = -1;
-       }
-       if (offset < 0)
-               return -EINVAL;
-
-       file->f_pos = offset;
-       return file->f_pos;
-}
-
-static ssize_t read_nvram(struct file *file, char __user *buf,
-                         size_t count, loff_t *ppos)
-{
-       unsigned int i;
-       char __user *p = buf;
-
-       if (!access_ok(VERIFY_WRITE, buf, count))
-               return -EFAULT;
-       if (*ppos >= NVRAM_SIZE)
-               return 0;
-       for (i = *ppos; count > 0 && i < NVRAM_SIZE; ++i, ++p, --count)
-               if (__put_user(nvram_read_byte(i), p))
-                       return -EFAULT;
-       *ppos = i;
-       return p - buf;
-}
-
-static ssize_t write_nvram(struct file *file, const char __user *buf,
-                          size_t count, loff_t *ppos)
-{
-       unsigned int i;
-       const char __user *p = buf;
-       char c;
-
-       if (!access_ok(VERIFY_READ, buf, count))
-               return -EFAULT;
-       if (*ppos >= NVRAM_SIZE)
-               return 0;
-       for (i = *ppos; count > 0 && i < NVRAM_SIZE; ++i, ++p, --count) {
-               if (__get_user(c, p))
-                       return -EFAULT;
-               nvram_write_byte(c, i);
-       }
-       *ppos = i;
-       return p - buf;
-}
-
-static long nvram_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-       switch(cmd) {
-               case PMAC_NVRAM_GET_OFFSET:
-               {
-                       int part, offset;
-                       if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0)
-                               return -EFAULT;
-                       if (part < pmac_nvram_OF || part > pmac_nvram_NR)
-                               return -EINVAL;
-                       offset = pmac_get_partition(part);
-                       if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0)
-                               return -EFAULT;
-                       break;
-               }
-
-               default:
-                       return -EINVAL;
-       }
-
-       return 0;
-}
-
-const struct file_operations nvram_fops = {
-       .owner          = THIS_MODULE,
-       .llseek         = nvram_llseek,
-       .read           = read_nvram,
-       .write          = write_nvram,
-       .unlocked_ioctl = nvram_ioctl,
-};
-
-static struct miscdevice nvram_dev = {
-       NVRAM_MINOR,
-       "nvram",
-       &nvram_fops
-};
-
-int __init nvram_init(void)
-{
-       printk(KERN_INFO "Macintosh non-volatile memory driver v%s\n",
-               NVRAM_VERSION);
-       return misc_register(&nvram_dev);
-}
-
-void __exit nvram_cleanup(void)
-{
-        misc_deregister( &nvram_dev );
-}
-
-module_init(nvram_init);
-module_exit(nvram_cleanup);
-MODULE_LICENSE("GPL");
index a990b39b4dfb8716da5bb121f03ae241071ad33e..b6db9ebd52c298244fb10178a15c01f3523e3f74 100644 (file)
@@ -7,10 +7,15 @@ config CXL_BASE
        default n
        select PPC_COPRO_BASE
 
+config CXL_KERNEL_API
+       bool
+       default n
+
 config CXL
        tristate "Support for IBM Coherent Accelerators (CXL)"
        depends on PPC_POWERNV && PCI_MSI
        select CXL_BASE
+       select CXL_KERNEL_API
        default m
        help
          Select this option to enable driver support for IBM Coherent
index edb494d3ff271a038c0cfdd95ac1fe3f918c1637..14e3f8219a11cfec2723d22582091e3fffabe767 100644 (file)
@@ -1,4 +1,6 @@
-cxl-y                          += main.o file.o irq.o fault.o native.o context.o sysfs.o debugfs.o pci.o trace.o
+cxl-y                          += main.o file.o irq.o fault.o native.o
+cxl-y                          += context.o sysfs.o debugfs.o pci.o trace.o
+cxl-y                          += vphb.o api.o
 obj-$(CONFIG_CXL)              += cxl.o
 obj-$(CONFIG_CXL_BASE)         += base.o
 
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
new file mode 100644 (file)
index 0000000..0c77240
--- /dev/null
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <misc/cxl.h>
+
+#include "cxl.h"
+
+struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
+{
+       struct cxl_afu *afu;
+       struct cxl_context  *ctx;
+       int rc;
+
+       afu = cxl_pci_to_afu(dev);
+
+       ctx = cxl_context_alloc();
+       if (IS_ERR(ctx))
+               return ctx;
+
+       /* Make it a slave context.  We can promote it later? */
+       rc = cxl_context_init(ctx, afu, false, NULL);
+       if (rc) {
+               kfree(ctx);
+               return ERR_PTR(-ENOMEM);
+       }
+       cxl_assign_psn_space(ctx);
+
+       return ctx;
+}
+EXPORT_SYMBOL_GPL(cxl_dev_context_init);
+
+struct cxl_context *cxl_get_context(struct pci_dev *dev)
+{
+       return dev->dev.archdata.cxl_ctx;
+}
+EXPORT_SYMBOL_GPL(cxl_get_context);
+
+struct device *cxl_get_phys_dev(struct pci_dev *dev)
+{
+       struct cxl_afu *afu;
+
+       afu = cxl_pci_to_afu(dev);
+
+       return afu->adapter->dev.parent;
+}
+EXPORT_SYMBOL_GPL(cxl_get_phys_dev);
+
+int cxl_release_context(struct cxl_context *ctx)
+{
+       if (ctx->status != CLOSED)
+               return -EBUSY;
+
+       cxl_context_free(ctx);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_release_context);
+
+int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
+{
+       if (num == 0)
+               num = ctx->afu->pp_irqs;
+       return afu_allocate_irqs(ctx, num);
+}
+EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);
+
+void cxl_free_afu_irqs(struct cxl_context *ctx)
+{
+       cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+}
+EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);
+
+static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
+{
+       __u16 range;
+       int r;
+
+       WARN_ON(num == 0);
+
+       for (r = 0; r < CXL_IRQ_RANGES; r++) {
+               range = ctx->irqs.range[r];
+               if (num < range) {
+                       return ctx->irqs.offset[r] + num;
+               }
+               num -= range;
+       }
+       return 0;
+}
+
+int cxl_map_afu_irq(struct cxl_context *ctx, int num,
+                   irq_handler_t handler, void *cookie, char *name)
+{
+       irq_hw_number_t hwirq;
+
+       /*
+        * Find interrupt we are to register.
+        */
+       hwirq = cxl_find_afu_irq(ctx, num);
+       if (!hwirq)
+               return -ENOENT;
+
+       return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name);
+}
+EXPORT_SYMBOL_GPL(cxl_map_afu_irq);
+
+void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie)
+{
+       irq_hw_number_t hwirq;
+       unsigned int virq;
+
+       hwirq = cxl_find_afu_irq(ctx, num);
+       if (!hwirq)
+               return;
+
+       virq = irq_find_mapping(NULL, hwirq);
+       if (virq)
+               cxl_unmap_irq(virq, cookie);
+}
+EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq);
+
+/*
+ * Start a context
+ * Code here similar to afu_ioctl_start_work().
+ */
+int cxl_start_context(struct cxl_context *ctx, u64 wed,
+                     struct task_struct *task)
+{
+       int rc = 0;
+       bool kernel = true;
+
+       pr_devel("%s: pe: %i\n", __func__, ctx->pe);
+
+       mutex_lock(&ctx->status_mutex);
+       if (ctx->status == STARTED)
+               goto out; /* already started */
+
+       if (task) {
+               ctx->pid = get_task_pid(task, PIDTYPE_PID);
+               get_pid(ctx->pid);
+               kernel = false;
+       }
+
+       cxl_ctx_get();
+
+       if ((rc = cxl_attach_process(ctx, kernel, wed , 0))) {
+               put_pid(ctx->pid);
+               cxl_ctx_put();
+               goto out;
+       }
+
+       ctx->status = STARTED;
+       get_device(&ctx->afu->dev);
+out:
+       mutex_unlock(&ctx->status_mutex);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(cxl_start_context);
+
+int cxl_process_element(struct cxl_context *ctx)
+{
+       return ctx->pe;
+}
+EXPORT_SYMBOL_GPL(cxl_process_element);
+
+/* Stop a context.  Returns 0 on success, otherwise -Errno */
+int cxl_stop_context(struct cxl_context *ctx)
+{
+       int rc;
+
+       rc = __detach_context(ctx);
+       if (!rc)
+               put_device(&ctx->afu->dev);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(cxl_stop_context);
+
+void cxl_set_master(struct cxl_context *ctx)
+{
+       ctx->master = true;
+       cxl_assign_psn_space(ctx);
+}
+EXPORT_SYMBOL_GPL(cxl_set_master);
+
+/* wrappers around afu_* file ops which are EXPORTED */
+int cxl_fd_open(struct inode *inode, struct file *file)
+{
+       return afu_open(inode, file);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_open);
+int cxl_fd_release(struct inode *inode, struct file *file)
+{
+       return afu_release(inode, file);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_release);
+long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       return afu_ioctl(file, cmd, arg);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_ioctl);
+int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm)
+{
+       return afu_mmap(file, vm);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_mmap);
+unsigned int cxl_fd_poll(struct file *file, struct poll_table_struct *poll)
+{
+       return afu_poll(file, poll);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_poll);
+ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
+                       loff_t *off)
+{
+       return afu_read(file, buf, count, off);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_read);
+
+#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME
+
+/* Get a struct file and fd for a context and attach the ops */
+struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
+                       int *fd)
+{
+       struct file *file;
+       int rc, flags, fdtmp;
+
+       flags = O_RDWR | O_CLOEXEC;
+
+       /* This code is similar to anon_inode_getfd() */
+       rc = get_unused_fd_flags(flags);
+       if (rc < 0)
+               return ERR_PTR(rc);
+       fdtmp = rc;
+
+       /*
+        * Patch the file ops.  Needs to be careful that this is rentrant safe.
+        */
+       if (fops) {
+               PATCH_FOPS(open);
+               PATCH_FOPS(poll);
+               PATCH_FOPS(read);
+               PATCH_FOPS(release);
+               PATCH_FOPS(unlocked_ioctl);
+               PATCH_FOPS(compat_ioctl);
+               PATCH_FOPS(mmap);
+       } else /* use default ops */
+               fops = (struct file_operations *)&afu_fops;
+
+       file = anon_inode_getfile("cxl", fops, ctx, flags);
+       if (IS_ERR(file))
+               put_unused_fd(fdtmp);
+       *fd = fdtmp;
+       return file;
+}
+EXPORT_SYMBOL_GPL(cxl_get_fd);
+
+struct cxl_context *cxl_fops_get_context(struct file *file)
+{
+       return file->private_data;
+}
+EXPORT_SYMBOL_GPL(cxl_fops_get_context);
+
+int cxl_start_work(struct cxl_context *ctx,
+                  struct cxl_ioctl_start_work *work)
+{
+       int rc;
+
+       /* code taken from afu_ioctl_start_work */
+       if (!(work->flags & CXL_START_WORK_NUM_IRQS))
+               work->num_interrupts = ctx->afu->pp_irqs;
+       else if ((work->num_interrupts < ctx->afu->pp_irqs) ||
+                (work->num_interrupts > ctx->afu->irqs_max)) {
+               return -EINVAL;
+       }
+
+       rc = afu_register_irqs(ctx, work->num_interrupts);
+       if (rc)
+               return rc;
+
+       rc = cxl_start_context(ctx, work->work_element_descriptor, current);
+       if (rc < 0) {
+               afu_release_irqs(ctx, ctx);
+               return rc;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_start_work);
+
+void __iomem *cxl_psa_map(struct cxl_context *ctx)
+{
+       struct cxl_afu *afu = ctx->afu;
+       int rc;
+
+       rc = cxl_afu_check_and_enable(afu);
+       if (rc)
+               return NULL;
+
+       pr_devel("%s: psn_phys%llx size:%llx\n",
+                __func__, afu->psn_phys, afu->adapter->ps_size);
+       return ioremap(ctx->psn_phys, ctx->psn_size);
+}
+EXPORT_SYMBOL_GPL(cxl_psa_map);
+
+void cxl_psa_unmap(void __iomem *addr)
+{
+       iounmap(addr);
+}
+EXPORT_SYMBOL_GPL(cxl_psa_unmap);
+
+int cxl_afu_reset(struct cxl_context *ctx)
+{
+       struct cxl_afu *afu = ctx->afu;
+       int rc;
+
+       rc = __cxl_afu_reset(afu);
+       if (rc)
+               return rc;
+
+       return cxl_afu_check_and_enable(afu);
+}
+EXPORT_SYMBOL_GPL(cxl_afu_reset);
index 0654ad83675eb6fdc581b42f4a48cb859df5fab6..a9f0dd3255a2a0cfa81842647a9dd8118b049859 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/module.h>
 #include <linux/rcupdate.h>
 #include <asm/errno.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 #include "cxl.h"
 
 /* protected by rcu */
index d1b55fe62817dcd0261ab926a704a37f590ca67c..2a4c80ac322ad2500a13fc6162d8920ac4a6b8db 100644 (file)
@@ -174,7 +174,7 @@ int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma)
  * return until all outstanding interrupts for this context have completed. The
  * hardware should no longer access *ctx after this has returned.
  */
-static void __detach_context(struct cxl_context *ctx)
+int __detach_context(struct cxl_context *ctx)
 {
        enum cxl_context_status status;
 
@@ -183,12 +183,13 @@ static void __detach_context(struct cxl_context *ctx)
        ctx->status = CLOSED;
        mutex_unlock(&ctx->status_mutex);
        if (status != STARTED)
-               return;
+               return -EBUSY;
 
        WARN_ON(cxl_detach_process(ctx));
-       afu_release_irqs(ctx);
        flush_work(&ctx->fault_work); /* Only needed for dedicated process */
-       wake_up_all(&ctx->wq);
+       put_pid(ctx->pid);
+       cxl_ctx_put();
+       return 0;
 }
 
 /*
@@ -199,7 +200,14 @@ static void __detach_context(struct cxl_context *ctx)
  */
 void cxl_context_detach(struct cxl_context *ctx)
 {
-       __detach_context(ctx);
+       int rc;
+
+       rc = __detach_context(ctx);
+       if (rc)
+               return;
+
+       afu_release_irqs(ctx, ctx);
+       wake_up_all(&ctx->wq);
 }
 
 /*
@@ -216,7 +224,7 @@ void cxl_context_detach_all(struct cxl_afu *afu)
                 * Anything done in here needs to be setup before the IDR is
                 * created and torn down after the IDR removed
                 */
-               __detach_context(ctx);
+               cxl_context_detach(ctx);
 
                /*
                 * We are force detaching - remove any active PSA mappings so
@@ -232,16 +240,20 @@ void cxl_context_detach_all(struct cxl_afu *afu)
        mutex_unlock(&afu->contexts_lock);
 }
 
-void cxl_context_free(struct cxl_context *ctx)
+static void reclaim_ctx(struct rcu_head *rcu)
 {
-       mutex_lock(&ctx->afu->contexts_lock);
-       idr_remove(&ctx->afu->contexts_idr, ctx->pe);
-       mutex_unlock(&ctx->afu->contexts_lock);
-       synchronize_rcu();
+       struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu);
 
        free_page((u64)ctx->sstp);
        ctx->sstp = NULL;
 
-       put_pid(ctx->pid);
        kfree(ctx);
 }
+
+void cxl_context_free(struct cxl_context *ctx)
+{
+       mutex_lock(&ctx->afu->contexts_lock);
+       idr_remove(&ctx->afu->contexts_idr, ctx->pe);
+       mutex_unlock(&ctx->afu->contexts_lock);
+       call_rcu(&ctx->rcu, reclaim_ctx);
+}
index a1cee4767ec6c571344e93ca874e8bdac83ed3a5..4fd66cabde1ef174ba0781115400ca0f4b1d337a 100644 (file)
 #include <linux/pid.h>
 #include <linux/io.h>
 #include <linux/pci.h>
+#include <linux/fs.h>
 #include <asm/cputable.h>
 #include <asm/mmu.h>
 #include <asm/reg.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 
 #include <uapi/misc/cxl.h>
 
@@ -315,8 +316,6 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An     = {0x0A0};
 #define CXL_MAX_SLICES 4
 #define MAX_AFU_MMIO_REGS 3
 
-#define CXL_MODE_DEDICATED   0x1
-#define CXL_MODE_DIRECTED    0x2
 #define CXL_MODE_TIME_SLICED 0x4
 #define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED)
 
@@ -362,6 +361,10 @@ struct cxl_afu {
        struct mutex spa_mutex;
        spinlock_t afu_cntl_lock;
 
+       /* AFU error buffer fields and bin attribute for sysfs */
+       u64 eb_len, eb_offset;
+       struct bin_attribute attr_eb;
+
        /*
         * Only the first part of the SPA is used for the process element
         * linked list. The only other part that software needs to worry about
@@ -375,6 +378,9 @@ struct cxl_afu {
        int spa_max_procs;
        unsigned int psl_virq;
 
+       /* pointer to the vphb */
+       struct pci_controller *phb;
+
        int pp_irqs;
        int irqs_max;
        int num_procs;
@@ -455,6 +461,8 @@ struct cxl_context {
        bool pending_irq;
        bool pending_fault;
        bool pending_afu_err;
+
+       struct rcu_head rcu;
 };
 
 struct cxl {
@@ -563,6 +571,9 @@ static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg
 u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off);
 u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off);
 
+ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
+                               loff_t off, size_t count);
+
 
 struct cxl_calls {
        void (*cxl_slbia)(struct mm_struct *mm);
@@ -606,7 +617,7 @@ void cxl_release_psl_err_irq(struct cxl *adapter);
 int cxl_register_serr_irq(struct cxl_afu *afu);
 void cxl_release_serr_irq(struct cxl_afu *afu);
 int afu_register_irqs(struct cxl_context *ctx, u32 count);
-void afu_release_irqs(struct cxl_context *ctx);
+void afu_release_irqs(struct cxl_context *ctx, void *cookie);
 irqreturn_t cxl_slice_irq_err(int irq, void *data);
 
 int cxl_debugfs_init(void);
@@ -629,6 +640,10 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
                     struct address_space *mapping);
 void cxl_context_free(struct cxl_context *ctx);
 int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma);
+unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq,
+                        irq_handler_t handler, void *cookie, const char *name);
+void cxl_unmap_irq(unsigned int virq, void *cookie);
+int __detach_context(struct cxl_context *ctx);
 
 /* This matches the layout of the H_COLLECT_CA_INT_INFO retbuf */
 struct cxl_irq_info {
@@ -642,6 +657,7 @@ struct cxl_irq_info {
        u64 padding[3]; /* to match the expected retbuf size for plpar_hcall9 */
 };
 
+void cxl_assign_psn_space(struct cxl_context *ctx);
 int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed,
                            u64 amr);
 int cxl_detach_process(struct cxl_context *ctx);
@@ -653,11 +669,23 @@ int cxl_check_error(struct cxl_afu *afu);
 int cxl_afu_slbia(struct cxl_afu *afu);
 int cxl_tlb_slb_invalidate(struct cxl *adapter);
 int cxl_afu_disable(struct cxl_afu *afu);
-int cxl_afu_reset(struct cxl_afu *afu);
+int __cxl_afu_reset(struct cxl_afu *afu);
+int cxl_afu_check_and_enable(struct cxl_afu *afu);
 int cxl_psl_purge(struct cxl_afu *afu);
 
 void cxl_stop_trace(struct cxl *cxl);
+int cxl_pci_vphb_add(struct cxl_afu *afu);
+void cxl_pci_vphb_remove(struct cxl_afu *afu);
 
 extern struct pci_driver cxl_pci_driver;
+int afu_allocate_irqs(struct cxl_context *ctx, u32 count);
+
+int afu_open(struct inode *inode, struct file *file);
+int afu_release(struct inode *inode, struct file *file);
+long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int afu_mmap(struct file *file, struct vm_area_struct *vm);
+unsigned int afu_poll(struct file *file, struct poll_table_struct *poll);
+ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off);
+extern const struct file_operations afu_fops;
 
 #endif
index 5286b8b704f559eb21c1c7c5b2b4af3b9180444a..25a5418c55cb897e245a8faf8f728f059f5756fa 100644 (file)
@@ -172,8 +172,8 @@ void cxl_handle_fault(struct work_struct *fault_work)
                container_of(fault_work, struct cxl_context, fault_work);
        u64 dsisr = ctx->dsisr;
        u64 dar = ctx->dar;
-       struct task_struct *task;
-       struct mm_struct *mm;
+       struct task_struct *task = NULL;
+       struct mm_struct *mm = NULL;
 
        if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
            cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar ||
@@ -194,17 +194,19 @@ void cxl_handle_fault(struct work_struct *fault_work)
        pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. "
                "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar);
 
-       if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) {
-               pr_devel("cxl_handle_fault unable to get task %i\n",
-                        pid_nr(ctx->pid));
-               cxl_ack_ae(ctx);
-               return;
-       }
-       if (!(mm = get_task_mm(task))) {
-               pr_devel("cxl_handle_fault unable to get mm %i\n",
-                        pid_nr(ctx->pid));
-               cxl_ack_ae(ctx);
-               goto out;
+       if (!ctx->kernel) {
+               if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) {
+                       pr_devel("cxl_handle_fault unable to get task %i\n",
+                                pid_nr(ctx->pid));
+                       cxl_ack_ae(ctx);
+                       return;
+               }
+               if (!(mm = get_task_mm(task))) {
+                       pr_devel("cxl_handle_fault unable to get mm %i\n",
+                                pid_nr(ctx->pid));
+                       cxl_ack_ae(ctx);
+                       goto out;
+               }
        }
 
        if (dsisr & CXL_PSL_DSISR_An_DS)
@@ -214,9 +216,11 @@ void cxl_handle_fault(struct work_struct *fault_work)
        else
                WARN(1, "cxl_handle_fault has nothing to handle\n");
 
-       mmput(mm);
+       if (mm)
+               mmput(mm);
 out:
-       put_task_struct(task);
+       if (task)
+               put_task_struct(task);
 }
 
 static void cxl_prefault_one(struct cxl_context *ctx, u64 ea)
index 2364bcadb9a94c195abc6398a77c734f45a7e468..e3f4b69527a9bd2df592c36227c536c8a3321f50 100644 (file)
@@ -96,7 +96,8 @@ err_put_adapter:
        put_device(&adapter->dev);
        return rc;
 }
-static int afu_open(struct inode *inode, struct file *file)
+
+int afu_open(struct inode *inode, struct file *file)
 {
        return __afu_open(inode, file, false);
 }
@@ -106,7 +107,7 @@ static int afu_master_open(struct inode *inode, struct file *file)
        return __afu_open(inode, file, true);
 }
 
-static int afu_release(struct inode *inode, struct file *file)
+int afu_release(struct inode *inode, struct file *file)
 {
        struct cxl_context *ctx = file->private_data;
 
@@ -128,7 +129,6 @@ static int afu_release(struct inode *inode, struct file *file)
         */
        cxl_context_free(ctx);
 
-       cxl_ctx_put();
        return 0;
 }
 
@@ -191,7 +191,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 
        if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor,
                                     amr))) {
-               afu_release_irqs(ctx);
+               afu_release_irqs(ctx, ctx);
                goto out;
        }
 
@@ -212,7 +212,26 @@ static long afu_ioctl_process_element(struct cxl_context *ctx,
        return 0;
 }
 
-static long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long afu_ioctl_get_afu_id(struct cxl_context *ctx,
+                                struct cxl_afu_id __user *upafuid)
+{
+       struct cxl_afu_id afuid = { 0 };
+
+       afuid.card_id = ctx->afu->adapter->adapter_num;
+       afuid.afu_offset = ctx->afu->slice;
+       afuid.afu_mode = ctx->afu->current_mode;
+
+       /* set the flag bit in case the afu is a slave */
+       if (ctx->afu->current_mode == CXL_MODE_DIRECTED && !ctx->master)
+               afuid.flags |= CXL_AFUID_FLAG_SLAVE;
+
+       if (copy_to_user(upafuid, &afuid, sizeof(afuid)))
+               return -EFAULT;
+
+       return 0;
+}
+
+long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
        struct cxl_context *ctx = file->private_data;
 
@@ -225,17 +244,20 @@ static long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                return afu_ioctl_start_work(ctx, (struct cxl_ioctl_start_work __user *)arg);
        case CXL_IOCTL_GET_PROCESS_ELEMENT:
                return afu_ioctl_process_element(ctx, (__u32 __user *)arg);
+       case CXL_IOCTL_GET_AFU_ID:
+               return afu_ioctl_get_afu_id(ctx, (struct cxl_afu_id __user *)
+                                           arg);
        }
        return -EINVAL;
 }
 
-static long afu_compat_ioctl(struct file *file, unsigned int cmd,
+long afu_compat_ioctl(struct file *file, unsigned int cmd,
                             unsigned long arg)
 {
        return afu_ioctl(file, cmd, arg);
 }
 
-static int afu_mmap(struct file *file, struct vm_area_struct *vm)
+int afu_mmap(struct file *file, struct vm_area_struct *vm)
 {
        struct cxl_context *ctx = file->private_data;
 
@@ -246,7 +268,7 @@ static int afu_mmap(struct file *file, struct vm_area_struct *vm)
        return cxl_context_iomap(ctx, vm);
 }
 
-static unsigned int afu_poll(struct file *file, struct poll_table_struct *poll)
+unsigned int afu_poll(struct file *file, struct poll_table_struct *poll)
 {
        struct cxl_context *ctx = file->private_data;
        int mask = 0;
@@ -278,7 +300,7 @@ static inline int ctx_event_pending(struct cxl_context *ctx)
            ctx->pending_afu_err || (ctx->status == CLOSED));
 }
 
-static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
+ssize_t afu_read(struct file *file, char __user *buf, size_t count,
                        loff_t *off)
 {
        struct cxl_context *ctx = file->private_data;
@@ -359,7 +381,11 @@ out:
        return rc;
 }
 
-static const struct file_operations afu_fops = {
+/* 
+ * Note: if this is updated, we need to update api.c to patch the new ones in
+ * too
+ */
+const struct file_operations afu_fops = {
        .owner          = THIS_MODULE,
        .open           = afu_open,
        .poll           = afu_poll,
@@ -370,7 +396,7 @@ static const struct file_operations afu_fops = {
        .mmap           = afu_mmap,
 };
 
-static const struct file_operations afu_master_fops = {
+const struct file_operations afu_master_fops = {
        .owner          = THIS_MODULE,
        .open           = afu_master_open,
        .poll           = afu_poll,
index c8929c526691706b0c7d566b3e32bed760aad062..680cd263436db547c24ec890ca07ba114813fb33 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/slab.h>
 #include <linux/pid.h>
 #include <asm/cputable.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 
 #include "cxl.h"
 #include "trace.h"
@@ -416,9 +416,8 @@ void afu_irq_name_free(struct cxl_context *ctx)
        }
 }
 
-int afu_register_irqs(struct cxl_context *ctx, u32 count)
+int afu_allocate_irqs(struct cxl_context *ctx, u32 count)
 {
-       irq_hw_number_t hwirq;
        int rc, r, i, j = 1;
        struct cxl_irq_name *irq_name;
 
@@ -458,6 +457,18 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count)
                        j++;
                }
        }
+       return 0;
+
+out:
+       afu_irq_name_free(ctx);
+       return -ENOMEM;
+}
+
+void afu_register_hwirqs(struct cxl_context *ctx)
+{
+       irq_hw_number_t hwirq;
+       struct cxl_irq_name *irq_name;
+       int r,i;
 
        /* We've allocated all memory now, so let's do the irq allocations */
        irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list);
@@ -469,15 +480,21 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count)
                        irq_name = list_next_entry(irq_name, list);
                }
        }
+}
 
-       return 0;
+int afu_register_irqs(struct cxl_context *ctx, u32 count)
+{
+       int rc;
 
-out:
-       afu_irq_name_free(ctx);
-       return -ENOMEM;
-}
+       rc = afu_allocate_irqs(ctx, count);
+       if (rc)
+               return rc;
+
+       afu_register_hwirqs(ctx);
+       return 0;
+ }
 
-void afu_release_irqs(struct cxl_context *ctx)
+void afu_release_irqs(struct cxl_context *ctx, void *cookie)
 {
        irq_hw_number_t hwirq;
        unsigned int virq;
@@ -488,7 +505,7 @@ void afu_release_irqs(struct cxl_context *ctx)
                for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
                        virq = irq_find_mapping(NULL, hwirq);
                        if (virq)
-                               cxl_unmap_irq(virq, ctx);
+                               cxl_unmap_irq(virq, cookie);
                }
        }
 
index 8ccddceead66715f64d1a9099a4533e1bf45ce1e..833348e2c9cbc161128d2666f0329e4b18ea536b 100644 (file)
@@ -20,7 +20,7 @@
 #include <linux/idr.h>
 #include <linux/pci.h>
 #include <asm/cputable.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 
 #include "cxl.h"
 #include "trace.h"
index 29185fc61276706986e2ca6d28b640efc01f3c6a..10567f245818b73bbdb1272763834b48a9c85c5a 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/mm.h>
 #include <linux/uaccess.h>
 #include <asm/synch.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 
 #include "cxl.h"
 #include "trace.h"
@@ -73,7 +73,7 @@ int cxl_afu_disable(struct cxl_afu *afu)
 }
 
 /* This will disable as well as reset */
-int cxl_afu_reset(struct cxl_afu *afu)
+int __cxl_afu_reset(struct cxl_afu *afu)
 {
        pr_devel("AFU reset request\n");
 
@@ -83,7 +83,7 @@ int cxl_afu_reset(struct cxl_afu *afu)
                           false);
 }
 
-static int afu_check_and_enable(struct cxl_afu *afu)
+int cxl_afu_check_and_enable(struct cxl_afu *afu)
 {
        if (afu->enabled)
                return 0;
@@ -379,7 +379,7 @@ static int remove_process_element(struct cxl_context *ctx)
 }
 
 
-static void assign_psn_space(struct cxl_context *ctx)
+void cxl_assign_psn_space(struct cxl_context *ctx)
 {
        if (!ctx->afu->pp_size || ctx->master) {
                ctx->psn_phys = ctx->afu->psn_phys;
@@ -430,34 +430,46 @@ err:
 #define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE))
 #endif
 
+static u64 calculate_sr(struct cxl_context *ctx)
+{
+       u64 sr = 0;
+
+       if (ctx->master)
+               sr |= CXL_PSL_SR_An_MP;
+       if (mfspr(SPRN_LPCR) & LPCR_TC)
+               sr |= CXL_PSL_SR_An_TC;
+       if (ctx->kernel) {
+               sr |= CXL_PSL_SR_An_R | (mfmsr() & MSR_SF);
+               sr |= CXL_PSL_SR_An_HV;
+       } else {
+               sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
+               set_endian(sr);
+               sr &= ~(CXL_PSL_SR_An_HV);
+               if (!test_tsk_thread_flag(current, TIF_32BIT))
+                       sr |= CXL_PSL_SR_An_SF;
+       }
+       return sr;
+}
+
 static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr)
 {
-       u64 sr;
+       u32 pid;
        int r, result;
 
-       assign_psn_space(ctx);
+       cxl_assign_psn_space(ctx);
 
        ctx->elem->ctxtime = 0; /* disable */
        ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID));
        ctx->elem->haurp = 0; /* disable */
        ctx->elem->sdr = cpu_to_be64(mfspr(SPRN_SDR1));
 
-       sr = 0;
-       if (ctx->master)
-               sr |= CXL_PSL_SR_An_MP;
-       if (mfspr(SPRN_LPCR) & LPCR_TC)
-               sr |= CXL_PSL_SR_An_TC;
-       /* HV=0, PR=1, R=1 for userspace
-        * For kernel contexts: this would need to change
-        */
-       sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
-       set_endian(sr);
-       sr &= ~(CXL_PSL_SR_An_HV);
-       if (!test_tsk_thread_flag(current, TIF_32BIT))
-               sr |= CXL_PSL_SR_An_SF;
-       ctx->elem->common.pid = cpu_to_be32(current->pid);
+       pid = current->pid;
+       if (ctx->kernel)
+               pid = 0;
        ctx->elem->common.tid = 0;
-       ctx->elem->sr = cpu_to_be64(sr);
+       ctx->elem->common.pid = cpu_to_be32(pid);
+
+       ctx->elem->sr = cpu_to_be64(calculate_sr(ctx));
 
        ctx->elem->common.csrp = 0; /* disable */
        ctx->elem->common.aurp0 = 0; /* disable */
@@ -477,7 +489,7 @@ static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr)
        ctx->elem->common.wed = cpu_to_be64(wed);
 
        /* first guy needs to enable */
-       if ((result = afu_check_and_enable(ctx->afu)))
+       if ((result = cxl_afu_check_and_enable(ctx->afu)))
                return result;
 
        add_process_element(ctx);
@@ -495,7 +507,7 @@ static int deactivate_afu_directed(struct cxl_afu *afu)
        cxl_sysfs_afu_m_remove(afu);
        cxl_chardev_afu_remove(afu);
 
-       cxl_afu_reset(afu);
+       __cxl_afu_reset(afu);
        cxl_afu_disable(afu);
        cxl_psl_purge(afu);
 
@@ -530,20 +542,15 @@ static int activate_dedicated_process(struct cxl_afu *afu)
 static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr)
 {
        struct cxl_afu *afu = ctx->afu;
-       u64 sr;
+       u64 pid;
        int rc;
 
-       sr = 0;
-       set_endian(sr);
-       if (ctx->master)
-               sr |= CXL_PSL_SR_An_MP;
-       if (mfspr(SPRN_LPCR) & LPCR_TC)
-               sr |= CXL_PSL_SR_An_TC;
-       sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
-       if (!test_tsk_thread_flag(current, TIF_32BIT))
-               sr |= CXL_PSL_SR_An_SF;
-       cxl_p2n_write(afu, CXL_PSL_PID_TID_An, (u64)current->pid << 32);
-       cxl_p1n_write(afu, CXL_PSL_SR_An, sr);
+       pid = (u64)current->pid << 32;
+       if (ctx->kernel)
+               pid = 0;
+       cxl_p2n_write(afu, CXL_PSL_PID_TID_An, pid);
+
+       cxl_p1n_write(afu, CXL_PSL_SR_An, calculate_sr(ctx));
 
        if ((rc = cxl_write_sstp(afu, ctx->sstp0, ctx->sstp1)))
                return rc;
@@ -564,9 +571,9 @@ static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr)
        cxl_p2n_write(afu, CXL_PSL_AMR_An, amr);
 
        /* master only context for dedicated */
-       assign_psn_space(ctx);
+       cxl_assign_psn_space(ctx);
 
-       if ((rc = cxl_afu_reset(afu)))
+       if ((rc = __cxl_afu_reset(afu)))
                return rc;
 
        cxl_p2n_write(afu, CXL_PSL_WED_An, wed);
@@ -629,7 +636,7 @@ int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr)
 
 static inline int detach_process_native_dedicated(struct cxl_context *ctx)
 {
-       cxl_afu_reset(ctx->afu);
+       __cxl_afu_reset(ctx->afu);
        cxl_afu_disable(ctx->afu);
        cxl_psl_purge(ctx->afu);
        return 0;
index 1ef01647265f99b6330bbba6fdd1832707228cab..c68ef5806dbe122503742f57c6d1259a599b7cf7 100644 (file)
@@ -90,6 +90,7 @@
 /* This works a little different than the p1/p2 register accesses to make it
  * easier to pull out individual fields */
 #define AFUD_READ(afu, off)            in_be64(afu->afu_desc_mmio + off)
+#define AFUD_READ_LE(afu, off)         in_le64(afu->afu_desc_mmio + off)
 #define EXTRACT_PPC_BIT(val, bit)      (!!(val & PPC_BIT(bit)))
 #define EXTRACT_PPC_BITS(val, bs, be)  ((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be))
 
@@ -204,7 +205,7 @@ static void dump_cxl_config_space(struct pci_dev *dev)
        dev_info(&dev->dev, "p1 regs: %#llx, len: %#llx\n",
                p1_base(dev), p1_size(dev));
        dev_info(&dev->dev, "p2 regs: %#llx, len: %#llx\n",
-               p1_base(dev), p2_size(dev));
+               p2_base(dev), p2_size(dev));
        dev_info(&dev->dev, "BAR 4/5: %#llx, len: %#llx\n",
                pci_resource_start(dev, 4), pci_resource_len(dev, 4));
 
@@ -286,7 +287,8 @@ static void dump_cxl_config_space(struct pci_dev *dev)
 
 static void dump_afu_descriptor(struct cxl_afu *afu)
 {
-       u64 val;
+       u64 val, afu_cr_num, afu_cr_off, afu_cr_len;
+       int i;
 
 #define show_reg(name, what) \
        dev_info(&afu->dev, "afu desc: %30s: %#llx\n", name, what)
@@ -296,6 +298,7 @@ static void dump_afu_descriptor(struct cxl_afu *afu)
        show_reg("num_of_processes", AFUD_NUM_PROCS(val));
        show_reg("num_of_afu_CRs", AFUD_NUM_CRS(val));
        show_reg("req_prog_mode", val & 0xffffULL);
+       afu_cr_num = AFUD_NUM_CRS(val);
 
        val = AFUD_READ(afu, 0x8);
        show_reg("Reserved", val);
@@ -307,8 +310,10 @@ static void dump_afu_descriptor(struct cxl_afu *afu)
        val = AFUD_READ_CR(afu);
        show_reg("Reserved", (val >> (63-7)) & 0xff);
        show_reg("AFU_CR_len", AFUD_CR_LEN(val));
+       afu_cr_len = AFUD_CR_LEN(val) * 256;
 
        val = AFUD_READ_CR_OFF(afu);
+       afu_cr_off = val;
        show_reg("AFU_CR_offset", val);
 
        val = AFUD_READ_PPPSA(afu);
@@ -325,6 +330,11 @@ static void dump_afu_descriptor(struct cxl_afu *afu)
        val = AFUD_READ_EB_OFF(afu);
        show_reg("AFU_EB_offset", val);
 
+       for (i = 0; i < afu_cr_num; i++) {
+               val = AFUD_READ_LE(afu, afu_cr_off + i * afu_cr_len);
+               show_reg("CR Vendor", val & 0xffff);
+               show_reg("CR Device", (val >> 16) & 0xffff);
+       }
 #undef show_reg
 }
 
@@ -593,6 +603,22 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu)
        afu->crs_len = AFUD_CR_LEN(val) * 256;
        afu->crs_offset = AFUD_READ_CR_OFF(afu);
 
+
+       /* eb_len is in multiple of 4K */
+       afu->eb_len = AFUD_EB_LEN(AFUD_READ_EB(afu)) * 4096;
+       afu->eb_offset = AFUD_READ_EB_OFF(afu);
+
+       /* eb_off is 4K aligned so lower 12 bits are always zero */
+       if (EXTRACT_PPC_BITS(afu->eb_offset, 0, 11) != 0) {
+               dev_warn(&afu->dev,
+                        "Invalid AFU error buffer offset %Lx\n",
+                        afu->eb_offset);
+               dev_info(&afu->dev,
+                        "Ignoring AFU error buffer in the descriptor\n");
+               /* indicate that no afu buffer exists */
+               afu->eb_len = 0;
+       }
+
        return 0;
 }
 
@@ -631,7 +657,7 @@ static int sanitise_afu_regs(struct cxl_afu *afu)
        reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
        if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) {
                dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#.16llx\n", reg);
-               if (cxl_afu_reset(afu))
+               if (__cxl_afu_reset(afu))
                        return -EIO;
                if (cxl_afu_disable(afu))
                        return -EIO;
@@ -672,6 +698,50 @@ static int sanitise_afu_regs(struct cxl_afu *afu)
        return 0;
 }
 
+#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE
+/*
+ * afu_eb_read:
+ * Called from sysfs and reads the afu error info buffer. The h/w only supports
+ * 4/8 bytes aligned access. So in case the requested offset/count aren't 8-byte
+ * aligned the function uses a bounce buffer which can be max PAGE_SIZE.
+ */
+ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
+                               loff_t off, size_t count)
+{
+       loff_t aligned_start, aligned_end;
+       size_t aligned_length;
+       void *tbuf;
+       const void __iomem *ebuf = afu->afu_desc_mmio + afu->eb_offset;
+
+       if (count == 0 || off < 0 || (size_t)off >= afu->eb_len)
+               return 0;
+
+       /* calculate aligned read window */
+       count = min((size_t)(afu->eb_len - off), count);
+       aligned_start = round_down(off, 8);
+       aligned_end = round_up(off + count, 8);
+       aligned_length = aligned_end - aligned_start;
+
+       /* max we can copy in one read is PAGE_SIZE */
+       if (aligned_length > ERR_BUFF_MAX_COPY_SIZE) {
+               aligned_length = ERR_BUFF_MAX_COPY_SIZE;
+               count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7);
+       }
+
+       /* use bounce buffer for copy */
+       tbuf = (void *)__get_free_page(GFP_TEMPORARY);
+       if (!tbuf)
+               return -ENOMEM;
+
+       /* perform aligned read from the mmio region */
+       memcpy_fromio(tbuf, ebuf + aligned_start, aligned_length);
+       memcpy(buf, tbuf + (off & 0x7), count);
+
+       free_page((unsigned long)tbuf);
+
+       return count;
+}
+
 static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
 {
        struct cxl_afu *afu;
@@ -691,7 +761,7 @@ static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
                goto err2;
 
        /* We need to reset the AFU before we can read the AFU descriptor */
-       if ((rc = cxl_afu_reset(afu)))
+       if ((rc = __cxl_afu_reset(afu)))
                goto err2;
 
        if (cxl_verbose)
@@ -731,6 +801,9 @@ static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
 
        adapter->afu[afu->slice] = afu;
 
+       if ((rc = cxl_pci_vphb_add(afu)))
+               dev_info(&afu->dev, "Can't register vPHB\n");
+
        return 0;
 
 err_put2:
@@ -783,8 +856,10 @@ int cxl_reset(struct cxl *adapter)
 
        dev_info(&dev->dev, "CXL reset\n");
 
-       for (i = 0; i < adapter->slices; i++)
+       for (i = 0; i < adapter->slices; i++) {
+               cxl_pci_vphb_remove(adapter->afu[i]);
                cxl_remove_afu(adapter->afu[i]);
+       }
 
        /* pcie_warm_reset requests a fundamental pci reset which includes a
         * PERST assert/deassert.  PERST triggers a loading of the image
@@ -857,13 +932,13 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev)
        u8 image_state;
 
        if (!(vsec = find_cxl_vsec(dev))) {
-               dev_err(&adapter->dev, "ABORTING: CXL VSEC not found!\n");
+               dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
                return -ENODEV;
        }
 
        CXL_READ_VSEC_LENGTH(dev, vsec, &vseclen);
        if (vseclen < CXL_VSEC_MIN_SIZE) {
-               pr_err("ABORTING: CXL VSEC too short\n");
+               dev_err(&dev->dev, "ABORTING: CXL VSEC too short\n");
                return -EINVAL;
        }
 
@@ -902,24 +977,24 @@ static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev)
                return -EBUSY;
 
        if (adapter->vsec_status & CXL_UNSUPPORTED_FEATURES) {
-               dev_err(&adapter->dev, "ABORTING: CXL requires unsupported features\n");
+               dev_err(&dev->dev, "ABORTING: CXL requires unsupported features\n");
                return -EINVAL;
        }
 
        if (!adapter->slices) {
                /* Once we support dynamic reprogramming we can use the card if
                 * it supports loadable AFUs */
-               dev_err(&adapter->dev, "ABORTING: Device has no AFUs\n");
+               dev_err(&dev->dev, "ABORTING: Device has no AFUs\n");
                return -EINVAL;
        }
 
        if (!adapter->afu_desc_off || !adapter->afu_desc_size) {
-               dev_err(&adapter->dev, "ABORTING: VSEC shows no AFU descriptors\n");
+               dev_err(&dev->dev, "ABORTING: VSEC shows no AFU descriptors\n");
                return -EINVAL;
        }
 
        if (adapter->ps_size > p2_size(dev) - adapter->ps_off) {
-               dev_err(&adapter->dev, "ABORTING: Problem state size larger than "
+               dev_err(&dev->dev, "ABORTING: Problem state size larger than "
                                   "available in BAR2: 0x%llx > 0x%llx\n",
                         adapter->ps_size, p2_size(dev) - adapter->ps_off);
                return -EINVAL;
@@ -968,6 +1043,15 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev)
        if (!(adapter = cxl_alloc_adapter(dev)))
                return ERR_PTR(-ENOMEM);
 
+       if ((rc = cxl_read_vsec(adapter, dev)))
+               goto err1;
+
+       if ((rc = cxl_vsec_looks_ok(adapter, dev)))
+               goto err1;
+
+       if ((rc = setup_cxl_bars(dev)))
+               goto err1;
+
        if ((rc = switch_card_to_cxl(dev)))
                goto err1;
 
@@ -977,12 +1061,6 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev)
        if ((rc = dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)))
                goto err2;
 
-       if ((rc = cxl_read_vsec(adapter, dev)))
-               goto err2;
-
-       if ((rc = cxl_vsec_looks_ok(adapter, dev)))
-               goto err2;
-
        if ((rc = cxl_update_image_control(adapter)))
                goto err2;
 
@@ -1067,9 +1145,6 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
        if (cxl_verbose)
                dump_cxl_config_space(dev);
 
-       if ((rc = setup_cxl_bars(dev)))
-               return rc;
-
        if ((rc = pci_enable_device(dev))) {
                dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc);
                return rc;
@@ -1078,6 +1153,7 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
        adapter = cxl_init_adapter(dev);
        if (IS_ERR(adapter)) {
                dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter));
+               pci_disable_device(dev);
                return PTR_ERR(adapter);
        }
 
@@ -1092,16 +1168,18 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
 static void cxl_remove(struct pci_dev *dev)
 {
        struct cxl *adapter = pci_get_drvdata(dev);
-       int afu;
-
-       dev_warn(&dev->dev, "pci remove\n");
+       struct cxl_afu *afu;
+       int i;
 
        /*
         * Lock to prevent someone grabbing a ref through the adapter list as
         * we are removing it
         */
-       for (afu = 0; afu < adapter->slices; afu++)
-               cxl_remove_afu(adapter->afu[afu]);
+       for (i = 0; i < adapter->slices; i++) {
+               afu = adapter->afu[i];
+               cxl_pci_vphb_remove(afu);
+               cxl_remove_afu(afu);
+       }
        cxl_remove_adapter(adapter);
 }
 
@@ -1110,4 +1188,5 @@ struct pci_driver cxl_pci_driver = {
        .id_table = cxl_pci_tbl,
        .probe = cxl_probe,
        .remove = cxl_remove,
+       .shutdown = cxl_remove,
 };
index d0c38c7bc0c4bfb552f568f59f4f4371809b3b1f..31f38bc71a3d5d113263998141b62c4b9285d343 100644 (file)
@@ -185,7 +185,7 @@ static ssize_t reset_store_afu(struct device *device,
                goto err;
        }
 
-       if ((rc = cxl_afu_reset(afu)))
+       if ((rc = __cxl_afu_reset(afu)))
                goto err;
 
        rc = count;
@@ -356,6 +356,16 @@ static ssize_t api_version_compatible_show(struct device *device,
        return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION_COMPATIBLE);
 }
 
+static ssize_t afu_eb_read(struct file *filp, struct kobject *kobj,
+                              struct bin_attribute *bin_attr, char *buf,
+                              loff_t off, size_t count)
+{
+       struct cxl_afu *afu = to_cxl_afu(container_of(kobj,
+                                                     struct device, kobj));
+
+       return cxl_afu_read_err_buffer(afu, buf, off, count);
+}
+
 static struct device_attribute afu_attrs[] = {
        __ATTR_RO(mmio_size),
        __ATTR_RO(irqs_min),
@@ -534,6 +544,10 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu)
        struct afu_config_record *cr, *tmp;
        int i;
 
+       /* remove the err buffer bin attribute */
+       if (afu->eb_len)
+               device_remove_bin_file(&afu->dev, &afu->attr_eb);
+
        for (i = 0; i < ARRAY_SIZE(afu_attrs); i++)
                device_remove_file(&afu->dev, &afu_attrs[i]);
 
@@ -555,6 +569,22 @@ int cxl_sysfs_afu_add(struct cxl_afu *afu)
                        goto err;
        }
 
+       /* conditionally create the binary file for the error info buffer */
+       if (afu->eb_len) {
+               afu->attr_eb.attr.name = "afu_err_buff";
+               afu->attr_eb.attr.mode = S_IRUGO;
+               afu->attr_eb.size = afu->eb_len;
+               afu->attr_eb.read = afu_eb_read;
+
+               rc = device_create_bin_file(&afu->dev, &afu->attr_eb);
+               if (rc) {
+                       dev_err(&afu->dev,
+                               "Unable to create eb attr for the afu. Err(%d)\n",
+                               rc);
+                       goto err;
+               }
+       }
+
        for (i = 0; i < afu->crs_num; i++) {
                cr = cxl_sysfs_afu_new_cr(afu, i);
                if (IS_ERR(cr)) {
@@ -570,6 +600,9 @@ err1:
        cxl_sysfs_afu_remove(afu);
        return rc;
 err:
+       /* reset the eb_len as we haven't created the bin attr */
+       afu->eb_len = 0;
+
        for (i--; i >= 0; i--)
                device_remove_file(&afu->dev, &afu_attrs[i]);
        return rc;
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
new file mode 100644 (file)
index 0000000..b1d1983
--- /dev/null
@@ -0,0 +1,270 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/pci.h>
+#include <misc/cxl.h>
+#include "cxl.h"
+
+static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+{
+       if (dma_mask < DMA_BIT_MASK(64)) {
+               pr_info("%s only 64bit DMA supported on CXL", __func__);
+               return -EIO;
+       }
+
+       *(pdev->dev.dma_mask) = dma_mask;
+       return 0;
+}
+
+static int cxl_pci_probe_mode(struct pci_bus *bus)
+{
+       return PCI_PROBE_NORMAL;
+}
+
+static int cxl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+       return -ENODEV;
+}
+
+static void cxl_teardown_msi_irqs(struct pci_dev *pdev)
+{
+       /*
+        * MSI should never be set, but we still need to provide this
+        * callback.
+        */
+}
+
+static bool cxl_pci_enable_device_hook(struct pci_dev *dev)
+{
+       struct pci_controller *phb;
+       struct cxl_afu *afu;
+       struct cxl_context *ctx;
+
+       phb = pci_bus_to_host(dev->bus);
+       afu = (struct cxl_afu *)phb->private_data;
+       set_dma_ops(&dev->dev, &dma_direct_ops);
+       set_dma_offset(&dev->dev, PAGE_OFFSET);
+
+       /*
+        * Allocate a context to do cxl things too.  If we eventually do real
+        * DMA ops, we'll need a default context to attach them to
+        */
+       ctx = cxl_dev_context_init(dev);
+       if (!ctx)
+               return false;
+       dev->dev.archdata.cxl_ctx = ctx;
+
+       return (cxl_afu_check_and_enable(afu) == 0);
+}
+
+static void cxl_pci_disable_device(struct pci_dev *dev)
+{
+       struct cxl_context *ctx = cxl_get_context(dev);
+
+       if (ctx) {
+               if (ctx->status == STARTED) {
+                       dev_err(&dev->dev, "Default context started\n");
+                       return;
+               }
+               dev->dev.archdata.cxl_ctx = NULL;
+               cxl_release_context(ctx);
+       }
+}
+
+static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus,
+                                               unsigned long type)
+{
+       return 1;
+}
+
+static void cxl_pci_reset_secondary_bus(struct pci_dev *dev)
+{
+       /* Should we do an AFU reset here ? */
+}
+
+static int cxl_pcie_cfg_record(u8 bus, u8 devfn)
+{
+       return (bus << 8) + devfn;
+}
+
+static unsigned long cxl_pcie_cfg_addr(struct pci_controller* phb,
+                                      u8 bus, u8 devfn, int offset)
+{
+       int record = cxl_pcie_cfg_record(bus, devfn);
+
+       return (unsigned long)phb->cfg_addr + ((unsigned long)phb->cfg_data * record) + offset;
+}
+
+
+static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
+                               int offset, int len,
+                               volatile void __iomem **ioaddr,
+                               u32 *mask, int *shift)
+{
+       struct pci_controller *phb;
+       struct cxl_afu *afu;
+       unsigned long addr;
+
+       phb = pci_bus_to_host(bus);
+       if (phb == NULL)
+               return PCIBIOS_DEVICE_NOT_FOUND;
+       afu = (struct cxl_afu *)phb->private_data;
+       if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num)
+               return PCIBIOS_DEVICE_NOT_FOUND;
+       if (offset >= (unsigned long)phb->cfg_data)
+               return PCIBIOS_BAD_REGISTER_NUMBER;
+       addr = cxl_pcie_cfg_addr(phb, bus->number, devfn, offset);
+
+       *ioaddr = (void *)(addr & ~0x3ULL);
+       *shift = ((addr & 0x3) * 8);
+       switch (len) {
+       case 1:
+               *mask = 0xff;
+               break;
+       case 2:
+               *mask = 0xffff;
+               break;
+       default:
+               *mask = 0xffffffff;
+               break;
+       }
+       return 0;
+}
+
+static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
+                               int offset, int len, u32 *val)
+{
+       volatile void __iomem *ioaddr;
+       int shift, rc;
+       u32 mask;
+
+       rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr,
+                                 &mask, &shift);
+       if (rc)
+               return rc;
+
+       /* Can only read 32 bits */
+       *val = (in_le32(ioaddr) >> shift) & mask;
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
+                                int offset, int len, u32 val)
+{
+       volatile void __iomem *ioaddr;
+       u32 v, mask;
+       int shift, rc;
+
+       rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr,
+                                 &mask, &shift);
+       if (rc)
+               return rc;
+
+       /* Can only write 32 bits so do read-modify-write */
+       mask <<= shift;
+       val <<= shift;
+
+       v = (in_le32(ioaddr) & ~mask) | (val & mask);
+
+       out_le32(ioaddr, v);
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops cxl_pcie_pci_ops =
+{
+       .read = cxl_pcie_read_config,
+       .write = cxl_pcie_write_config,
+};
+
+
+static struct pci_controller_ops cxl_pci_controller_ops =
+{
+       .probe_mode = cxl_pci_probe_mode,
+       .enable_device_hook = cxl_pci_enable_device_hook,
+       .disable_device = cxl_pci_disable_device,
+       .release_device = cxl_pci_disable_device,
+       .window_alignment = cxl_pci_window_alignment,
+       .reset_secondary_bus = cxl_pci_reset_secondary_bus,
+       .setup_msi_irqs = cxl_setup_msi_irqs,
+       .teardown_msi_irqs = cxl_teardown_msi_irqs,
+       .dma_set_mask = cxl_dma_set_mask,
+};
+
+int cxl_pci_vphb_add(struct cxl_afu *afu)
+{
+       struct pci_dev *phys_dev;
+       struct pci_controller *phb, *phys_phb;
+
+       phys_dev = to_pci_dev(afu->adapter->dev.parent);
+       phys_phb = pci_bus_to_host(phys_dev->bus);
+
+       /* Alloc and setup PHB data structure */
+       phb = pcibios_alloc_controller(phys_phb->dn);
+
+       if (!phb)
+               return -ENODEV;
+
+       /* Setup parent in sysfs */
+       phb->parent = &phys_dev->dev;
+
+       /* Setup the PHB using arch provided callback */
+       phb->ops = &cxl_pcie_pci_ops;
+       phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset;
+       phb->cfg_data = (void *)(u64)afu->crs_len;
+       phb->private_data = afu;
+       phb->controller_ops = cxl_pci_controller_ops;
+
+       /* Scan the bus */
+       pcibios_scan_phb(phb);
+       if (phb->bus == NULL)
+               return -ENXIO;
+
+       /* Claim resources. This might need some rework as well depending
+        * whether we are doing probe-only or not, like assigning unassigned
+        * resources etc...
+        */
+       pcibios_claim_one_bus(phb->bus);
+
+       /* Add probed PCI devices to the device model */
+       pci_bus_add_devices(phb->bus);
+
+       afu->phb = phb;
+
+       return 0;
+}
+
+
+void cxl_pci_vphb_remove(struct cxl_afu *afu)
+{
+       struct pci_controller *phb;
+
+       /* If there is no configuration record we won't have one of these */
+       if (!afu || !afu->phb)
+               return;
+
+       phb = afu->phb;
+
+       pci_remove_root_bus(phb->bus);
+}
+
+struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev)
+{
+       struct pci_controller *phb;
+
+       phb = pci_bus_to_host(dev->bus);
+
+       return (struct cxl_afu *)phb->private_data;
+}
+EXPORT_SYMBOL_GPL(cxl_pci_to_afu);
+
+unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev)
+{
+       return cxl_pcie_cfg_record(dev->bus->number, dev->devfn);
+}
+EXPORT_SYMBOL_GPL(cxl_pci_to_cfg_record);
index c49d0b127fefb32c38539810247ff4adc5adc0fa..f73c41697a00e387f500e2f029cd6bc0299f2b57 100644 (file)
@@ -195,6 +195,14 @@ config MTD_BLOCK2MTD
          Testing MTD users (eg JFFS2) on large media and media that might
          be removed during a write (using the floppy drive).
 
+config MTD_POWERNV_FLASH
+       tristate "powernv flash MTD driver"
+       depends on PPC_POWERNV
+       help
+         This provides an MTD device to access flash on powernv OPAL
+         platforms from Linux. This device abstracts away the
+         firmware interface for flash access.
+
 comment "Disk-On-Chip Device Drivers"
 
 config MTD_DOCG3
index f0b0e611d1d6a564b460af1f68c965906f629223..7912d3a0ee343b045daf1dbdf9ae93061afad4eb 100644 (file)
@@ -16,6 +16,7 @@ obj-$(CONFIG_MTD_SPEAR_SMI)   += spear_smi.o
 obj-$(CONFIG_MTD_SST25L)       += sst25l.o
 obj-$(CONFIG_MTD_BCM47XXSFLASH)        += bcm47xxsflash.o
 obj-$(CONFIG_MTD_ST_SPI_FSM)    += st_spi_fsm.o
+obj-$(CONFIG_MTD_POWERNV_FLASH)        += powernv_flash.o
 
 
 CFLAGS_docg3.o                 += -I$(src)
diff --git a/drivers/mtd/devices/powernv_flash.c b/drivers/mtd/devices/powernv_flash.c
new file mode 100644 (file)
index 0000000..d5b870b
--- /dev/null
@@ -0,0 +1,285 @@
+/*
+ * OPAL PNOR flash MTD abstraction
+ *
+ * Copyright IBM 2015
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <asm/opal.h>
+
+
+/*
+ * This driver creates a Linux MTD abstraction for platform PNOR flash
+ * backed by OPAL calls
+ */
+
+struct powernv_flash {
+       struct mtd_info mtd;
+       u32 id;
+};
+
+enum flash_op {
+       FLASH_OP_READ,
+       FLASH_OP_WRITE,
+       FLASH_OP_ERASE,
+};
+
+static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
+               loff_t offset, size_t len, size_t *retlen, u_char *buf)
+{
+       struct powernv_flash *info = (struct powernv_flash *)mtd->priv;
+       struct device *dev = &mtd->dev;
+       int token;
+       struct opal_msg msg;
+       int rc;
+
+       dev_dbg(dev, "%s(op=%d, offset=0x%llx, len=%zu)\n",
+                       __func__, op, offset, len);
+
+       token = opal_async_get_token_interruptible();
+       if (token < 0) {
+               if (token != -ERESTARTSYS)
+                       dev_err(dev, "Failed to get an async token\n");
+
+               return token;
+       }
+
+       switch (op) {
+       case FLASH_OP_READ:
+               rc = opal_flash_read(info->id, offset, __pa(buf), len, token);
+               break;
+       case FLASH_OP_WRITE:
+               rc = opal_flash_write(info->id, offset, __pa(buf), len, token);
+               break;
+       case FLASH_OP_ERASE:
+               rc = opal_flash_erase(info->id, offset, len, token);
+               break;
+       default:
+               BUG_ON(1);
+       }
+
+       if (rc != OPAL_ASYNC_COMPLETION) {
+               dev_err(dev, "opal_flash_async_op(op=%d) failed (rc %d)\n",
+                               op, rc);
+               opal_async_release_token(token);
+               return -EIO;
+       }
+
+       rc = opal_async_wait_response(token, &msg);
+       opal_async_release_token(token);
+       if (rc) {
+               dev_err(dev, "opal async wait failed (rc %d)\n", rc);
+               return -EIO;
+       }
+
+       rc = be64_to_cpu(msg.params[1]);
+       if (rc == OPAL_SUCCESS) {
+               rc = 0;
+               if (retlen)
+                       *retlen = len;
+       } else {
+               rc = -EIO;
+       }
+
+       return rc;
+}
+
+/**
+ * powernv_flash_read - read data from the flash
+ * @mtd: the device
+ * @from: the offset to read from
+ * @len: the number of bytes to read
+ * @retlen: the number of bytes actually read
+ * @buf: the filled in buffer
+ * Returns 0 if read successful, or -ERRNO if an error occurred
+ */
+static int powernv_flash_read(struct mtd_info *mtd, loff_t from, size_t len,
+            size_t *retlen, u_char *buf)
+{
+       return powernv_flash_async_op(mtd, FLASH_OP_READ, from,
+                       len, retlen, buf);
+}
+
+/**
+ * powernv_flash_write - write data to the flash
+ * @mtd: the device
+ * @to: the offset to write to
+ * @len: the number of bytes to write
+ * @retlen: the number of bytes actually written
+ * @buf: the buffer to get bytes from
+ * Returns 0 if write successful, -ERRNO if error occurred
+ */
+static int powernv_flash_write(struct mtd_info *mtd, loff_t to, size_t len,
+                    size_t *retlen, const u_char *buf)
+{
+       return powernv_flash_async_op(mtd, FLASH_OP_WRITE, to,
+                       len, retlen, (u_char *)buf);
+}
+
+/**
+ * powernv_flash_erase - erase a range of the flash
+ * @mtd: the device
+ * @erase: the erase info; returns 0 on success or -ERRNO on error
+ */
+static int powernv_flash_erase(struct mtd_info *mtd, struct erase_info *erase)
+{
+       int rc;
+
+       erase->state = MTD_ERASING;
+
+       /* todo: register our own notifier to do a true async implementation */
+       rc =  powernv_flash_async_op(mtd, FLASH_OP_ERASE, erase->addr,
+                       erase->len, NULL, NULL);
+
+       if (rc) {
+               erase->fail_addr = erase->addr;
+               erase->state = MTD_ERASE_FAILED;
+       } else {
+               erase->state = MTD_ERASE_DONE;
+       }
+       mtd_erase_callback(erase);
+       return rc;
+}
+
+/**
+ * powernv_flash_set_driver_info - Fill the mtd_info structure
+ * @dev: the device holding the OF properties to read
+ * @mtd: The structure to fill
+ */
+static int powernv_flash_set_driver_info(struct device *dev,
+               struct mtd_info *mtd)
+{
+       u64 size;
+       u32 erase_size;
+       int rc;
+
+       rc = of_property_read_u32(dev->of_node, "ibm,flash-block-size",
+                       &erase_size);
+       if (rc) {
+               dev_err(dev, "couldn't get resource block size information\n");
+               return rc;
+       }
+
+       rc = of_property_read_u64(dev->of_node, "reg", &size);
+       if (rc) {
+               dev_err(dev, "couldn't get resource size information\n");
+               return rc;
+       }
+
+       /*
+        * Going to have to check what details I need to set and how to
+        * get them
+        */
+       mtd->name = of_get_property(dev->of_node, "name", NULL);
+       mtd->type = MTD_NORFLASH;
+       mtd->flags = MTD_WRITEABLE;
+       mtd->size = size;
+       mtd->erasesize = erase_size;
+       mtd->writebufsize = mtd->writesize = 1;
+       mtd->owner = THIS_MODULE;
+       mtd->_erase = powernv_flash_erase;
+       mtd->_read = powernv_flash_read;
+       mtd->_write = powernv_flash_write;
+       mtd->dev.parent = dev;
+       return 0;
+}
+
+/**
+ * powernv_flash_probe - probe for and register the flash device
+ * @pdev: platform device
+ *
+ * Returns 0 on success, -ENOMEM, -ENXIO on error
+ */
+static int powernv_flash_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct powernv_flash *data;
+       int ret;
+
+       data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+       if (!data) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       data->mtd.priv = data;
+
+       ret = of_property_read_u32(dev->of_node, "ibm,opal-id", &(data->id));
+       if (ret) {
+               dev_err(dev, "no device property 'ibm,opal-id'\n");
+               goto out;
+       }
+
+       ret = powernv_flash_set_driver_info(dev, &data->mtd);
+       if (ret)
+               goto out;
+
+       dev_set_drvdata(dev, data);
+
+       /*
+        * The current flash that skiboot exposes is one contiguous flash chip
+        * with an ffs partition at the start; it should prove easier for users
+        * to deal with partitions or not as they see fit
+        */
+       ret = mtd_device_register(&data->mtd, NULL, 0);
+
+out:
+       return ret;
+}
+
+/**
+ * powernv_flash_release - Release the driver
+ * @pdev: the platform device
+ *
+ * Returns 0
+ */
+static int powernv_flash_release(struct platform_device *pdev)
+{
+       struct powernv_flash *data = dev_get_drvdata(&(pdev->dev));
+
+       /* All resources should be freed automatically */
+       return mtd_device_unregister(&(data->mtd));
+}
+
+static const struct of_device_id powernv_flash_match[] = {
+       { .compatible = "ibm,opal-flash" },
+       {}
+};
+
+static struct platform_driver powernv_flash_driver = {
+       .driver         = {
+               .name           = "powernv_flash",
+               .of_match_table = powernv_flash_match,
+       },
+       .remove         = powernv_flash_release,
+       .probe          = powernv_flash_probe,
+};
+
+module_platform_driver(powernv_flash_driver);
+
+MODULE_DEVICE_TABLE(of, powernv_flash_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Cyril Bur <cyril.bur@au1.ibm.com>");
+MODULE_DESCRIPTION("MTD abstraction for OPAL flash");
index 543b234e70fc33bf728f6b30b66e0ef3048e4e1d..47b54c6aefd2688bfee7c48a506f01f9a5546ec1 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/export.h>
+#include <linux/interrupt.h>
 
 #include <asm/hvconsole.h>
 #include <asm/prom.h>
@@ -61,7 +62,6 @@ static struct hvc_opal_priv *hvc_opal_privs[MAX_NR_HVC_CONSOLES];
 /* For early boot console */
 static struct hvc_opal_priv hvc_opal_boot_priv;
 static u32 hvc_opal_boot_termno;
-static bool hvc_opal_event_registered;
 
 static const struct hv_ops hvc_opal_raw_ops = {
        .get_chars = opal_get_chars,
@@ -162,28 +162,15 @@ static const struct hv_ops hvc_opal_hvsi_ops = {
        .tiocmset = hvc_opal_hvsi_tiocmset,
 };
 
-static int hvc_opal_console_event(struct notifier_block *nb,
-                                 unsigned long events, void *change)
-{
-       if (events & OPAL_EVENT_CONSOLE_INPUT)
-               hvc_kick();
-       return 0;
-}
-
-static struct notifier_block hvc_opal_console_nb = {
-       .notifier_call  = hvc_opal_console_event,
-};
-
 static int hvc_opal_probe(struct platform_device *dev)
 {
        const struct hv_ops *ops;
        struct hvc_struct *hp;
        struct hvc_opal_priv *pv;
        hv_protocol_t proto;
-       unsigned int termno, boot = 0;
+       unsigned int termno, irq, boot = 0;
        const __be32 *reg;
 
-
        if (of_device_is_compatible(dev->dev.of_node, "ibm,opal-console-raw")) {
                proto = HV_PROTOCOL_RAW;
                ops = &hvc_opal_raw_ops;
@@ -227,18 +214,18 @@ static int hvc_opal_probe(struct platform_device *dev)
                dev->dev.of_node->full_name,
                boot ? " (boot console)" : "");
 
-       /* We don't do IRQ ... */
-       hp = hvc_alloc(termno, 0, ops, MAX_VIO_PUT_CHARS);
+       irq = opal_event_request(ilog2(OPAL_EVENT_CONSOLE_INPUT));
+       if (!irq) {
+               pr_err("hvc_opal: Unable to map interrupt for device %s\n",
+                       dev->dev.of_node->full_name);
+               return irq;
+       }
+
+       hp = hvc_alloc(termno, irq, ops, MAX_VIO_PUT_CHARS);
        if (IS_ERR(hp))
                return PTR_ERR(hp);
        dev_set_drvdata(&dev->dev, hp);
 
-       /* ...  but we use OPAL event to kick the console */
-       if (!hvc_opal_event_registered) {
-               opal_notifier_register(&hvc_opal_console_nb);
-               hvc_opal_event_registered = true;
-       }
-
        return 0;
 }
 
index 730b4ef3e0cc3543382930456f923135858cb181..0582b72ef3772cf1257cc330dcf5e32246fe72bb 100644 (file)
 #include <linux/uaccess.h>
 #include <linux/err.h>
 #include <linux/vfio.h>
+#include <linux/vmalloc.h>
 #include <asm/iommu.h>
 #include <asm/tce.h>
+#include <asm/mmu_context.h>
 
 #define DRIVER_VERSION  "0.1"
 #define DRIVER_AUTHOR   "aik@ozlabs.ru"
 static void tce_iommu_detach_group(void *iommu_data,
                struct iommu_group *iommu_group);
 
+static long try_increment_locked_vm(long npages)
+{
+       long ret = 0, locked, lock_limit;
+
+       if (!current || !current->mm)
+               return -ESRCH; /* process exited */
+
+       if (!npages)
+               return 0;
+
+       down_write(&current->mm->mmap_sem);
+       locked = current->mm->locked_vm + npages;
+       lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+       if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+               ret = -ENOMEM;
+       else
+               current->mm->locked_vm += npages;
+
+       pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
+                       npages << PAGE_SHIFT,
+                       current->mm->locked_vm << PAGE_SHIFT,
+                       rlimit(RLIMIT_MEMLOCK),
+                       ret ? " - exceeded" : "");
+
+       up_write(&current->mm->mmap_sem);
+
+       return ret;
+}
+
+static void decrement_locked_vm(long npages)
+{
+       if (!current || !current->mm || !npages)
+               return; /* process exited */
+
+       down_write(&current->mm->mmap_sem);
+       if (WARN_ON_ONCE(npages > current->mm->locked_vm))
+               npages = current->mm->locked_vm;
+       current->mm->locked_vm -= npages;
+       pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
+                       npages << PAGE_SHIFT,
+                       current->mm->locked_vm << PAGE_SHIFT,
+                       rlimit(RLIMIT_MEMLOCK));
+       up_write(&current->mm->mmap_sem);
+}
+
 /*
  * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
  *
@@ -36,6 +83,11 @@ static void tce_iommu_detach_group(void *iommu_data,
  * into DMA'ble space using the IOMMU
  */
 
+struct tce_iommu_group {
+       struct list_head next;
+       struct iommu_group *grp;
+};
+
 /*
  * The container descriptor supports only a single group per container.
  * Required by the API as the container is not supplied with the IOMMU group
@@ -43,18 +95,140 @@ static void tce_iommu_detach_group(void *iommu_data,
  */
 struct tce_container {
        struct mutex lock;
-       struct iommu_table *tbl;
        bool enabled;
+       bool v2;
+       unsigned long locked_pages;
+       struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+       struct list_head group_list;
 };
 
+static long tce_iommu_unregister_pages(struct tce_container *container,
+               __u64 vaddr, __u64 size)
+{
+       struct mm_iommu_table_group_mem_t *mem;
+
+       if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
+               return -EINVAL;
+
+       mem = mm_iommu_find(vaddr, size >> PAGE_SHIFT);
+       if (!mem)
+               return -ENOENT;
+
+       return mm_iommu_put(mem);
+}
+
+static long tce_iommu_register_pages(struct tce_container *container,
+               __u64 vaddr, __u64 size)
+{
+       long ret = 0;
+       struct mm_iommu_table_group_mem_t *mem = NULL;
+       unsigned long entries = size >> PAGE_SHIFT;
+
+       if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
+                       ((vaddr + size) < vaddr))
+               return -EINVAL;
+
+       ret = mm_iommu_get(vaddr, entries, &mem);
+       if (ret)
+               return ret;
+
+       container->enabled = true;
+
+       return 0;
+}
+
+static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl)
+{
+       unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
+                       tbl->it_size, PAGE_SIZE);
+       unsigned long *uas;
+       long ret;
+
+       BUG_ON(tbl->it_userspace);
+
+       ret = try_increment_locked_vm(cb >> PAGE_SHIFT);
+       if (ret)
+               return ret;
+
+       uas = vzalloc(cb);
+       if (!uas) {
+               decrement_locked_vm(cb >> PAGE_SHIFT);
+               return -ENOMEM;
+       }
+       tbl->it_userspace = uas;
+
+       return 0;
+}
+
+static void tce_iommu_userspace_view_free(struct iommu_table *tbl)
+{
+       unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
+                       tbl->it_size, PAGE_SIZE);
+
+       if (!tbl->it_userspace)
+               return;
+
+       vfree(tbl->it_userspace);
+       tbl->it_userspace = NULL;
+       decrement_locked_vm(cb >> PAGE_SHIFT);
+}
+
+static bool tce_page_is_contained(struct page *page, unsigned page_shift)
+{
+       /*
+        * Check that the TCE table granularity is not bigger than the size of
+        * a page we just found. Otherwise the hardware can get access to
+        * a bigger memory chunk than it should.
+        */
+       return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
+}
+
+static inline bool tce_groups_attached(struct tce_container *container)
+{
+       return !list_empty(&container->group_list);
+}
+
+static long tce_iommu_find_table(struct tce_container *container,
+               phys_addr_t ioba, struct iommu_table **ptbl)
+{
+       long i;
+
+       for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+               struct iommu_table *tbl = container->tables[i];
+
+               if (tbl) {
+                       unsigned long entry = ioba >> tbl->it_page_shift;
+                       unsigned long start = tbl->it_offset;
+                       unsigned long end = start + tbl->it_size;
+
+                       if ((start <= entry) && (entry < end)) {
+                               *ptbl = tbl;
+                               return i;
+                       }
+               }
+       }
+
+       return -1;
+}
+
+static int tce_iommu_find_free_table(struct tce_container *container)
+{
+       int i;
+
+       for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+               if (!container->tables[i])
+                       return i;
+       }
+
+       return -ENOSPC;
+}
+
 static int tce_iommu_enable(struct tce_container *container)
 {
        int ret = 0;
-       unsigned long locked, lock_limit, npages;
-       struct iommu_table *tbl = container->tbl;
-
-       if (!container->tbl)
-               return -ENXIO;
+       unsigned long locked;
+       struct iommu_table_group *table_group;
+       struct tce_iommu_group *tcegrp;
 
        if (!current->mm)
                return -ESRCH; /* process exited */
@@ -79,21 +253,38 @@ static int tce_iommu_enable(struct tce_container *container)
         * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
         * that would effectively kill the guest at random points, much better
         * enforcing the limit based on the max that the guest can map.
+        *
+        * Unfortunately at the moment it counts whole tables, no matter how
+        * much memory the guest has. I.e. for 4GB guest and 4 IOMMU groups
+        * each with 2GB DMA window, 8GB will be counted here. The reason for
+        * this is that we cannot tell here the amount of RAM used by the guest
+        * as this information is only available from KVM and VFIO is
+        * KVM agnostic.
+        *
+        * So we do not allow enabling a container without a group attached
+        * as there is no way to know how much we should increment
+        * the locked_vm counter.
         */
-       down_write(&current->mm->mmap_sem);
-       npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
-       locked = current->mm->locked_vm + npages;
-       lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-       if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
-               pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
-                               rlimit(RLIMIT_MEMLOCK));
-               ret = -ENOMEM;
-       } else {
+       if (!tce_groups_attached(container))
+               return -ENODEV;
 
-               current->mm->locked_vm += npages;
-               container->enabled = true;
-       }
-       up_write(&current->mm->mmap_sem);
+       tcegrp = list_first_entry(&container->group_list,
+                       struct tce_iommu_group, next);
+       table_group = iommu_group_get_iommudata(tcegrp->grp);
+       if (!table_group)
+               return -ENODEV;
+
+       if (!table_group->tce32_size)
+               return -EPERM;
+
+       locked = table_group->tce32_size >> PAGE_SHIFT;
+       ret = try_increment_locked_vm(locked);
+       if (ret)
+               return ret;
+
+       container->locked_pages = locked;
+
+       container->enabled = true;
 
        return ret;
 }
@@ -105,20 +296,17 @@ static void tce_iommu_disable(struct tce_container *container)
 
        container->enabled = false;
 
-       if (!container->tbl || !current->mm)
+       if (!current->mm)
                return;
 
-       down_write(&current->mm->mmap_sem);
-       current->mm->locked_vm -= (container->tbl->it_size <<
-                       IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
-       up_write(&current->mm->mmap_sem);
+       decrement_locked_vm(container->locked_pages);
 }
 
 static void *tce_iommu_open(unsigned long arg)
 {
        struct tce_container *container;
 
-       if (arg != VFIO_SPAPR_TCE_IOMMU) {
+       if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
                pr_err("tce_vfio: Wrong IOMMU type\n");
                return ERR_PTR(-EINVAL);
        }
@@ -128,36 +316,411 @@ static void *tce_iommu_open(unsigned long arg)
                return ERR_PTR(-ENOMEM);
 
        mutex_init(&container->lock);
+       INIT_LIST_HEAD_RCU(&container->group_list);
+
+       container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;
 
        return container;
 }
 
+static int tce_iommu_clear(struct tce_container *container,
+               struct iommu_table *tbl,
+               unsigned long entry, unsigned long pages);
+static void tce_iommu_free_table(struct iommu_table *tbl);
+
 static void tce_iommu_release(void *iommu_data)
 {
        struct tce_container *container = iommu_data;
+       struct iommu_table_group *table_group;
+       struct tce_iommu_group *tcegrp;
+       long i;
+
+       while (tce_groups_attached(container)) {
+               tcegrp = list_first_entry(&container->group_list,
+                               struct tce_iommu_group, next);
+               table_group = iommu_group_get_iommudata(tcegrp->grp);
+               tce_iommu_detach_group(iommu_data, tcegrp->grp);
+       }
 
-       WARN_ON(container->tbl && !container->tbl->it_group);
-       tce_iommu_disable(container);
+       /*
+        * If VFIO created a table, it was not disposed
+        * by tce_iommu_detach_group() so do it now.
+        */
+       for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+               struct iommu_table *tbl = container->tables[i];
+
+               if (!tbl)
+                       continue;
 
-       if (container->tbl && container->tbl->it_group)
-               tce_iommu_detach_group(iommu_data, container->tbl->it_group);
+               tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
+               tce_iommu_free_table(tbl);
+       }
 
+       tce_iommu_disable(container);
        mutex_destroy(&container->lock);
 
        kfree(container);
 }
 
+static void tce_iommu_unuse_page(struct tce_container *container,
+               unsigned long hpa)
+{
+       struct page *page;
+
+       page = pfn_to_page(hpa >> PAGE_SHIFT);
+       put_page(page);
+}
+
+static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size,
+               unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
+{
+       long ret = 0;
+       struct mm_iommu_table_group_mem_t *mem;
+
+       mem = mm_iommu_lookup(tce, size);
+       if (!mem)
+               return -EINVAL;
+
+       ret = mm_iommu_ua_to_hpa(mem, tce, phpa);
+       if (ret)
+               return -EINVAL;
+
+       *pmem = mem;
+
+       return 0;
+}
+
+static void tce_iommu_unuse_page_v2(struct iommu_table *tbl,
+               unsigned long entry)
+{
+       struct mm_iommu_table_group_mem_t *mem = NULL;
+       int ret;
+       unsigned long hpa = 0;
+       unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+
+       if (!pua || !current || !current->mm)
+               return;
+
+       ret = tce_iommu_prereg_ua_to_hpa(*pua, IOMMU_PAGE_SIZE(tbl),
+                       &hpa, &mem);
+       if (ret)
+               pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n",
+                               __func__, *pua, entry, ret);
+       if (mem)
+               mm_iommu_mapped_dec(mem);
+
+       *pua = 0;
+}
+
+static int tce_iommu_clear(struct tce_container *container,
+               struct iommu_table *tbl,
+               unsigned long entry, unsigned long pages)
+{
+       unsigned long oldhpa;
+       long ret;
+       enum dma_data_direction direction;
+
+       for ( ; pages; --pages, ++entry) {
+               direction = DMA_NONE;
+               oldhpa = 0;
+               ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
+               if (ret)
+                       continue;
+
+               if (direction == DMA_NONE)
+                       continue;
+
+               if (container->v2) {
+                       tce_iommu_unuse_page_v2(tbl, entry);
+                       continue;
+               }
+
+               tce_iommu_unuse_page(container, oldhpa);
+       }
+
+       return 0;
+}
+
+static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
+{
+       struct page *page = NULL;
+       enum dma_data_direction direction = iommu_tce_direction(tce);
+
+       if (get_user_pages_fast(tce & PAGE_MASK, 1,
+                       direction != DMA_TO_DEVICE, &page) != 1)
+               return -EFAULT;
+
+       *hpa = __pa((unsigned long) page_address(page));
+
+       return 0;
+}
+
+static long tce_iommu_build(struct tce_container *container,
+               struct iommu_table *tbl,
+               unsigned long entry, unsigned long tce, unsigned long pages,
+               enum dma_data_direction direction)
+{
+       long i, ret = 0;
+       struct page *page;
+       unsigned long hpa;
+       enum dma_data_direction dirtmp;
+
+       for (i = 0; i < pages; ++i) {
+               unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
+
+               ret = tce_iommu_use_page(tce, &hpa);
+               if (ret)
+                       break;
+
+               page = pfn_to_page(hpa >> PAGE_SHIFT);
+               if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+                       ret = -EPERM;
+                       break;
+               }
+
+               hpa |= offset;
+               dirtmp = direction;
+               ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+               if (ret) {
+                       tce_iommu_unuse_page(container, hpa);
+                       pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
+                                       __func__, entry << tbl->it_page_shift,
+                                       tce, ret);
+                       break;
+               }
+
+               if (dirtmp != DMA_NONE)
+                       tce_iommu_unuse_page(container, hpa);
+
+               tce += IOMMU_PAGE_SIZE(tbl);
+       }
+
+       if (ret)
+               tce_iommu_clear(container, tbl, entry, i);
+
+       return ret;
+}
+
+static long tce_iommu_build_v2(struct tce_container *container,
+               struct iommu_table *tbl,
+               unsigned long entry, unsigned long tce, unsigned long pages,
+               enum dma_data_direction direction)
+{
+       long i, ret = 0;
+       struct page *page;
+       unsigned long hpa;
+       enum dma_data_direction dirtmp;
+
+       for (i = 0; i < pages; ++i) {
+               struct mm_iommu_table_group_mem_t *mem = NULL;
+               unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl,
+                               entry + i);
+
+               ret = tce_iommu_prereg_ua_to_hpa(tce, IOMMU_PAGE_SIZE(tbl),
+                               &hpa, &mem);
+               if (ret)
+                       break;
+
+               page = pfn_to_page(hpa >> PAGE_SHIFT);
+               if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+                       ret = -EPERM;
+                       break;
+               }
+
+               /* Preserve offset within IOMMU page */
+               hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
+               dirtmp = direction;
+
+               /* The registered region is being unregistered */
+               if (mm_iommu_mapped_inc(mem))
+                       break;
+
+               ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+               if (ret) {
+                       /* dirtmp cannot be DMA_NONE here */
+                       tce_iommu_unuse_page_v2(tbl, entry + i);
+                       pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
+                                       __func__, entry << tbl->it_page_shift,
+                                       tce, ret);
+                       break;
+               }
+
+               if (dirtmp != DMA_NONE)
+                       tce_iommu_unuse_page_v2(tbl, entry + i);
+
+               *pua = tce;
+
+               tce += IOMMU_PAGE_SIZE(tbl);
+       }
+
+       if (ret)
+               tce_iommu_clear(container, tbl, entry, i);
+
+       return ret;
+}
+
+static long tce_iommu_create_table(struct tce_container *container,
+                       struct iommu_table_group *table_group,
+                       int num,
+                       __u32 page_shift,
+                       __u64 window_size,
+                       __u32 levels,
+                       struct iommu_table **ptbl)
+{
+       long ret, table_size;
+
+       table_size = table_group->ops->get_table_size(page_shift, window_size,
+                       levels);
+       if (!table_size)
+               return -EINVAL;
+
+       ret = try_increment_locked_vm(table_size >> PAGE_SHIFT);
+       if (ret)
+               return ret;
+
+       ret = table_group->ops->create_table(table_group, num,
+                       page_shift, window_size, levels, ptbl);
+
+       WARN_ON(!ret && !(*ptbl)->it_ops->free);
+       WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
+
+       if (!ret && container->v2) {
+               ret = tce_iommu_userspace_view_alloc(*ptbl);
+               if (ret)
+                       (*ptbl)->it_ops->free(*ptbl);
+       }
+
+       if (ret)
+               decrement_locked_vm(table_size >> PAGE_SHIFT);
+
+       return ret;
+}
+
+static void tce_iommu_free_table(struct iommu_table *tbl)
+{
+       unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
+
+       tce_iommu_userspace_view_free(tbl);
+       tbl->it_ops->free(tbl);
+       decrement_locked_vm(pages);
+}
+
+static long tce_iommu_create_window(struct tce_container *container,
+               __u32 page_shift, __u64 window_size, __u32 levels,
+               __u64 *start_addr)
+{
+       struct tce_iommu_group *tcegrp;
+       struct iommu_table_group *table_group;
+       struct iommu_table *tbl = NULL;
+       long ret, num;
+
+       num = tce_iommu_find_free_table(container);
+       if (num < 0)
+               return num;
+
+       /* Get the first group for ops::create_table */
+       tcegrp = list_first_entry(&container->group_list,
+                       struct tce_iommu_group, next);
+       table_group = iommu_group_get_iommudata(tcegrp->grp);
+       if (!table_group)
+               return -EFAULT;
+
+       if (!(table_group->pgsizes & (1ULL << page_shift)))
+               return -EINVAL;
+
+       if (!table_group->ops->set_window || !table_group->ops->unset_window ||
+                       !table_group->ops->get_table_size ||
+                       !table_group->ops->create_table)
+               return -EPERM;
+
+       /* Create TCE table */
+       ret = tce_iommu_create_table(container, table_group, num,
+                       page_shift, window_size, levels, &tbl);
+       if (ret)
+               return ret;
+
+       BUG_ON(!tbl->it_ops->free);
+
+       /*
+        * Program the table to every group.
+        * Groups have been tested for compatibility at the attach time.
+        */
+       list_for_each_entry(tcegrp, &container->group_list, next) {
+               table_group = iommu_group_get_iommudata(tcegrp->grp);
+
+               ret = table_group->ops->set_window(table_group, num, tbl);
+               if (ret)
+                       goto unset_exit;
+       }
+
+       container->tables[num] = tbl;
+
+       /* Return start address assigned by platform in create_table() */
+       *start_addr = tbl->it_offset << tbl->it_page_shift;
+
+       return 0;
+
+unset_exit:
+       list_for_each_entry(tcegrp, &container->group_list, next) {
+               table_group = iommu_group_get_iommudata(tcegrp->grp);
+               table_group->ops->unset_window(table_group, num);
+       }
+       tce_iommu_free_table(tbl);
+
+       return ret;
+}
+
+static long tce_iommu_remove_window(struct tce_container *container,
+               __u64 start_addr)
+{
+       struct iommu_table_group *table_group = NULL;
+       struct iommu_table *tbl;
+       struct tce_iommu_group *tcegrp;
+       int num;
+
+       num = tce_iommu_find_table(container, start_addr, &tbl);
+       if (num < 0)
+               return -EINVAL;
+
+       BUG_ON(!tbl->it_size);
+
+       /* Detach groups from IOMMUs */
+       list_for_each_entry(tcegrp, &container->group_list, next) {
+               table_group = iommu_group_get_iommudata(tcegrp->grp);
+
+               /*
+                * SPAPR TCE IOMMU exposes the default DMA window to
+                * the guest via dma32_window_start/size of
+                * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
+                * the userspace to remove this window, some do not so
+                * here we check for the platform capability.
+                */
+               if (!table_group->ops || !table_group->ops->unset_window)
+                       return -EPERM;
+
+               table_group->ops->unset_window(table_group, num);
+       }
+
+       /* Free table */
+       tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
+       tce_iommu_free_table(tbl);
+       container->tables[num] = NULL;
+
+       return 0;
+}
+
 static long tce_iommu_ioctl(void *iommu_data,
                                 unsigned int cmd, unsigned long arg)
 {
        struct tce_container *container = iommu_data;
-       unsigned long minsz;
+       unsigned long minsz, ddwsz;
        long ret;
 
        switch (cmd) {
        case VFIO_CHECK_EXTENSION:
                switch (arg) {
                case VFIO_SPAPR_TCE_IOMMU:
+               case VFIO_SPAPR_TCE_v2_IOMMU:
                        ret = 1;
                        break;
                default:
@@ -169,9 +732,17 @@ static long tce_iommu_ioctl(void *iommu_data,
 
        case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
                struct vfio_iommu_spapr_tce_info info;
-               struct iommu_table *tbl = container->tbl;
+               struct tce_iommu_group *tcegrp;
+               struct iommu_table_group *table_group;
+
+               if (!tce_groups_attached(container))
+                       return -ENXIO;
+
+               tcegrp = list_first_entry(&container->group_list,
+                               struct tce_iommu_group, next);
+               table_group = iommu_group_get_iommudata(tcegrp->grp);
 
-               if (WARN_ON(!tbl))
+               if (!table_group)
                        return -ENXIO;
 
                minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
@@ -183,9 +754,24 @@ static long tce_iommu_ioctl(void *iommu_data,
                if (info.argsz < minsz)
                        return -EINVAL;
 
-               info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT_4K;
-               info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT_4K;
+               info.dma32_window_start = table_group->tce32_start;
+               info.dma32_window_size = table_group->tce32_size;
                info.flags = 0;
+               memset(&info.ddw, 0, sizeof(info.ddw));
+
+               if (table_group->max_dynamic_windows_supported &&
+                               container->v2) {
+                       info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
+                       info.ddw.pgsizes = table_group->pgsizes;
+                       info.ddw.max_dynamic_windows_supported =
+                               table_group->max_dynamic_windows_supported;
+                       info.ddw.levels = table_group->max_levels;
+               }
+
+               ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);
+
+               if (info.argsz >= ddwsz)
+                       minsz = ddwsz;
 
                if (copy_to_user((void __user *)arg, &info, minsz))
                        return -EFAULT;
@@ -194,13 +780,12 @@ static long tce_iommu_ioctl(void *iommu_data,
        }
        case VFIO_IOMMU_MAP_DMA: {
                struct vfio_iommu_type1_dma_map param;
-               struct iommu_table *tbl = container->tbl;
-               unsigned long tce, i;
+               struct iommu_table *tbl = NULL;
+               long num;
+               enum dma_data_direction direction;
 
-               if (!tbl)
-                       return -ENXIO;
-
-               BUG_ON(!tbl->it_group);
+               if (!container->enabled)
+                       return -EPERM;
 
                minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
 
@@ -214,32 +799,43 @@ static long tce_iommu_ioctl(void *iommu_data,
                                VFIO_DMA_MAP_FLAG_WRITE))
                        return -EINVAL;
 
-               if ((param.size & ~IOMMU_PAGE_MASK_4K) ||
-                               (param.vaddr & ~IOMMU_PAGE_MASK_4K))
+               num = tce_iommu_find_table(container, param.iova, &tbl);
+               if (num < 0)
+                       return -ENXIO;
+
+               if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
+                               (param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
                        return -EINVAL;
 
                /* iova is checked by the IOMMU API */
-               tce = param.vaddr;
-               if (param.flags & VFIO_DMA_MAP_FLAG_READ)
-                       tce |= TCE_PCI_READ;
-               if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
-                       tce |= TCE_PCI_WRITE;
+               if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
+                       if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
+                               direction = DMA_BIDIRECTIONAL;
+                       else
+                               direction = DMA_TO_DEVICE;
+               } else {
+                       if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
+                               direction = DMA_FROM_DEVICE;
+                       else
+                               return -EINVAL;
+               }
 
-               ret = iommu_tce_put_param_check(tbl, param.iova, tce);
+               ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
                if (ret)
                        return ret;
 
-               for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT_4K); ++i) {
-                       ret = iommu_put_tce_user_mode(tbl,
-                                       (param.iova >> IOMMU_PAGE_SHIFT_4K) + i,
-                                       tce);
-                       if (ret)
-                               break;
-                       tce += IOMMU_PAGE_SIZE_4K;
-               }
-               if (ret)
-                       iommu_clear_tces_and_put_pages(tbl,
-                                       param.iova >> IOMMU_PAGE_SHIFT_4K, i);
+               if (container->v2)
+                       ret = tce_iommu_build_v2(container, tbl,
+                                       param.iova >> tbl->it_page_shift,
+                                       param.vaddr,
+                                       param.size >> tbl->it_page_shift,
+                                       direction);
+               else
+                       ret = tce_iommu_build(container, tbl,
+                                       param.iova >> tbl->it_page_shift,
+                                       param.vaddr,
+                                       param.size >> tbl->it_page_shift,
+                                       direction);
 
                iommu_flush_tce(tbl);
 
@@ -247,10 +843,11 @@ static long tce_iommu_ioctl(void *iommu_data,
        }
        case VFIO_IOMMU_UNMAP_DMA: {
                struct vfio_iommu_type1_dma_unmap param;
-               struct iommu_table *tbl = container->tbl;
+               struct iommu_table *tbl = NULL;
+               long num;
 
-               if (WARN_ON(!tbl))
-                       return -ENXIO;
+               if (!container->enabled)
+                       return -EPERM;
 
                minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
                                size);
@@ -265,22 +862,81 @@ static long tce_iommu_ioctl(void *iommu_data,
                if (param.flags)
                        return -EINVAL;
 
-               if (param.size & ~IOMMU_PAGE_MASK_4K)
+               num = tce_iommu_find_table(container, param.iova, &tbl);
+               if (num < 0)
+                       return -ENXIO;
+
+               if (param.size & ~IOMMU_PAGE_MASK(tbl))
                        return -EINVAL;
 
                ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
-                               param.size >> IOMMU_PAGE_SHIFT_4K);
+                               param.size >> tbl->it_page_shift);
                if (ret)
                        return ret;
 
-               ret = iommu_clear_tces_and_put_pages(tbl,
-                               param.iova >> IOMMU_PAGE_SHIFT_4K,
-                               param.size >> IOMMU_PAGE_SHIFT_4K);
+               ret = tce_iommu_clear(container, tbl,
+                               param.iova >> tbl->it_page_shift,
+                               param.size >> tbl->it_page_shift);
                iommu_flush_tce(tbl);
 
                return ret;
        }
+       case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
+               struct vfio_iommu_spapr_register_memory param;
+
+               if (!container->v2)
+                       break;
+
+               minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
+                               size);
+
+               if (copy_from_user(&param, (void __user *)arg, minsz))
+                       return -EFAULT;
+
+               if (param.argsz < minsz)
+                       return -EINVAL;
+
+               /* No flag is supported now */
+               if (param.flags)
+                       return -EINVAL;
+
+               mutex_lock(&container->lock);
+               ret = tce_iommu_register_pages(container, param.vaddr,
+                               param.size);
+               mutex_unlock(&container->lock);
+
+               return ret;
+       }
+       case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
+               struct vfio_iommu_spapr_register_memory param;
+
+               if (!container->v2)
+                       break;
+
+               minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
+                               size);
+
+               if (copy_from_user(&param, (void __user *)arg, minsz))
+                       return -EFAULT;
+
+               if (param.argsz < minsz)
+                       return -EINVAL;
+
+               /* No flag is supported now */
+               if (param.flags)
+                       return -EINVAL;
+
+               mutex_lock(&container->lock);
+               ret = tce_iommu_unregister_pages(container, param.vaddr,
+                               param.size);
+               mutex_unlock(&container->lock);
+
+               return ret;
+       }
        case VFIO_IOMMU_ENABLE:
+               if (container->v2)
+                       break;
+
                mutex_lock(&container->lock);
                ret = tce_iommu_enable(container);
                mutex_unlock(&container->lock);
@@ -288,48 +944,280 @@ static long tce_iommu_ioctl(void *iommu_data,
 
 
        case VFIO_IOMMU_DISABLE:
+               if (container->v2)
+                       break;
+
                mutex_lock(&container->lock);
                tce_iommu_disable(container);
                mutex_unlock(&container->lock);
                return 0;
-       case VFIO_EEH_PE_OP:
-               if (!container->tbl || !container->tbl->it_group)
-                       return -ENODEV;
 
-               return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
-                                                 cmd, arg);
+       case VFIO_EEH_PE_OP: {
+               struct tce_iommu_group *tcegrp;
+
+               ret = 0;
+               list_for_each_entry(tcegrp, &container->group_list, next) {
+                       ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
+                                       cmd, arg);
+                       if (ret)
+                               return ret;
+               }
+               return ret;
+       }
+
+       case VFIO_IOMMU_SPAPR_TCE_CREATE: {
+               struct vfio_iommu_spapr_tce_create create;
+
+               if (!container->v2)
+                       break;
+
+               if (!tce_groups_attached(container))
+                       return -ENXIO;
+
+               minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
+                               start_addr);
+
+               if (copy_from_user(&create, (void __user *)arg, minsz))
+                       return -EFAULT;
+
+               if (create.argsz < minsz)
+                       return -EINVAL;
+
+               if (create.flags)
+                       return -EINVAL;
+
+               mutex_lock(&container->lock);
+
+               ret = tce_iommu_create_window(container, create.page_shift,
+                               create.window_size, create.levels,
+                               &create.start_addr);
+
+               mutex_unlock(&container->lock);
+
+               if (!ret && copy_to_user((void __user *)arg, &create, minsz))
+                       ret = -EFAULT;
+
+               return ret;
+       }
+       case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
+               struct vfio_iommu_spapr_tce_remove remove;
+
+               if (!container->v2)
+                       break;
+
+               if (!tce_groups_attached(container))
+                       return -ENXIO;
+
+               minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
+                               start_addr);
+
+               if (copy_from_user(&remove, (void __user *)arg, minsz))
+                       return -EFAULT;
+
+               if (remove.argsz < minsz)
+                       return -EINVAL;
+
+               if (remove.flags)
+                       return -EINVAL;
+
+               mutex_lock(&container->lock);
+
+               ret = tce_iommu_remove_window(container, remove.start_addr);
+
+               mutex_unlock(&container->lock);
+
+               return ret;
+       }
        }
 
        return -ENOTTY;
 }
 
+static void tce_iommu_release_ownership(struct tce_container *container,
+               struct iommu_table_group *table_group)
+{
+       int i;
+
+       for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+               struct iommu_table *tbl = container->tables[i];
+
+               if (!tbl)
+                       continue;
+
+               tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
+               tce_iommu_userspace_view_free(tbl);
+               if (tbl->it_map)
+                       iommu_release_ownership(tbl);
+
+               container->tables[i] = NULL;
+       }
+}
+
+static int tce_iommu_take_ownership(struct tce_container *container,
+               struct iommu_table_group *table_group)
+{
+       int i, j, rc = 0;
+
+       for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+               struct iommu_table *tbl = table_group->tables[i];
+
+               if (!tbl || !tbl->it_map)
+                       continue;
+
+               rc = tce_iommu_userspace_view_alloc(tbl);
+               if (!rc)
+                       rc = iommu_take_ownership(tbl);
+
+               if (rc) {
+                       for (j = 0; j < i; ++j)
+                               iommu_release_ownership(
+                                               table_group->tables[j]);
+
+                       return rc;
+               }
+       }
+
+       for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
+               container->tables[i] = table_group->tables[i];
+
+       return 0;
+}
+
+static void tce_iommu_release_ownership_ddw(struct tce_container *container,
+               struct iommu_table_group *table_group)
+{
+       long i;
+
+       if (!table_group->ops->unset_window) {
+               WARN_ON_ONCE(1);
+               return;
+       }
+
+       for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
+               table_group->ops->unset_window(table_group, i);
+
+       table_group->ops->release_ownership(table_group);
+}
+
+static long tce_iommu_take_ownership_ddw(struct tce_container *container,
+               struct iommu_table_group *table_group)
+{
+       long i, ret = 0;
+       struct iommu_table *tbl = NULL;
+
+       if (!table_group->ops->create_table || !table_group->ops->set_window ||
+                       !table_group->ops->release_ownership) {
+               WARN_ON_ONCE(1);
+               return -EFAULT;
+       }
+
+       table_group->ops->take_ownership(table_group);
+
+       /*
+        * If it is the first group attached, check if there is
+        * a default DMA window and create one if none as
+        * the userspace expects it to exist.
+        */
+       if (!tce_groups_attached(container) && !container->tables[0]) {
+               ret = tce_iommu_create_table(container,
+                               table_group,
+                               0, /* window number */
+                               IOMMU_PAGE_SHIFT_4K,
+                               table_group->tce32_size,
+                               1, /* default levels */
+                               &tbl);
+               if (ret)
+                       goto release_exit;
+               else
+                       container->tables[0] = tbl;
+       }
+
+       /* Set all windows to the new group */
+       for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+               tbl = container->tables[i];
+
+               if (!tbl)
+                       continue;
+
+               /* Set the default window to a new group */
+               ret = table_group->ops->set_window(table_group, i, tbl);
+               if (ret)
+                       goto release_exit;
+       }
+
+       return 0;
+
+release_exit:
+       for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
+               table_group->ops->unset_window(table_group, i);
+
+       table_group->ops->release_ownership(table_group);
+
+       return ret;
+}
+
 static int tce_iommu_attach_group(void *iommu_data,
                struct iommu_group *iommu_group)
 {
        int ret;
        struct tce_container *container = iommu_data;
-       struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);
+       struct iommu_table_group *table_group;
+       struct tce_iommu_group *tcegrp = NULL;
 
-       BUG_ON(!tbl);
        mutex_lock(&container->lock);
 
        /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
                        iommu_group_id(iommu_group), iommu_group); */
-       if (container->tbl) {
-               pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
-                               iommu_group_id(container->tbl->it_group),
-                               iommu_group_id(iommu_group));
-               ret = -EBUSY;
-       } else if (container->enabled) {
-               pr_err("tce_vfio: attaching group #%u to enabled container\n",
-                               iommu_group_id(iommu_group));
+       table_group = iommu_group_get_iommudata(iommu_group);
+
+       if (tce_groups_attached(container) && (!table_group->ops ||
+                       !table_group->ops->take_ownership ||
+                       !table_group->ops->release_ownership)) {
                ret = -EBUSY;
-       } else {
-               ret = iommu_take_ownership(tbl);
-               if (!ret)
-                       container->tbl = tbl;
+               goto unlock_exit;
+       }
+
+       /* Check if new group has the same iommu_ops (i.e. compatible) */
+       list_for_each_entry(tcegrp, &container->group_list, next) {
+               struct iommu_table_group *table_group_tmp;
+
+               if (tcegrp->grp == iommu_group) {
+                       pr_warn("tce_vfio: Group %d is already attached\n",
+                                       iommu_group_id(iommu_group));
+                       ret = -EBUSY;
+                       goto unlock_exit;
+               }
+               table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
+               if (table_group_tmp->ops != table_group->ops) {
+                       pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
+                                       iommu_group_id(iommu_group),
+                                       iommu_group_id(tcegrp->grp));
+                       ret = -EPERM;
+                       goto unlock_exit;
+               }
+       }
+
+       tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
+       if (!tcegrp) {
+               ret = -ENOMEM;
+               goto unlock_exit;
        }
 
+       if (!table_group->ops || !table_group->ops->take_ownership ||
+                       !table_group->ops->release_ownership)
+               ret = tce_iommu_take_ownership(container, table_group);
+       else
+               ret = tce_iommu_take_ownership_ddw(container, table_group);
+
+       if (!ret) {
+               tcegrp->grp = iommu_group;
+               list_add(&tcegrp->next, &container->group_list);
+       }
+
+unlock_exit:
+       if (ret && tcegrp)
+               kfree(tcegrp);
+
        mutex_unlock(&container->lock);
 
        return ret;
@@ -339,26 +1227,37 @@ static void tce_iommu_detach_group(void *iommu_data,
                struct iommu_group *iommu_group)
 {
        struct tce_container *container = iommu_data;
-       struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);
+       struct iommu_table_group *table_group;
+       bool found = false;
+       struct tce_iommu_group *tcegrp;
 
-       BUG_ON(!tbl);
        mutex_lock(&container->lock);
-       if (tbl != container->tbl) {
-               pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
-                               iommu_group_id(iommu_group),
-                               iommu_group_id(tbl->it_group));
-       } else {
-               if (container->enabled) {
-                       pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
-                                       iommu_group_id(tbl->it_group));
-                       tce_iommu_disable(container);
+
+       list_for_each_entry(tcegrp, &container->group_list, next) {
+               if (tcegrp->grp == iommu_group) {
+                       found = true;
+                       break;
                }
+       }
 
-               /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
-                               iommu_group_id(iommu_group), iommu_group); */
-               container->tbl = NULL;
-               iommu_release_ownership(tbl);
+       if (!found) {
+               pr_warn("tce_vfio: detaching unattached group #%u\n",
+                               iommu_group_id(iommu_group));
+               goto unlock_exit;
        }
+
+       list_del(&tcegrp->next);
+       kfree(tcegrp);
+
+       table_group = iommu_group_get_iommudata(iommu_group);
+       BUG_ON(!table_group);
+
+       if (!table_group->ops || !table_group->ops->release_ownership)
+               tce_iommu_release_ownership(container, table_group);
+       else
+               tce_iommu_release_ownership_ddw(container, table_group);
+
+unlock_exit:
        mutex_unlock(&container->lock);
 }
 
index 5fa42db769ee8e5d9d88edd8dbccc71102290ad1..38edeb4729a9d475445bffab51c25ead9c6ae5bb 100644 (file)
@@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
                case VFIO_EEH_PE_CONFIGURE:
                        ret = eeh_pe_configure(pe);
                        break;
+               case VFIO_EEH_PE_INJECT_ERR:
+                       minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
+                       if (op.argsz < minsz)
+                               return -EINVAL;
+                       if (copy_from_user(&op, (void __user *)arg, minsz))
+                               return -EFAULT;
+
+                       ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
+                                               op.err.addr, op.err.mask);
+                       break;
                default:
                        ret = -EINVAL;
                }
diff --git a/include/misc/cxl-base.h b/include/misc/cxl-base.h
new file mode 100644 (file)
index 0000000..5ae9625
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _MISC_CXL_BASE_H
+#define _MISC_CXL_BASE_H
+
+#ifdef CONFIG_CXL_BASE
+
+#define CXL_IRQ_RANGES 4
+
+struct cxl_irq_ranges {
+       irq_hw_number_t offset[CXL_IRQ_RANGES];
+       irq_hw_number_t range[CXL_IRQ_RANGES];
+};
+
+extern atomic_t cxl_use_count;
+
+static inline bool cxl_ctx_in_use(void)
+{
+       return (atomic_read(&cxl_use_count) != 0);
+}
+
+static inline void cxl_ctx_get(void)
+{
+       atomic_inc(&cxl_use_count);
+}
+
+static inline void cxl_ctx_put(void)
+{
+       atomic_dec(&cxl_use_count);
+}
+
+void cxl_slbia(struct mm_struct *mm);
+
+#else /* CONFIG_CXL_BASE */
+
+static inline bool cxl_ctx_in_use(void) { return false; }
+static inline void cxl_slbia(struct mm_struct *mm) {}
+
+#endif /* CONFIG_CXL_BASE */
+
+#endif
index 975cc7861f184afc255397b87f926c18269bcbf8..7a6c1d6cc1732e1fc83432cd46e65ed25a5fa4ef 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright 2014 IBM Corp.
+ * Copyright 2015 IBM Corp.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
 #ifndef _MISC_CXL_H
 #define _MISC_CXL_H
 
-#ifdef CONFIG_CXL_BASE
+#include <linux/pci.h>
+#include <linux/poll.h>
+#include <linux/interrupt.h>
+#include <uapi/misc/cxl.h>
 
-#define CXL_IRQ_RANGES 4
+/*
+ * This documents the in kernel API for driver to use CXL. It allows kernel
+ * drivers to bind to AFUs using an AFU configuration record exposed as a PCI
+ * configuration record.
+ *
+ * This API enables control over AFU and contexts which can't be part of the
+ * generic PCI API. This API is agnostic to the actual AFU.
+ */
+
+/* Get the AFU associated with a pci_dev */
+struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev);
+
+/* Get the AFU conf record number associated with a pci_dev */
+unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev);
+
+/* Get the physical device (ie. the PCIe card) which the AFU is attached */
+struct device *cxl_get_phys_dev(struct pci_dev *dev);
+
+
+/*
+ * Context lifetime overview:
+ *
+ * An AFU context may be inited and then started and stopped multiple times
+ * before it's released. ie.
+ *    - cxl_dev_context_init()
+ *      - cxl_start_context()
+ *      - cxl_stop_context()
+ *      - cxl_start_context()
+ *      - cxl_stop_context()
+ *     ...repeat...
+ *    - cxl_release_context()
+ * Once released, a context can't be started again.
+ *
+ * One context is inited by the cxl driver for every pci_dev. This is to be
+ * used as a default kernel context. cxl_get_context() will get this
+ * context. This context will be released by PCI hot unplug, so doesn't need to
+ * be released explicitly by drivers.
+ *
+ * Additional kernel contexts may be inited using cxl_dev_context_init().
+ * These must be released using cxl_context_detach().
+ *
+ * Once a context has been inited, IRQs may be configured. Firstly these IRQs
+ * must be allocated (cxl_allocate_afu_irqs()), then individually mapped to
+ * specific handlers (cxl_map_afu_irq()).
+ *
+ * These IRQs can be unmapped (cxl_unmap_afu_irq()) and finally released
+ * (cxl_free_afu_irqs()).
+ *
+ * The AFU can be reset (cxl_afu_reset()). This will cause the PSL/AFU
+ * hardware to lose track of all contexts. It's up to the caller of
+ * cxl_afu_reset() to restart these contexts.
+ */
+
+/*
+ * On pci_enable_device(), the cxl driver will init a single cxl context for
+ * use by the driver. It doesn't start this context (as that will likely
+ * generate DMA traffic for most AFUs).
+ *
+ * This gets the default context associated with this pci_dev.  This context
+ * doesn't need to be released as this will be done by the PCI subsystem on hot
+ * unplug.
+ */
+struct cxl_context *cxl_get_context(struct pci_dev *dev);
+/*
+ * Allocate and initialise a context associated with an AFU PCI device. This
+ * doesn't start the context in the AFU.
+ */
+struct cxl_context *cxl_dev_context_init(struct pci_dev *dev);
+/*
+ * Release and free a context. Context should be stopped before calling.
+ */
+int cxl_release_context(struct cxl_context *ctx);
 
-struct cxl_irq_ranges {
-       irq_hw_number_t offset[CXL_IRQ_RANGES];
-       irq_hw_number_t range[CXL_IRQ_RANGES];
-};
+/*
+ * Allocate AFU interrupts for this context. num=0 will allocate the default
+ * for this AFU as given in the AFU descriptor. This number doesn't include the
+ * interrupt 0 (CAIA defines AFU IRQ 0 for page faults). Each interrupt to be
+ * used must map a handler with cxl_map_afu_irq.
+ */
+int cxl_allocate_afu_irqs(struct cxl_context *cxl, int num);
+/* Free allocated interrupts */
+void cxl_free_afu_irqs(struct cxl_context *cxl);
+
+/*
+ * Map a handler for an AFU interrupt associated with a particular context. AFU
+ * IRQS numbers start from 1 (CAIA defines AFU IRQ 0 for page faults). cookie
+ * is private data that will be provided to the interrupt handler.
+ */
+int cxl_map_afu_irq(struct cxl_context *cxl, int num,
+                   irq_handler_t handler, void *cookie, char *name);
+/* unmap mapped IRQ handlers */
+void cxl_unmap_afu_irq(struct cxl_context *cxl, int num, void *cookie);
 
-extern atomic_t cxl_use_count;
+/*
+ * Start work on the AFU. This starts a cxl context and associates it with a
+ * task. task == NULL will make it a kernel context.
+ */
+int cxl_start_context(struct cxl_context *ctx, u64 wed,
+                     struct task_struct *task);
+/*
+ * Stop a context and remove it from the PSL
+ */
+int cxl_stop_context(struct cxl_context *ctx);
 
-static inline bool cxl_ctx_in_use(void)
-{
-       return (atomic_read(&cxl_use_count) != 0);
-}
+/* Reset the AFU */
+int cxl_afu_reset(struct cxl_context *ctx);
 
-static inline void cxl_ctx_get(void)
-{
-       atomic_inc(&cxl_use_count);
-}
+/*
+ * Set a context as a master context.
+ * This sets the default problem space area mapped as the full space, rather
+ * than just the per context area (for slaves).
+ */
+void cxl_set_master(struct cxl_context *ctx);
 
-static inline void cxl_ctx_put(void)
-{
-       atomic_dec(&cxl_use_count);
-}
+/*
+ * Map and unmap the AFU Problem Space area. The amount and location mapped
+ * depends on if this context is a master or slave.
+ */
+void __iomem *cxl_psa_map(struct cxl_context *ctx);
+void cxl_psa_unmap(void __iomem *addr);
 
-void cxl_slbia(struct mm_struct *mm);
+/*  Get the process element for this context */
+int cxl_process_element(struct cxl_context *ctx);
 
-#else /* CONFIG_CXL_BASE */
 
-static inline bool cxl_ctx_in_use(void) { return false; }
-static inline void cxl_slbia(struct mm_struct *mm) {}
+/*
+ * These calls allow drivers to create their own file descriptors and make them
+ * identical to the cxl file descriptor user API. An example use case:
+ *
+ * struct file_operations cxl_my_fops = {};
+ * ......
+ *     // Init the context
+ *     ctx = cxl_dev_context_init(dev);
+ *     if (IS_ERR(ctx))
+ *             return PTR_ERR(ctx);
+ *     // Create and attach a new file descriptor to my file ops
+ *     file = cxl_get_fd(ctx, &cxl_my_fops, &fd);
+ *     // Start context
+ *     rc = cxl_start_work(ctx, &work.work);
+ *     if (rc) {
+ *             fput(file);
+ *             put_unused_fd(fd);
+ *             return -ENODEV;
+ *     }
+ *     // No error paths after installing the fd
+ *     fd_install(fd, file);
+ *     return fd;
+ *
+ * This inits a context, and gets a file descriptor and associates some file
+ * ops to that file descriptor. If the file ops are blank, the cxl driver will
+ * fill them in with the default ones that mimic the standard user API.  Once
+ * completed, the file descriptor can be installed. Once the file descriptor is
+ * installed, it's visible to the user so no errors must occur past this point.
+ *
+ * If cxl_fd_release() file op call is installed, the context will be stopped
+ * and released when the fd is released. Hence the driver won't need to manage
+ * this itself.
+ */
 
-#endif /* CONFIG_CXL_BASE */
+/*
+ * Take a context and associate it with my file ops. Returns the associated
+ * file and file descriptor. Any file ops which are blank are filled in by the
+ * cxl driver with the default ops to mimic the standard API.
+ */
+struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
+                       int *fd);
+/* Get the context associated with this file */
+struct cxl_context *cxl_fops_get_context(struct file *file);
+/*
+ * Start a context associated with a struct cxl_ioctl_start_work used by the
+ * standard cxl user API.
+ */
+int cxl_start_work(struct cxl_context *ctx,
+                  struct cxl_ioctl_start_work *work);
+/*
+ * Export all the existing fops so drivers can use them
+ */
+int cxl_fd_open(struct inode *inode, struct file *file);
+int cxl_fd_release(struct inode *inode, struct file *file);
+long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm);
+unsigned int cxl_fd_poll(struct file *file, struct poll_table_struct *poll);
+ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
+                          loff_t *off);
 
-#endif
+#endif /* _MISC_CXL_H */
index b57b750c222f1d5729b7b171a9131e0e1ed9ccd5..9fd7b5d8df2fa357f434a899cccfa7810f826107 100644 (file)
@@ -36,6 +36,8 @@
 /* Two-stage IOMMU */
 #define VFIO_TYPE1_NESTING_IOMMU       6       /* Implies v2 */
 
+#define VFIO_SPAPR_TCE_v2_IOMMU                7
+
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between
@@ -442,6 +444,23 @@ struct vfio_iommu_type1_dma_unmap {
 
 /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
 
+/*
+ * The SPAPR TCE DDW info struct provides the information about
+ * the details of Dynamic DMA window capability.
+ *
+ * @pgsizes contains a page size bitmask, 4K/64K/16M are supported.
+ * @max_dynamic_windows_supported tells the maximum number of windows
+ * which the platform can create.
+ * @levels tells the maximum number of levels in multi-level IOMMU tables;
+ * this allows splitting a table into smaller chunks which reduces
+ * the amount of physically contiguous memory required for the table.
+ */
+struct vfio_iommu_spapr_tce_ddw_info {
+       __u64 pgsizes;                  /* Bitmap of supported page sizes */
+       __u32 max_dynamic_windows_supported;
+       __u32 levels;
+};
+
 /*
  * The SPAPR TCE info struct provides the information about the PCI bus
  * address ranges available for DMA, these values are programmed into
@@ -452,14 +471,17 @@ struct vfio_iommu_type1_dma_unmap {
  * addresses too so the window works as a filter rather than an offset
  * for IOVA addresses.
  *
- * A flag will need to be added if other page sizes are supported,
- * so as defined here, it is always 4k.
+ * Flags supported:
+ * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows
+ *   (DDW) support is present. @ddw is only supported when DDW is present.
  */
 struct vfio_iommu_spapr_tce_info {
        __u32 argsz;
-       __u32 flags;                    /* reserved for future use */
+       __u32 flags;
+#define VFIO_IOMMU_SPAPR_INFO_DDW      (1 << 0)        /* DDW supported */
        __u32 dma32_window_start;       /* 32 bit window start (bytes) */
        __u32 dma32_window_size;        /* 32 bit window size (bytes) */
+       struct vfio_iommu_spapr_tce_ddw_info ddw;
 };
 
 #define VFIO_IOMMU_SPAPR_TCE_GET_INFO  _IO(VFIO_TYPE, VFIO_BASE + 12)
@@ -470,12 +492,23 @@ struct vfio_iommu_spapr_tce_info {
  * - unfreeze IO/DMA for frozen PE;
  * - read PE state;
  * - reset PE;
- * - configure PE.
+ * - configure PE;
+ * - inject EEH error.
  */
+struct vfio_eeh_pe_err {
+       __u32 type;
+       __u32 func;
+       __u64 addr;
+       __u64 mask;
+};
+
 struct vfio_eeh_pe_op {
        __u32 argsz;
        __u32 flags;
        __u32 op;
+       union {
+               struct vfio_eeh_pe_err err;
+       };
 };
 
 #define VFIO_EEH_PE_DISABLE            0       /* Disable EEH functionality */
@@ -492,9 +525,70 @@ struct vfio_eeh_pe_op {
 #define VFIO_EEH_PE_RESET_HOT          6       /* Assert hot reset          */
 #define VFIO_EEH_PE_RESET_FUNDAMENTAL  7       /* Assert fundamental reset  */
 #define VFIO_EEH_PE_CONFIGURE          8       /* PE configuration          */
+#define VFIO_EEH_PE_INJECT_ERR         9       /* Inject EEH error          */
 
 #define VFIO_EEH_PE_OP                 _IO(VFIO_TYPE, VFIO_BASE + 21)
 
+/**
+ * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory)
+ *
+ * Registers user space memory where DMA is allowed. It pins
+ * user pages and does the locked memory accounting so
+ * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls
+ * get faster.
+ */
+struct vfio_iommu_spapr_register_memory {
+       __u32   argsz;
+       __u32   flags;
+       __u64   vaddr;                          /* Process virtual address */
+       __u64   size;                           /* Size of mapping (bytes) */
+};
+#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY       _IO(VFIO_TYPE, VFIO_BASE + 17)
+
+/**
+ * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory)
+ *
+ * Unregisters user space memory registered with
+ * VFIO_IOMMU_SPAPR_REGISTER_MEMORY.
+ * Uses vfio_iommu_spapr_register_memory for parameters.
+ */
+#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY     _IO(VFIO_TYPE, VFIO_BASE + 18)
+
+/**
+ * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create)
+ *
+ * Creates an additional TCE table and programs it (sets a new DMA window)
+ * to every IOMMU group in the container. It receives page shift, window
+ * size and number of levels in the TCE table being created.
+ *
+ * It allocates and returns an offset on a PCI bus of the new DMA window.
+ */
+struct vfio_iommu_spapr_tce_create {
+       __u32 argsz;
+       __u32 flags;
+       /* in */
+       __u32 page_shift;
+       __u64 window_size;
+       __u32 levels;
+       /* out */
+       __u64 start_addr;
+};
+#define VFIO_IOMMU_SPAPR_TCE_CREATE    _IO(VFIO_TYPE, VFIO_BASE + 19)
+
+/**
+ * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove)
+ *
+ * Unprograms a TCE table from all groups in the container and destroys it.
+ * It receives a PCI bus offset as a window id.
+ */
+struct vfio_iommu_spapr_tce_remove {
+       __u32 argsz;
+       __u32 flags;
+       /* in */
+       __u64 start_addr;
+};
+#define VFIO_IOMMU_SPAPR_TCE_REMOVE    _IO(VFIO_TYPE, VFIO_BASE + 20)
+
 /* ***************************************************************** */
 
 #endif /* _UAPIVFIO_H */
index cd6d789b73ec4cd15c116f2302c305db845f9385..99a8ca15fe648c9d176325eb2ed9c952a5ac7088 100644 (file)
@@ -32,10 +32,32 @@ struct cxl_ioctl_start_work {
 #define CXL_START_WORK_ALL             (CXL_START_WORK_AMR |\
                                         CXL_START_WORK_NUM_IRQS)
 
+
+/* Possible modes that an afu can be in */
+#define CXL_MODE_DEDICATED   0x1
+#define CXL_MODE_DIRECTED    0x2
+
+/* possible flags for the cxl_afu_id flags field */
+#define CXL_AFUID_FLAG_SLAVE    0x1  /* In directed-mode afu is in slave mode */
+
+struct cxl_afu_id {
+       __u64 flags;     /* One of CXL_AFUID_FLAG_X */
+       __u32 card_id;
+       __u32 afu_offset;
+       __u32 afu_mode;  /* one of the CXL_MODE_X */
+       __u32 reserved1;
+       __u64 reserved2;
+       __u64 reserved3;
+       __u64 reserved4;
+       __u64 reserved5;
+       __u64 reserved6;
+};
+
 /* ioctl numbers */
 #define CXL_MAGIC 0xCA
 #define CXL_IOCTL_START_WORK           _IOW(CXL_MAGIC, 0x00, struct cxl_ioctl_start_work)
 #define CXL_IOCTL_GET_PROCESS_ELEMENT  _IOR(CXL_MAGIC, 0x01, __u32)
+#define CXL_IOCTL_GET_AFU_ID            _IOR(CXL_MAGIC, 0x02, struct cxl_afu_id)
 
 #define CXL_READ_MIN_SIZE 0x1000 /* 4K */
 
index c7dab0645554ea341590bee0ecf5b0a0139a55bd..3b10a48fa0401ff87902525095c9e1d189bfe7dd 100644 (file)
@@ -15,7 +15,7 @@ quiet_cmd_unroll = UNROLL  $@
                    < $< > $@ || ( rm -f $@ && exit 1 )
 
 ifeq ($(CONFIG_ALTIVEC),y)
-altivec_flags := -maltivec -mabi=altivec
+altivec_flags := -maltivec $(call cc-option,-mabi=altivec)
 endif
 
 # The GCC option -ffreestanding is required in order to compile code containing
index 5ad042345ab9b66ea2e884824ba407897cb56833..03ca2e64b3fcd291c58311848c18794b5514d1d9 100644 (file)
@@ -12,7 +12,7 @@ CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CUR
 
 export CFLAGS
 
-SUB_DIRS = pmu copyloops mm tm primitives stringloops vphn switch_endian
+SUB_DIRS = pmu copyloops mm tm primitives stringloops vphn switch_endian dscr
 
 endif
 
diff --git a/tools/testing/selftests/powerpc/dscr/.gitignore b/tools/testing/selftests/powerpc/dscr/.gitignore
new file mode 100644 (file)
index 0000000..b585c6c
--- /dev/null
@@ -0,0 +1,7 @@
+dscr_default_test
+dscr_explicit_test
+dscr_inherit_exec_test
+dscr_inherit_test
+dscr_sysfs_test
+dscr_sysfs_thread_test
+dscr_user_test
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
new file mode 100644 (file)
index 0000000..49327ee
--- /dev/null
@@ -0,0 +1,14 @@
+TEST_PROGS := dscr_default_test dscr_explicit_test dscr_user_test      \
+             dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test  \
+             dscr_sysfs_thread_test
+
+dscr_default_test: LDLIBS += -lpthread
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../harness.c
+
+include ../../lib.mk
+
+clean:
+       rm -f $(TEST_PROGS) *.o
diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h b/tools/testing/selftests/powerpc/dscr/dscr.h
new file mode 100644 (file)
index 0000000..a36af1b
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * POWER Data Stream Control Register (DSCR)
+ *
+ * This header file contains helper functions and macros
+ * required for all the DSCR related test cases.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#ifndef _SELFTESTS_POWERPC_DSCR_DSCR_H
+#define _SELFTESTS_POWERPC_DSCR_DSCR_H
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#include "utils.h"
+
+#define SPRN_DSCR      0x11    /* Privilege state SPR */
+#define SPRN_DSCR_USR  0x03    /* Problem state SPR */
+#define THREADS                100     /* Max threads */
+#define COUNT          100     /* Max iterations */
+#define DSCR_MAX       16      /* Max DSCR value */
+#define LEN_MAX                100     /* Max name length */
+
+#define DSCR_DEFAULT   "/sys/devices/system/cpu/dscr_default"
+#define CPU_PATH       "/sys/devices/system/cpu/"
+
+#define rmb()  asm volatile("lwsync":::"memory")
+#define wmb()  asm volatile("lwsync":::"memory")
+
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+/* Prilvilege state DSCR access */
+inline unsigned long get_dscr(void)
+{
+       unsigned long ret;
+
+       asm volatile("mfspr %0,%1" : "=r" (ret): "i" (SPRN_DSCR));
+
+       return ret;
+}
+
+inline void set_dscr(unsigned long val)
+{
+       asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
+
+/* Problem state DSCR access */
+inline unsigned long get_dscr_usr(void)
+{
+       unsigned long ret;
+
+       asm volatile("mfspr %0,%1" : "=r" (ret): "i" (SPRN_DSCR_USR));
+
+       return ret;
+}
+
+inline void set_dscr_usr(unsigned long val)
+{
+       asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_USR));
+}
+
+/* Default DSCR access */
+unsigned long get_default_dscr(void)
+{
+       int fd = -1, ret;
+       char buf[16];
+       unsigned long val;
+
+       if (fd == -1) {
+               fd = open(DSCR_DEFAULT, O_RDONLY);
+               if (fd == -1) {
+                       perror("open() failed");
+                       exit(1);
+               }
+       }
+       memset(buf, 0, sizeof(buf));
+       lseek(fd, 0, SEEK_SET);
+       ret = read(fd, buf, sizeof(buf));
+       if (ret == -1) {
+               perror("read() failed");
+               exit(1);
+       }
+       sscanf(buf, "%lx", &val);
+       close(fd);
+       return val;
+}
+
+void set_default_dscr(unsigned long val)
+{
+       int fd = -1, ret;
+       char buf[16];
+
+       if (fd == -1) {
+               fd = open(DSCR_DEFAULT, O_RDWR);
+               if (fd == -1) {
+                       perror("open() failed");
+                       exit(1);
+               }
+       }
+       sprintf(buf, "%lx\n", val);
+       ret = write(fd, buf, strlen(buf));
+       if (ret == -1) {
+               perror("write() failed");
+               exit(1);
+       }
+       close(fd);
+}
+
+double uniform_deviate(int seed)
+{
+       return seed * (1.0 / (RAND_MAX + 1.0));
+}
+#endif /* _SELFTESTS_POWERPC_DSCR_DSCR_H */
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
new file mode 100644 (file)
index 0000000..df17c3b
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * POWER Data Stream Control Register (DSCR) default test
+ *
+ * This test modifies the system wide default DSCR through
+ * it's sysfs interface and then verifies that all threads
+ * see the correct changed DSCR value immediately.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static unsigned long dscr;             /* System DSCR default */
+static unsigned long sequence;
+static unsigned long result[THREADS];
+
+static void *do_test(void *in)
+{
+       unsigned long thread = (unsigned long)in;
+       unsigned long i;
+
+       for (i = 0; i < COUNT; i++) {
+               unsigned long d, cur_dscr, cur_dscr_usr;
+               unsigned long s1, s2;
+
+               s1 = ACCESS_ONCE(sequence);
+               if (s1 & 1)
+                       continue;
+               rmb();
+
+               d = dscr;
+               cur_dscr = get_dscr();
+               cur_dscr_usr = get_dscr_usr();
+
+               rmb();
+               s2 = sequence;
+
+               if (s1 != s2)
+                       continue;
+
+               if (cur_dscr != d) {
+                       fprintf(stderr, "thread %ld kernel DSCR should be %ld "
+                               "but is %ld\n", thread, d, cur_dscr);
+                       result[thread] = 1;
+                       pthread_exit(&result[thread]);
+               }
+
+               if (cur_dscr_usr != d) {
+                       fprintf(stderr, "thread %ld user DSCR should be %ld "
+                               "but is %ld\n", thread, d, cur_dscr_usr);
+                       result[thread] = 1;
+                       pthread_exit(&result[thread]);
+               }
+       }
+       result[thread] = 0;
+       pthread_exit(&result[thread]);
+}
+
+int dscr_default(void)
+{
+       pthread_t threads[THREADS];
+       unsigned long i, *status[THREADS];
+       unsigned long orig_dscr_default;
+
+       orig_dscr_default = get_default_dscr();
+
+       /* Initial DSCR default */
+       dscr = 1;
+       set_default_dscr(dscr);
+
+       /* Spawn all testing threads */
+       for (i = 0; i < THREADS; i++) {
+               if (pthread_create(&threads[i], NULL, do_test, (void *)i)) {
+                       perror("pthread_create() failed");
+                       goto fail;
+               }
+       }
+
+       srand(getpid());
+
+       /* Keep changing the DSCR default */
+       for (i = 0; i < COUNT; i++) {
+               double ret = uniform_deviate(rand());
+
+               if (ret < 0.0001) {
+                       sequence++;
+                       wmb();
+
+                       dscr++;
+                       if (dscr > DSCR_MAX)
+                               dscr = 0;
+
+                       set_default_dscr(dscr);
+
+                       wmb();
+                       sequence++;
+               }
+       }
+
+       /* Individual testing thread exit status */
+       for (i = 0; i < THREADS; i++) {
+               if (pthread_join(threads[i], (void **)&(status[i]))) {
+                       perror("pthread_join() failed");
+                       goto fail;
+               }
+
+               if (*status[i]) {
+                       printf("%ldth thread failed to join with %ld status\n",
+                                                               i, *status[i]);
+                       goto fail;
+               }
+       }
+       set_default_dscr(orig_dscr_default);
+       return 0;
+fail:
+       set_default_dscr(orig_dscr_default);
+       return 1;
+}
+
+int main(int argc, char *argv[])
+{
+       return test_harness(dscr_default, "dscr_default_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
new file mode 100644 (file)
index 0000000..ad9c3ec
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * POWER Data Stream Control Register (DSCR) explicit test
+ *
+ * This test modifies the DSCR value using mtspr instruction and
+ * verifies the change with mfspr instruction. It uses both the
+ * privilege state SPR and the problem state SPR for this purpose.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+int dscr_explicit(void)
+{
+       unsigned long i, dscr = 0;
+
+       srand(getpid());
+       set_dscr(dscr);
+
+       for (i = 0; i < COUNT; i++) {
+               unsigned long cur_dscr, cur_dscr_usr;
+               double ret = uniform_deviate(rand());
+
+               if (ret < 0.001) {
+                       dscr++;
+                       if (dscr > DSCR_MAX)
+                               dscr = 0;
+
+                       set_dscr(dscr);
+               }
+
+               cur_dscr = get_dscr();
+               if (cur_dscr != dscr) {
+                       fprintf(stderr, "Kernel DSCR should be %ld but "
+                                       "is %ld\n", dscr, cur_dscr);
+                       return 1;
+               }
+
+               ret = uniform_deviate(rand());
+               if (ret < 0.001) {
+                       dscr++;
+                       if (dscr > DSCR_MAX)
+                               dscr = 0;
+
+                       set_dscr_usr(dscr);
+               }
+
+               cur_dscr_usr = get_dscr_usr();
+               if (cur_dscr_usr != dscr) {
+                       fprintf(stderr, "User DSCR should be %ld but "
+                                       "is %ld\n", dscr, cur_dscr_usr);
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+int main(int argc, char *argv[])
+{
+       return test_harness(dscr_explicit, "dscr_explicit_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
new file mode 100644 (file)
index 0000000..8265504
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork exec test
+ *
+ * This testcase modifies the DSCR using mtspr, forks & execs and
+ * verifies that the child is using the changed DSCR using mfspr.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static char prog[LEN_MAX];
+
+static void do_exec(unsigned long parent_dscr)
+{
+       unsigned long cur_dscr, cur_dscr_usr;
+
+       cur_dscr = get_dscr();
+       cur_dscr_usr = get_dscr_usr();
+
+       if (cur_dscr != parent_dscr) {
+               fprintf(stderr, "Parent DSCR %ld was not inherited "
+                               "over exec (kernel value)\n", parent_dscr);
+               exit(1);
+       }
+
+       if (cur_dscr_usr != parent_dscr) {
+               fprintf(stderr, "Parent DSCR %ld was not inherited "
+                               "over exec (user value)\n", parent_dscr);
+               exit(1);
+       }
+       exit(0);
+}
+
+int dscr_inherit_exec(void)
+{
+       unsigned long i, dscr = 0;
+       pid_t pid;
+
+       for (i = 0; i < COUNT; i++) {
+               dscr++;
+               if (dscr > DSCR_MAX)
+                       dscr = 0;
+
+               if (dscr == get_default_dscr())
+                       continue;
+
+               if (i % 2 == 0)
+                       set_dscr_usr(dscr);
+               else
+                       set_dscr(dscr);
+
+               /*
+                * XXX: Force a context switch out so that DSCR
+                * current value is copied into the thread struct
+                * which is required for the child to inherit the
+                * changed value.
+                */
+               sleep(1);
+
+               pid = fork();
+               if (pid == -1) {
+                       perror("fork() failed");
+                       exit(1);
+               } else if (pid) {
+                       int status;
+
+                       if (waitpid(pid, &status, 0) == -1) {
+                               perror("waitpid() failed");
+                               exit(1);
+                       }
+
+                       if (!WIFEXITED(status)) {
+                               fprintf(stderr, "Child didn't exit cleanly\n");
+                               exit(1);
+                       }
+
+                       if (WEXITSTATUS(status) != 0) {
+                               fprintf(stderr, "Child didn't exit cleanly\n");
+                               return 1;
+                       }
+               } else {
+                       char dscr_str[16];
+
+                       sprintf(dscr_str, "%ld", dscr);
+                       execlp(prog, prog, "exec", dscr_str, NULL);
+                       exit(1);
+               }
+       }
+       return 0;
+}
+
+int main(int argc, char *argv[])
+{
+       if (argc == 3 && !strcmp(argv[1], "exec")) {
+               unsigned long parent_dscr;
+
+               parent_dscr = atoi(argv[2]);
+               do_exec(parent_dscr);
+       } else if (argc != 1) {
+               fprintf(stderr, "Usage: %s\n", argv[0]);
+               exit(1);
+       }
+
+       strncpy(prog, argv[0], strlen(argv[0]));
+       return test_harness(dscr_inherit_exec, "dscr_inherit_exec_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
new file mode 100644 (file)
index 0000000..4e414ca
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork test
+ *
+ * This testcase modifies the DSCR using mtspr, forks and then
+ * verifies that the child process has the correct changed DSCR
+ * value using mfspr.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+int dscr_inherit(void)
+{
+       unsigned long i, dscr = 0;
+       pid_t pid;
+
+       srand(getpid());
+       set_dscr(dscr);
+
+       for (i = 0; i < COUNT; i++) {
+               unsigned long cur_dscr, cur_dscr_usr;
+
+               dscr++;
+               if (dscr > DSCR_MAX)
+                       dscr = 0;
+
+               if (i % 2 == 0)
+                       set_dscr_usr(dscr);
+               else
+                       set_dscr(dscr);
+
+               /*
+                * XXX: Force a context switch out so that DSCR
+                * current value is copied into the thread struct
+                * which is required for the child to inherit the
+                * changed value.
+                */
+               sleep(1);
+
+               pid = fork();
+               if (pid == -1) {
+                       perror("fork() failed");
+                       exit(1);
+               } else if (pid) {
+                       int status;
+
+                       if (waitpid(pid, &status, 0) == -1) {
+                               perror("waitpid() failed");
+                               exit(1);
+                       }
+
+                       if (!WIFEXITED(status)) {
+                               fprintf(stderr, "Child didn't exit cleanly\n");
+                               exit(1);
+                       }
+
+                       if (WEXITSTATUS(status) != 0) {
+                               fprintf(stderr, "Child didn't exit cleanly\n");
+                               return 1;
+                       }
+               } else {
+                       cur_dscr = get_dscr();
+                       if (cur_dscr != dscr) {
+                               fprintf(stderr, "Kernel DSCR should be %ld "
+                                       "but is %ld\n", dscr, cur_dscr);
+                               exit(1);
+                       }
+
+                       cur_dscr_usr = get_dscr_usr();
+                       if (cur_dscr_usr != dscr) {
+                               fprintf(stderr, "User DSCR should be %ld "
+                                       "but is %ld\n", dscr, cur_dscr_usr);
+                               exit(1);
+                       }
+                       exit(0);
+               }
+       }
+       return 0;
+}
+
+int main(int argc, char *argv[])
+{
+       return test_harness(dscr_inherit, "dscr_inherit_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
new file mode 100644 (file)
index 0000000..17fb1b4
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs interface test
+ *
+ * This test updates to system wide DSCR default through the sysfs interface
+ * and then verifies that all the CPU specific DSCR defaults are updated as
+ * well verified from their sysfs interfaces.
+ *
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static int check_cpu_dscr_default(char *file, unsigned long val)
+{
+       char buf[10];
+       int fd, rc;
+
+       fd = open(file, O_RDWR);
+       if (fd == -1) {
+               perror("open() failed");
+               return 1;
+       }
+
+       rc = read(fd, buf, sizeof(buf));
+       if (rc == -1) {
+               perror("read() failed");
+               return 1;
+       }
+       close(fd);
+
+       buf[rc] = '\0';
+       if (strtol(buf, NULL, 16) != val) {
+               printf("DSCR match failed: %ld (system) %ld (cpu)\n",
+                                       val, strtol(buf, NULL, 16));
+               return 1;
+       }
+       return 0;
+}
+
+static int check_all_cpu_dscr_defaults(unsigned long val)
+{
+       DIR *sysfs;
+       struct dirent *dp;
+       char file[LEN_MAX];
+
+       sysfs = opendir(CPU_PATH);
+       if (!sysfs) {
+               perror("opendir() failed");
+               return 1;
+       }
+
+       while ((dp = readdir(sysfs))) {
+               if (!(dp->d_type & DT_DIR))
+                       continue;
+               if (!strcmp(dp->d_name, "cpuidle"))
+                       continue;
+               if (!strstr(dp->d_name, "cpu"))
+                       continue;
+
+               sprintf(file, "%s%s/dscr", CPU_PATH, dp->d_name);
+               if (access(file, F_OK))
+                       continue;
+
+               if (check_cpu_dscr_default(file, val))
+                       return 1;
+       }
+       closedir(sysfs);
+       return 0;
+}
+
+int dscr_sysfs(void)
+{
+       unsigned long orig_dscr_default;
+       int i, j;
+
+       orig_dscr_default = get_default_dscr();
+       for (i = 0; i < COUNT; i++) {
+               for (j = 0; j < DSCR_MAX; j++) {
+                       set_default_dscr(j);
+                       if (check_all_cpu_dscr_defaults(j))
+                               goto fail;
+               }
+       }
+       set_default_dscr(orig_dscr_default);
+       return 0;
+fail:
+       set_default_dscr(orig_dscr_default);
+       return 1;
+}
+
+int main(int argc, char *argv[])
+{
+       return test_harness(dscr_sysfs, "dscr_sysfs_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
new file mode 100644 (file)
index 0000000..ad97b59
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs thread test
+ *
+ * This test updates the system wide DSCR default value through
+ * sysfs interface which should then update all the CPU specific
+ * DSCR default values which must also be then visible to threads
+ * executing on individual CPUs on the system.
+ *
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include "dscr.h"
+
+static int test_thread_dscr(unsigned long val)
+{
+       unsigned long cur_dscr, cur_dscr_usr;
+
+       cur_dscr = get_dscr();
+       cur_dscr_usr = get_dscr_usr();
+
+       if (val != cur_dscr) {
+               printf("[cpu %d] Kernel DSCR should be %ld but is %ld\n",
+                                       sched_getcpu(), val, cur_dscr);
+               return 1;
+       }
+
+       if (val != cur_dscr_usr) {
+               printf("[cpu %d] User DSCR should be %ld but is %ld\n",
+                                       sched_getcpu(), val, cur_dscr_usr);
+               return 1;
+       }
+       return 0;
+}
+
+static int check_cpu_dscr_thread(unsigned long val)
+{
+       cpu_set_t mask;
+       int cpu;
+
+       for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+               CPU_ZERO(&mask);
+               CPU_SET(cpu, &mask);
+               if (sched_setaffinity(0, sizeof(mask), &mask))
+                       continue;
+
+               if (test_thread_dscr(val))
+                       return 1;
+       }
+       return 0;
+
+}
+
+int dscr_sysfs_thread(void)
+{
+       unsigned long orig_dscr_default;
+       int i, j;
+
+       orig_dscr_default = get_default_dscr();
+       for (i = 0; i < COUNT; i++) {
+               for (j = 0; j < DSCR_MAX; j++) {
+                       set_default_dscr(j);
+                       if (check_cpu_dscr_thread(j))
+                               goto fail;
+               }
+       }
+       set_default_dscr(orig_dscr_default);
+       return 0;
+fail:
+       set_default_dscr(orig_dscr_default);
+       return 1;
+}
+
+int main(int argc, char *argv[])
+{
+       return test_harness(dscr_sysfs_thread, "dscr_sysfs_thread_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
new file mode 100644 (file)
index 0000000..77d16b5
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * POWER Data Stream Control Register (DSCR) SPR test
+ *
+ * This test modifies the DSCR value through both the SPR number
+ * based mtspr instruction and then makes sure that the same is
+ * reflected through mfspr instruction using either of the SPR
+ * numbers.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2013, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static int check_dscr(char *str)
+{
+       unsigned long cur_dscr, cur_dscr_usr;
+
+       cur_dscr = get_dscr();
+       cur_dscr_usr = get_dscr_usr();
+       if (cur_dscr != cur_dscr_usr) {
+               printf("%s set, kernel get %lx != user get %lx\n",
+                                       str, cur_dscr, cur_dscr_usr);
+               return 1;
+       }
+       return 0;
+}
+
+int dscr_user(void)
+{
+       int i;
+
+       check_dscr("");
+
+       for (i = 0; i < COUNT; i++) {
+               set_dscr(i);
+               if (check_dscr("kernel"))
+                       return 1;
+       }
+
+       for (i = 0; i < COUNT; i++) {
+               set_dscr_usr(i);
+               if (check_dscr("user"))
+                       return 1;
+       }
+       return 0;
+}
+
+int main(int argc, char *argv[])
+{
+       return test_harness(dscr_user, "dscr_user_test");
+}
index 081473db22b779759b1fec4e809570510a0a322e..e21d10674e54135637b540ae8b0457e0ea3075cd 100644 (file)
@@ -1,9 +1,8 @@
-CC := $(CROSS_COMPILE)gcc
-PROGS := switch_endian_test
+TEST_PROGS := switch_endian_test
 
 ASFLAGS += -O2 -Wall -g -nostdlib -m64
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
 switch_endian_test: check-reversed.S
 
@@ -13,12 +12,7 @@ check-reversed.o: check.o
 check-reversed.S: check-reversed.o
        hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@
 
-run_tests: all
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../lib.mk
 
 clean:
-       rm -f $(PROGS) *.o check-reversed.S
-
-.PHONY: all run_tests clean
+       rm -f $(TEST_PROGS) *.o check-reversed.S
index 6bff955e1d55ac6cccb526f5a00355ac4e904973..4bea62a319dcaf96ee85f58aaa3cf6160d0428d5 100644 (file)
@@ -1,11 +1,11 @@
-TEST_PROGS := tm-resched-dscr
+TEST_PROGS := tm-resched-dscr tm-syscall
 
 all: $(TEST_PROGS)
 
 $(TEST_PROGS): ../harness.c
 
 tm-syscall: tm-syscall-asm.S
-tm-syscall: CFLAGS += -mhtm
+tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include
 
 include ../../lib.mk
 
index 3ed8d4b252fac440b6dd2db56a84a7fcf330fe0c..1276e23da63bbdf91d328472f581e32e8979a0fa 100644 (file)
@@ -82,7 +82,8 @@ int tm_syscall(void)
        unsigned count = 0;
        struct timeval end, now;
 
-       SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_HTM));
+       SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2)
+                 & PPC_FEATURE2_HTM_NOSC));
        setbuf(stdout, NULL);
 
        printf("Testing transactional syscalls for %d seconds...\n", TEST_DURATION);
index e539f775fd8f0a7459f0df0bb33b7bb5de5cb6e5..a485f2e286ae22cae0706c7cd0fc735d4c7206e3 100644 (file)
@@ -1,15 +1,12 @@
-PROG := test-vphn
+TEST_PROGS := test-vphn
 
 CFLAGS += -m64
 
-all: $(PROG)
+all: $(TEST_PROGS)
 
-$(PROG): ../harness.c
+$(TEST_PROGS): ../harness.c
 
-run_tests: all
-       ./$(PROG)
+include ../../lib.mk
 
 clean:
-       rm -f $(PROG)
-
-.PHONY: all run_tests clean
+       rm -f $(TEST_PROGS)