R600 -> AMDGPU rename
author: Tom Stellard <thomas.stellard@amd.com>
Sat, 13 Jun 2015 03:28:10 +0000 (03:28 +0000)
committer: Tom Stellard <thomas.stellard@amd.com>
Sat, 13 Jun 2015 03:28:10 +0000 (03:28 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239657 91177308-0d34-0410-b5e6-96231b3b80d8

1087 files changed:
CMakeLists.txt
autoconf/configure.ac
configure
docs/AMDGPUUsage.rst [new file with mode: 0644]
docs/CompilerWriterInfo.rst
docs/GettingStarted.rst
docs/R600Usage.rst [deleted file]
docs/index.rst
lib/Target/AMDGPU/AMDGPU.h [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPU.td [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUAsmPrinter.h [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUCallingConv.td [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUFrameLowering.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUFrameLowering.h [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUISelLowering.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUISelLowering.h [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUInstrInfo.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUInstrInfo.h [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUInstrInfo.td [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUInstructions.td [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUIntrinsics.td [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUMCInstLower.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUMCInstLower.h [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUMachineFunction.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUMachineFunction.h [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPURegisterInfo.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPURegisterInfo.h [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPURegisterInfo.td [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUSubtarget.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUSubtarget.h [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUTargetMachine.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUTargetMachine.h [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h [new file with mode: 0644]
lib/Target/AMDGPU/AMDILCFGStructurizer.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AMDKernelCodeT.h [new file with mode: 0644]
lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp [new file with mode: 0644]
lib/Target/AMDGPU/AsmParser/CMakeLists.txt [new file with mode: 0644]
lib/Target/AMDGPU/AsmParser/LLVMBuild.txt [new file with mode: 0644]
lib/Target/AMDGPU/AsmParser/Makefile [new file with mode: 0644]
lib/Target/AMDGPU/CIInstructions.td [new file with mode: 0644]
lib/Target/AMDGPU/CMakeLists.txt [new file with mode: 0644]
lib/Target/AMDGPU/CaymanInstructions.td [new file with mode: 0644]
lib/Target/AMDGPU/EvergreenInstructions.td [new file with mode: 0644]
lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp [new file with mode: 0644]
lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h [new file with mode: 0644]
lib/Target/AMDGPU/InstPrinter/CMakeLists.txt [new file with mode: 0644]
lib/Target/AMDGPU/InstPrinter/LLVMBuild.txt [new file with mode: 0644]
lib/Target/AMDGPU/InstPrinter/Makefile [new file with mode: 0644]
lib/Target/AMDGPU/LLVMBuild.txt [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/Makefile [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp [new file with mode: 0644]
lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp [new file with mode: 0644]
lib/Target/AMDGPU/Makefile [new file with mode: 0644]
lib/Target/AMDGPU/Processors.td [new file with mode: 0644]
lib/Target/AMDGPU/R600ClauseMergePass.cpp [new file with mode: 0644]
lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp [new file with mode: 0644]
lib/Target/AMDGPU/R600Defines.h [new file with mode: 0644]
lib/Target/AMDGPU/R600EmitClauseMarkers.cpp [new file with mode: 0644]
lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp [new file with mode: 0644]
lib/Target/AMDGPU/R600ISelLowering.cpp [new file with mode: 0644]
lib/Target/AMDGPU/R600ISelLowering.h [new file with mode: 0644]
lib/Target/AMDGPU/R600InstrFormats.td [new file with mode: 0644]
lib/Target/AMDGPU/R600InstrInfo.cpp [new file with mode: 0644]
lib/Target/AMDGPU/R600InstrInfo.h [new file with mode: 0644]
lib/Target/AMDGPU/R600Instructions.td [new file with mode: 0644]
lib/Target/AMDGPU/R600Intrinsics.td [new file with mode: 0644]
lib/Target/AMDGPU/R600MachineFunctionInfo.cpp [new file with mode: 0644]
lib/Target/AMDGPU/R600MachineFunctionInfo.h [new file with mode: 0644]
lib/Target/AMDGPU/R600MachineScheduler.cpp [new file with mode: 0644]
lib/Target/AMDGPU/R600MachineScheduler.h [new file with mode: 0644]
lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp [new file with mode: 0644]
lib/Target/AMDGPU/R600Packetizer.cpp [new file with mode: 0644]
lib/Target/AMDGPU/R600RegisterInfo.cpp [new file with mode: 0644]
lib/Target/AMDGPU/R600RegisterInfo.h [new file with mode: 0644]
lib/Target/AMDGPU/R600RegisterInfo.td [new file with mode: 0644]
lib/Target/AMDGPU/R600Schedule.td [new file with mode: 0644]
lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp [new file with mode: 0644]
lib/Target/AMDGPU/R700Instructions.td [new file with mode: 0644]
lib/Target/AMDGPU/SIAnnotateControlFlow.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SIDefines.h [new file with mode: 0644]
lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SIFixSGPRCopies.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SIFoldOperands.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SIISelLowering.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SIISelLowering.h [new file with mode: 0644]
lib/Target/AMDGPU/SIInsertWaits.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SIInstrFormats.td [new file with mode: 0644]
lib/Target/AMDGPU/SIInstrInfo.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SIInstrInfo.h [new file with mode: 0644]
lib/Target/AMDGPU/SIInstrInfo.td [new file with mode: 0644]
lib/Target/AMDGPU/SIInstructions.td [new file with mode: 0644]
lib/Target/AMDGPU/SIIntrinsics.td [new file with mode: 0644]
lib/Target/AMDGPU/SILoadStoreOptimizer.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SILowerControlFlow.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SILowerI1Copies.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SIMachineFunctionInfo.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SIMachineFunctionInfo.h [new file with mode: 0644]
lib/Target/AMDGPU/SIPrepareScratchRegs.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SIRegisterInfo.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SIRegisterInfo.h [new file with mode: 0644]
lib/Target/AMDGPU/SIRegisterInfo.td [new file with mode: 0644]
lib/Target/AMDGPU/SISchedule.td [new file with mode: 0644]
lib/Target/AMDGPU/SIShrinkInstructions.cpp [new file with mode: 0644]
lib/Target/AMDGPU/SITypeRewriter.cpp [new file with mode: 0644]
lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp [new file with mode: 0644]
lib/Target/AMDGPU/TargetInfo/CMakeLists.txt [new file with mode: 0644]
lib/Target/AMDGPU/TargetInfo/LLVMBuild.txt [new file with mode: 0644]
lib/Target/AMDGPU/TargetInfo/Makefile [new file with mode: 0644]
lib/Target/AMDGPU/VIInstrFormats.td [new file with mode: 0644]
lib/Target/AMDGPU/VIInstructions.td [new file with mode: 0644]
lib/Target/LLVMBuild.txt
lib/Target/R600/AMDGPU.h [deleted file]
lib/Target/R600/AMDGPU.td [deleted file]
lib/Target/R600/AMDGPUAlwaysInlinePass.cpp [deleted file]
lib/Target/R600/AMDGPUAsmPrinter.cpp [deleted file]
lib/Target/R600/AMDGPUAsmPrinter.h [deleted file]
lib/Target/R600/AMDGPUCallingConv.td [deleted file]
lib/Target/R600/AMDGPUFrameLowering.cpp [deleted file]
lib/Target/R600/AMDGPUFrameLowering.h [deleted file]
lib/Target/R600/AMDGPUISelDAGToDAG.cpp [deleted file]
lib/Target/R600/AMDGPUISelLowering.cpp [deleted file]
lib/Target/R600/AMDGPUISelLowering.h [deleted file]
lib/Target/R600/AMDGPUInstrInfo.cpp [deleted file]
lib/Target/R600/AMDGPUInstrInfo.h [deleted file]
lib/Target/R600/AMDGPUInstrInfo.td [deleted file]
lib/Target/R600/AMDGPUInstructions.td [deleted file]
lib/Target/R600/AMDGPUIntrinsicInfo.cpp [deleted file]
lib/Target/R600/AMDGPUIntrinsicInfo.h [deleted file]
lib/Target/R600/AMDGPUIntrinsics.td [deleted file]
lib/Target/R600/AMDGPUMCInstLower.cpp [deleted file]
lib/Target/R600/AMDGPUMCInstLower.h [deleted file]
lib/Target/R600/AMDGPUMachineFunction.cpp [deleted file]
lib/Target/R600/AMDGPUMachineFunction.h [deleted file]
lib/Target/R600/AMDGPUPromoteAlloca.cpp [deleted file]
lib/Target/R600/AMDGPURegisterInfo.cpp [deleted file]
lib/Target/R600/AMDGPURegisterInfo.h [deleted file]
lib/Target/R600/AMDGPURegisterInfo.td [deleted file]
lib/Target/R600/AMDGPUSubtarget.cpp [deleted file]
lib/Target/R600/AMDGPUSubtarget.h [deleted file]
lib/Target/R600/AMDGPUTargetMachine.cpp [deleted file]
lib/Target/R600/AMDGPUTargetMachine.h [deleted file]
lib/Target/R600/AMDGPUTargetTransformInfo.cpp [deleted file]
lib/Target/R600/AMDGPUTargetTransformInfo.h [deleted file]
lib/Target/R600/AMDILCFGStructurizer.cpp [deleted file]
lib/Target/R600/AMDKernelCodeT.h [deleted file]
lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp [deleted file]
lib/Target/R600/AsmParser/CMakeLists.txt [deleted file]
lib/Target/R600/AsmParser/LLVMBuild.txt [deleted file]
lib/Target/R600/AsmParser/Makefile [deleted file]
lib/Target/R600/CIInstructions.td [deleted file]
lib/Target/R600/CMakeLists.txt [deleted file]
lib/Target/R600/CaymanInstructions.td [deleted file]
lib/Target/R600/EvergreenInstructions.td [deleted file]
lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp [deleted file]
lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h [deleted file]
lib/Target/R600/InstPrinter/CMakeLists.txt [deleted file]
lib/Target/R600/InstPrinter/LLVMBuild.txt [deleted file]
lib/Target/R600/InstPrinter/Makefile [deleted file]
lib/Target/R600/LLVMBuild.txt [deleted file]
lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp [deleted file]
lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp [deleted file]
lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h [deleted file]
lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp [deleted file]
lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h [deleted file]
lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp [deleted file]
lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h [deleted file]
lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp [deleted file]
lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h [deleted file]
lib/Target/R600/MCTargetDesc/CMakeLists.txt [deleted file]
lib/Target/R600/MCTargetDesc/LLVMBuild.txt [deleted file]
lib/Target/R600/MCTargetDesc/Makefile [deleted file]
lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp [deleted file]
lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp [deleted file]
lib/Target/R600/Makefile [deleted file]
lib/Target/R600/Processors.td [deleted file]
lib/Target/R600/R600ClauseMergePass.cpp [deleted file]
lib/Target/R600/R600ControlFlowFinalizer.cpp [deleted file]
lib/Target/R600/R600Defines.h [deleted file]
lib/Target/R600/R600EmitClauseMarkers.cpp [deleted file]
lib/Target/R600/R600ExpandSpecialInstrs.cpp [deleted file]
lib/Target/R600/R600ISelLowering.cpp [deleted file]
lib/Target/R600/R600ISelLowering.h [deleted file]
lib/Target/R600/R600InstrFormats.td [deleted file]
lib/Target/R600/R600InstrInfo.cpp [deleted file]
lib/Target/R600/R600InstrInfo.h [deleted file]
lib/Target/R600/R600Instructions.td [deleted file]
lib/Target/R600/R600Intrinsics.td [deleted file]
lib/Target/R600/R600MachineFunctionInfo.cpp [deleted file]
lib/Target/R600/R600MachineFunctionInfo.h [deleted file]
lib/Target/R600/R600MachineScheduler.cpp [deleted file]
lib/Target/R600/R600MachineScheduler.h [deleted file]
lib/Target/R600/R600OptimizeVectorRegisters.cpp [deleted file]
lib/Target/R600/R600Packetizer.cpp [deleted file]
lib/Target/R600/R600RegisterInfo.cpp [deleted file]
lib/Target/R600/R600RegisterInfo.h [deleted file]
lib/Target/R600/R600RegisterInfo.td [deleted file]
lib/Target/R600/R600Schedule.td [deleted file]
lib/Target/R600/R600TextureIntrinsicsReplacer.cpp [deleted file]
lib/Target/R600/R700Instructions.td [deleted file]
lib/Target/R600/SIAnnotateControlFlow.cpp [deleted file]
lib/Target/R600/SIDefines.h [deleted file]
lib/Target/R600/SIFixControlFlowLiveIntervals.cpp [deleted file]
lib/Target/R600/SIFixSGPRCopies.cpp [deleted file]
lib/Target/R600/SIFixSGPRLiveRanges.cpp [deleted file]
lib/Target/R600/SIFoldOperands.cpp [deleted file]
lib/Target/R600/SIISelLowering.cpp [deleted file]
lib/Target/R600/SIISelLowering.h [deleted file]
lib/Target/R600/SIInsertWaits.cpp [deleted file]
lib/Target/R600/SIInstrFormats.td [deleted file]
lib/Target/R600/SIInstrInfo.cpp [deleted file]
lib/Target/R600/SIInstrInfo.h [deleted file]
lib/Target/R600/SIInstrInfo.td [deleted file]
lib/Target/R600/SIInstructions.td [deleted file]
lib/Target/R600/SIIntrinsics.td [deleted file]
lib/Target/R600/SILoadStoreOptimizer.cpp [deleted file]
lib/Target/R600/SILowerControlFlow.cpp [deleted file]
lib/Target/R600/SILowerI1Copies.cpp [deleted file]
lib/Target/R600/SIMachineFunctionInfo.cpp [deleted file]
lib/Target/R600/SIMachineFunctionInfo.h [deleted file]
lib/Target/R600/SIPrepareScratchRegs.cpp [deleted file]
lib/Target/R600/SIRegisterInfo.cpp [deleted file]
lib/Target/R600/SIRegisterInfo.h [deleted file]
lib/Target/R600/SIRegisterInfo.td [deleted file]
lib/Target/R600/SISchedule.td [deleted file]
lib/Target/R600/SIShrinkInstructions.cpp [deleted file]
lib/Target/R600/SITypeRewriter.cpp [deleted file]
lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp [deleted file]
lib/Target/R600/TargetInfo/CMakeLists.txt [deleted file]
lib/Target/R600/TargetInfo/LLVMBuild.txt [deleted file]
lib/Target/R600/TargetInfo/Makefile [deleted file]
lib/Target/R600/VIInstrFormats.td [deleted file]
lib/Target/R600/VIInstructions.td [deleted file]
test/CodeGen/AMDGPU/32-bit-local-address-space.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/README [new file with mode: 0644]
test/CodeGen/AMDGPU/add-debug.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/add.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/add_i64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/address-space.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/and.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/anyext.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/array-ptr-calc-i32.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/array-ptr-calc-i64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/atomic_load_add.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/atomic_load_sub.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/basic-branch.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/basic-loop.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/bfe_uint.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/bfi_int.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/big_alu.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/bitcast.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/bswap.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/build_vector.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/call.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/call_fs.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/cayman-loop-bug.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/cf-stack-bug.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/cf_end.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/cgp-addressing-modes.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/coalescer_remat.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/combine_vloads.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/commute-compares.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/commute_modifiers.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/complex-folding.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/concat_vectors.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/copy-illegal-type.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/copy-to-reg.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ctlz_zero_undef.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ctpop.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ctpop64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/cttz_zero_undef.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/cvt_f32_ubyte.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/debug.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/default-fp-mode.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/dot4-folding.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ds_read2.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ds_read2_offset_order.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ds_read2st64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ds_write2.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ds_write2st64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/elf.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/elf.r600.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/empty-function.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/endcf-loop-header.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/extload-private.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/extload.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/extract_vector_elt_i16.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fabs.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fabs.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fadd.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fadd64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fceil.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fceil64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fcmp-cnd.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fcmp-cnde-int-args.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fcmp.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fcmp64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fconst64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fcopysign.f32.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fcopysign.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fdiv.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fdiv.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fetch-limits.r600.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fetch-limits.r700+.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ffloor.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ffloor.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/flat-address-space.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/floor.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fma-combine.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fma.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fma.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmad.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmax.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmax3.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmax3.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmax_legacy.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmax_legacy.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmaxnum.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmaxnum.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmin.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmin3.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmin_legacy.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmin_legacy.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fminnum.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fminnum.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmul.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmul64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fmuladd.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fnearbyint.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fneg-fabs.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fneg-fabs.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fneg.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fneg.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fp-classify.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fp16_to_fp.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fp32_to_fp16.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fp_to_sint.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fp_to_sint.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fp_to_uint.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fp_to_uint.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fpext.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fptrunc.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/frem.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fsqrt.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fsub.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/fsub64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ftrunc.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ftrunc.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/gep-address-space.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/global-directive.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/global-extload-i1.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/global-extload-i16.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/global-extload-i32.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/global-extload-i8.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/global-zero-initializer.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/global_atomics.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/gv-const-addrspace-fail.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/gv-const-addrspace.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/half.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/hsa.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/i1-copy-implicit-def.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/i1-copy-phi.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/i8-to-double-to-float.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/icmp-select-sete-reverse-args.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/icmp64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/imm.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/indirect-addressing-si.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/indirect-private-64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/infinite-loop-evergreen.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/infinite-loop.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/inline-asm.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/inline-calls.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/input-mods.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/insert_subreg.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/insert_vector_elt.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/jump-address.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/kcache-fold.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/kernel-args.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/large-alloca.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/large-constant-initializer.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/lds-initializer.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/lds-oqap-crash.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/lds-output-queue.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/lds-size.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/lds-zero-initializer.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/legalizedag-bug-expand-setcc.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/lit.local.cfg [new file with mode: 0644]
test/CodeGen/AMDGPU/literals.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.barrier.global.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.barrier.local.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.u32.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.bfi.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.bfm.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.cvt_f32_ubyte.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.flbit.i32.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.fract.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.imad24.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.imax.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.imin.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.imul24.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.legacy.rsq.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.mul.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.tex.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.umad24.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.umax.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.umin.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.AMDGPU.umul24.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.gather4.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.getlod.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.image.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.image.sample.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.image.sample.o.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.imageload.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.load.dword.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.resinfo.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.sample.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.sampled.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.SI.tid.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.amdgpu.dp4.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.amdgpu.kilp.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.cos.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.exp2.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.log2.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.memcpy.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.pow.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.rint.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.rint.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.round.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.round.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.sin.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/llvm.sqrt.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/load-i1.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/load-input-fold.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/load.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/load.vec.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/load64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/local-64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/local-atomics.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/local-atomics64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/local-memory-two-objects.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/local-memory.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/loop-address.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/loop-idiom.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/lshl.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/lshr.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/m0-spill.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/mad-combine.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/mad-sub.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/mad_int24.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/mad_uint24.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/madak.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/madmk.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/max-literals.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/max.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/max3.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/merge-stores.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/min.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/min3.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/missing-store.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/mubuf.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/mul.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/mul_int24.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/mul_uint24.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/mulhu.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/no-shrink-extloads.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/operand-folding.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/operand-spacing.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/or.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/packetizer.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/parallelandifcollapse.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/parallelorifcollapse.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/predicate-dp4.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/predicates.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/private-memory-atomics.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/private-memory-broken.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/private-memory.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/pv-packing.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/pv.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/r600-encoding.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/r600-export-fix.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/r600cfg.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/reciprocal.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/register-count-comments.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/reorder-stores.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/rotl.i64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/rotl.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/rotr.i64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/rotr.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/rsq.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/rv7x0_count3.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/s_movk_i32.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/saddo.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/salu-to-valu.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/scalar_to_vector.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/schedule-fs-loop.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/schedule-global-loads.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/schedule-if-2.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/schedule-if.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/scratch-buffer.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/sdiv.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/sdivrem24.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/sdivrem64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/select-i1.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/select-vectors.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/select.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/select64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/selectcc-cnd.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/selectcc-cnde-int.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/selectcc-icmp-select-float.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/selectcc-opt.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/selectcc.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/set-dx10.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/setcc-equivalent.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/setcc-opt.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/setcc.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/setcc64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/seto.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/setuo.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/sext-eliminate.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/sext-in-reg.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/sgpr-control-flow.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/sgpr-copy.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/shared-op-cycle.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/shl.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/shl_add_constant.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/shl_add_ptr.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/si-annotate-cf-assertion.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/si-annotate-cf.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/si-lod-bias.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/si-sgpr-spill.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/si-spill-cf.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/si-vector-hang.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/sign_extend.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/simplify-demanded-bits-build-pair.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/sint_to_fp.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/sint_to_fp.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/smrd.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/split-scalar-i64-add.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/sra.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/srem.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/srl.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/ssubo.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/store-barrier.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/store-v3i32.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/store-v3i64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/store-vector-ptrs.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/store.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/store.r600.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/structurize.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/structurize1.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/sub.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/subreg-coalescer-crash.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/subreg-eliminate-dead.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/swizzle-export.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/tex-clause-antidep.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/texture-input-merge.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/trunc-cmp-constant.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/trunc-store-f64-to-f16.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/trunc-store-i1.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/trunc-vector-store-assertion-failure.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/trunc.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/tti-unroll-prefs.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/uaddo.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/udiv.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/udivrem.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/udivrem24.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/udivrem64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/uint_to_fp.f64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/uint_to_fp.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/unaligned-load-store.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/unroll.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/unsupported-cc.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/urecip.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/urem.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/usubo.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/v1i64-kernel-arg.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/v_cndmask.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/valu-i1.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/vector-alloca.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/vertex-fetch-encoding.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/vop-shrink.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/vselect.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/vselect64.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/vtx-fetch-branch.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/vtx-schedule.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/wait.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/work-item-intrinsics.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/xor.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/zero_extend.ll [new file with mode: 0644]
test/CodeGen/R600/32-bit-local-address-space.ll [deleted file]
test/CodeGen/R600/README [deleted file]
test/CodeGen/R600/add-debug.ll [deleted file]
test/CodeGen/R600/add.ll [deleted file]
test/CodeGen/R600/add_i64.ll [deleted file]
test/CodeGen/R600/address-space.ll [deleted file]
test/CodeGen/R600/and.ll [deleted file]
test/CodeGen/R600/anyext.ll [deleted file]
test/CodeGen/R600/array-ptr-calc-i32.ll [deleted file]
test/CodeGen/R600/array-ptr-calc-i64.ll [deleted file]
test/CodeGen/R600/atomic_cmp_swap_local.ll [deleted file]
test/CodeGen/R600/atomic_load_add.ll [deleted file]
test/CodeGen/R600/atomic_load_sub.ll [deleted file]
test/CodeGen/R600/basic-branch.ll [deleted file]
test/CodeGen/R600/basic-loop.ll [deleted file]
test/CodeGen/R600/bfe_uint.ll [deleted file]
test/CodeGen/R600/bfi_int.ll [deleted file]
test/CodeGen/R600/big_alu.ll [deleted file]
test/CodeGen/R600/bitcast.ll [deleted file]
test/CodeGen/R600/bswap.ll [deleted file]
test/CodeGen/R600/build_vector.ll [deleted file]
test/CodeGen/R600/call.ll [deleted file]
test/CodeGen/R600/call_fs.ll [deleted file]
test/CodeGen/R600/cayman-loop-bug.ll [deleted file]
test/CodeGen/R600/cf-stack-bug.ll [deleted file]
test/CodeGen/R600/cf_end.ll [deleted file]
test/CodeGen/R600/cgp-addressing-modes.ll [deleted file]
test/CodeGen/R600/coalescer_remat.ll [deleted file]
test/CodeGen/R600/codegen-prepare-addrmode-sext.ll [deleted file]
test/CodeGen/R600/combine_vloads.ll [deleted file]
test/CodeGen/R600/commute-compares.ll [deleted file]
test/CodeGen/R600/commute_modifiers.ll [deleted file]
test/CodeGen/R600/complex-folding.ll [deleted file]
test/CodeGen/R600/concat_vectors.ll [deleted file]
test/CodeGen/R600/copy-illegal-type.ll [deleted file]
test/CodeGen/R600/copy-to-reg.ll [deleted file]
test/CodeGen/R600/ctlz_zero_undef.ll [deleted file]
test/CodeGen/R600/ctpop.ll [deleted file]
test/CodeGen/R600/ctpop64.ll [deleted file]
test/CodeGen/R600/cttz_zero_undef.ll [deleted file]
test/CodeGen/R600/cvt_f32_ubyte.ll [deleted file]
test/CodeGen/R600/cvt_flr_i32_f32.ll [deleted file]
test/CodeGen/R600/cvt_rpi_i32_f32.ll [deleted file]
test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll [deleted file]
test/CodeGen/R600/debug.ll [deleted file]
test/CodeGen/R600/default-fp-mode.ll [deleted file]
test/CodeGen/R600/disconnected-predset-break-bug.ll [deleted file]
test/CodeGen/R600/dot4-folding.ll [deleted file]
test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll [deleted file]
test/CodeGen/R600/ds_read2.ll [deleted file]
test/CodeGen/R600/ds_read2_offset_order.ll [deleted file]
test/CodeGen/R600/ds_read2st64.ll [deleted file]
test/CodeGen/R600/ds_write2.ll [deleted file]
test/CodeGen/R600/ds_write2st64.ll [deleted file]
test/CodeGen/R600/elf.ll [deleted file]
test/CodeGen/R600/elf.r600.ll [deleted file]
test/CodeGen/R600/empty-function.ll [deleted file]
test/CodeGen/R600/endcf-loop-header.ll [deleted file]
test/CodeGen/R600/extload-private.ll [deleted file]
test/CodeGen/R600/extload.ll [deleted file]
test/CodeGen/R600/extract_vector_elt_i16.ll [deleted file]
test/CodeGen/R600/fabs.f64.ll [deleted file]
test/CodeGen/R600/fabs.ll [deleted file]
test/CodeGen/R600/fadd.ll [deleted file]
test/CodeGen/R600/fadd64.ll [deleted file]
test/CodeGen/R600/fceil.ll [deleted file]
test/CodeGen/R600/fceil64.ll [deleted file]
test/CodeGen/R600/fcmp-cnd.ll [deleted file]
test/CodeGen/R600/fcmp-cnde-int-args.ll [deleted file]
test/CodeGen/R600/fcmp.ll [deleted file]
test/CodeGen/R600/fcmp64.ll [deleted file]
test/CodeGen/R600/fconst64.ll [deleted file]
test/CodeGen/R600/fcopysign.f32.ll [deleted file]
test/CodeGen/R600/fcopysign.f64.ll [deleted file]
test/CodeGen/R600/fdiv.f64.ll [deleted file]
test/CodeGen/R600/fdiv.ll [deleted file]
test/CodeGen/R600/fetch-limits.r600.ll [deleted file]
test/CodeGen/R600/fetch-limits.r700+.ll [deleted file]
test/CodeGen/R600/ffloor.f64.ll [deleted file]
test/CodeGen/R600/ffloor.ll [deleted file]
test/CodeGen/R600/flat-address-space.ll [deleted file]
test/CodeGen/R600/floor.ll [deleted file]
test/CodeGen/R600/fma-combine.ll [deleted file]
test/CodeGen/R600/fma.f64.ll [deleted file]
test/CodeGen/R600/fma.ll [deleted file]
test/CodeGen/R600/fmad.ll [deleted file]
test/CodeGen/R600/fmax.ll [deleted file]
test/CodeGen/R600/fmax3.f64.ll [deleted file]
test/CodeGen/R600/fmax3.ll [deleted file]
test/CodeGen/R600/fmax_legacy.f64.ll [deleted file]
test/CodeGen/R600/fmax_legacy.ll [deleted file]
test/CodeGen/R600/fmaxnum.f64.ll [deleted file]
test/CodeGen/R600/fmaxnum.ll [deleted file]
test/CodeGen/R600/fmin.ll [deleted file]
test/CodeGen/R600/fmin3.ll [deleted file]
test/CodeGen/R600/fmin_legacy.f64.ll [deleted file]
test/CodeGen/R600/fmin_legacy.ll [deleted file]
test/CodeGen/R600/fminnum.f64.ll [deleted file]
test/CodeGen/R600/fminnum.ll [deleted file]
test/CodeGen/R600/fmul.ll [deleted file]
test/CodeGen/R600/fmul64.ll [deleted file]
test/CodeGen/R600/fmuladd.ll [deleted file]
test/CodeGen/R600/fnearbyint.ll [deleted file]
test/CodeGen/R600/fneg-fabs.f64.ll [deleted file]
test/CodeGen/R600/fneg-fabs.ll [deleted file]
test/CodeGen/R600/fneg.f64.ll [deleted file]
test/CodeGen/R600/fneg.ll [deleted file]
test/CodeGen/R600/fp-classify.ll [deleted file]
test/CodeGen/R600/fp16_to_fp.ll [deleted file]
test/CodeGen/R600/fp32_to_fp16.ll [deleted file]
test/CodeGen/R600/fp_to_sint.f64.ll [deleted file]
test/CodeGen/R600/fp_to_sint.ll [deleted file]
test/CodeGen/R600/fp_to_uint.f64.ll [deleted file]
test/CodeGen/R600/fp_to_uint.ll [deleted file]
test/CodeGen/R600/fpext.ll [deleted file]
test/CodeGen/R600/fptrunc.ll [deleted file]
test/CodeGen/R600/frem.ll [deleted file]
test/CodeGen/R600/fsqrt.ll [deleted file]
test/CodeGen/R600/fsub.ll [deleted file]
test/CodeGen/R600/fsub64.ll [deleted file]
test/CodeGen/R600/ftrunc.f64.ll [deleted file]
test/CodeGen/R600/ftrunc.ll [deleted file]
test/CodeGen/R600/gep-address-space.ll [deleted file]
test/CodeGen/R600/global-directive.ll [deleted file]
test/CodeGen/R600/global-extload-i1.ll [deleted file]
test/CodeGen/R600/global-extload-i16.ll [deleted file]
test/CodeGen/R600/global-extload-i32.ll [deleted file]
test/CodeGen/R600/global-extload-i8.ll [deleted file]
test/CodeGen/R600/global-zero-initializer.ll [deleted file]
test/CodeGen/R600/global_atomics.ll [deleted file]
test/CodeGen/R600/gv-const-addrspace-fail.ll [deleted file]
test/CodeGen/R600/gv-const-addrspace.ll [deleted file]
test/CodeGen/R600/half.ll [deleted file]
test/CodeGen/R600/hsa.ll [deleted file]
test/CodeGen/R600/i1-copy-implicit-def.ll [deleted file]
test/CodeGen/R600/i1-copy-phi.ll [deleted file]
test/CodeGen/R600/i8-to-double-to-float.ll [deleted file]
test/CodeGen/R600/icmp-select-sete-reverse-args.ll [deleted file]
test/CodeGen/R600/icmp64.ll [deleted file]
test/CodeGen/R600/imm.ll [deleted file]
test/CodeGen/R600/indirect-addressing-si.ll [deleted file]
test/CodeGen/R600/indirect-private-64.ll [deleted file]
test/CodeGen/R600/infinite-loop-evergreen.ll [deleted file]
test/CodeGen/R600/infinite-loop.ll [deleted file]
test/CodeGen/R600/inline-asm.ll [deleted file]
test/CodeGen/R600/inline-calls.ll [deleted file]
test/CodeGen/R600/input-mods.ll [deleted file]
test/CodeGen/R600/insert_subreg.ll [deleted file]
test/CodeGen/R600/insert_vector_elt.ll [deleted file]
test/CodeGen/R600/jump-address.ll [deleted file]
test/CodeGen/R600/kcache-fold.ll [deleted file]
test/CodeGen/R600/kernel-args.ll [deleted file]
test/CodeGen/R600/large-alloca.ll [deleted file]
test/CodeGen/R600/large-constant-initializer.ll [deleted file]
test/CodeGen/R600/lds-initializer.ll [deleted file]
test/CodeGen/R600/lds-oqap-crash.ll [deleted file]
test/CodeGen/R600/lds-output-queue.ll [deleted file]
test/CodeGen/R600/lds-size.ll [deleted file]
test/CodeGen/R600/lds-zero-initializer.ll [deleted file]
test/CodeGen/R600/legalizedag-bug-expand-setcc.ll [deleted file]
test/CodeGen/R600/lit.local.cfg [deleted file]
test/CodeGen/R600/literals.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.abs.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.bfi.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.bfm.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.brev.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.clamp.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.class.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.cube.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.div_scale.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.fract.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.imad24.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.imax.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.imin.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.imul24.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.kill.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.ldexp.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.mul.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.rcp.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.rsq.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.tex.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.trunc.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.umad24.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.umax.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.umin.ll [deleted file]
test/CodeGen/R600/llvm.AMDGPU.umul24.ll [deleted file]
test/CodeGen/R600/llvm.SI.fs.interp.ll [deleted file]
test/CodeGen/R600/llvm.SI.gather4.ll [deleted file]
test/CodeGen/R600/llvm.SI.getlod.ll [deleted file]
test/CodeGen/R600/llvm.SI.image.ll [deleted file]
test/CodeGen/R600/llvm.SI.image.sample.ll [deleted file]
test/CodeGen/R600/llvm.SI.image.sample.o.ll [deleted file]
test/CodeGen/R600/llvm.SI.imageload.ll [deleted file]
test/CodeGen/R600/llvm.SI.load.dword.ll [deleted file]
test/CodeGen/R600/llvm.SI.resinfo.ll [deleted file]
test/CodeGen/R600/llvm.SI.sample-masked.ll [deleted file]
test/CodeGen/R600/llvm.SI.sample.ll [deleted file]
test/CodeGen/R600/llvm.SI.sampled.ll [deleted file]
test/CodeGen/R600/llvm.SI.sendmsg-m0.ll [deleted file]
test/CodeGen/R600/llvm.SI.sendmsg.ll [deleted file]
test/CodeGen/R600/llvm.SI.tbuffer.store.ll [deleted file]
test/CodeGen/R600/llvm.SI.tid.ll [deleted file]
test/CodeGen/R600/llvm.amdgpu.dp4.ll [deleted file]
test/CodeGen/R600/llvm.amdgpu.kilp.ll [deleted file]
test/CodeGen/R600/llvm.amdgpu.lrp.ll [deleted file]
test/CodeGen/R600/llvm.cos.ll [deleted file]
test/CodeGen/R600/llvm.exp2.ll [deleted file]
test/CodeGen/R600/llvm.log2.ll [deleted file]
test/CodeGen/R600/llvm.memcpy.ll [deleted file]
test/CodeGen/R600/llvm.pow.ll [deleted file]
test/CodeGen/R600/llvm.rint.f64.ll [deleted file]
test/CodeGen/R600/llvm.rint.ll [deleted file]
test/CodeGen/R600/llvm.round.f64.ll [deleted file]
test/CodeGen/R600/llvm.round.ll [deleted file]
test/CodeGen/R600/llvm.sin.ll [deleted file]
test/CodeGen/R600/llvm.sqrt.ll [deleted file]
test/CodeGen/R600/load-i1.ll [deleted file]
test/CodeGen/R600/load-input-fold.ll [deleted file]
test/CodeGen/R600/load.ll [deleted file]
test/CodeGen/R600/load.vec.ll [deleted file]
test/CodeGen/R600/load64.ll [deleted file]
test/CodeGen/R600/local-64.ll [deleted file]
test/CodeGen/R600/local-atomics.ll [deleted file]
test/CodeGen/R600/local-atomics64.ll [deleted file]
test/CodeGen/R600/local-memory-two-objects.ll [deleted file]
test/CodeGen/R600/local-memory.ll [deleted file]
test/CodeGen/R600/loop-address.ll [deleted file]
test/CodeGen/R600/loop-idiom.ll [deleted file]
test/CodeGen/R600/lshl.ll [deleted file]
test/CodeGen/R600/lshr.ll [deleted file]
test/CodeGen/R600/m0-spill.ll [deleted file]
test/CodeGen/R600/mad-combine.ll [deleted file]
test/CodeGen/R600/mad-sub.ll [deleted file]
test/CodeGen/R600/mad_int24.ll [deleted file]
test/CodeGen/R600/mad_uint24.ll [deleted file]
test/CodeGen/R600/madak.ll [deleted file]
test/CodeGen/R600/madmk.ll [deleted file]
test/CodeGen/R600/max-literals.ll [deleted file]
test/CodeGen/R600/max.ll [deleted file]
test/CodeGen/R600/max3.ll [deleted file]
test/CodeGen/R600/merge-stores.ll [deleted file]
test/CodeGen/R600/min.ll [deleted file]
test/CodeGen/R600/min3.ll [deleted file]
test/CodeGen/R600/missing-store.ll [deleted file]
test/CodeGen/R600/mubuf.ll [deleted file]
test/CodeGen/R600/mul.ll [deleted file]
test/CodeGen/R600/mul_int24.ll [deleted file]
test/CodeGen/R600/mul_uint24.ll [deleted file]
test/CodeGen/R600/mulhu.ll [deleted file]
test/CodeGen/R600/no-initializer-constant-addrspace.ll [deleted file]
test/CodeGen/R600/no-shrink-extloads.ll [deleted file]
test/CodeGen/R600/operand-folding.ll [deleted file]
test/CodeGen/R600/operand-spacing.ll [deleted file]
test/CodeGen/R600/or.ll [deleted file]
test/CodeGen/R600/packetizer.ll [deleted file]
test/CodeGen/R600/parallelandifcollapse.ll [deleted file]
test/CodeGen/R600/parallelorifcollapse.ll [deleted file]
test/CodeGen/R600/predicate-dp4.ll [deleted file]
test/CodeGen/R600/predicates.ll [deleted file]
test/CodeGen/R600/private-memory-atomics.ll [deleted file]
test/CodeGen/R600/private-memory-broken.ll [deleted file]
test/CodeGen/R600/private-memory.ll [deleted file]
test/CodeGen/R600/pv-packing.ll [deleted file]
test/CodeGen/R600/pv.ll [deleted file]
test/CodeGen/R600/r600-encoding.ll [deleted file]
test/CodeGen/R600/r600-export-fix.ll [deleted file]
test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll [deleted file]
test/CodeGen/R600/r600cfg.ll [deleted file]
test/CodeGen/R600/reciprocal.ll [deleted file]
test/CodeGen/R600/register-count-comments.ll [deleted file]
test/CodeGen/R600/reorder-stores.ll [deleted file]
test/CodeGen/R600/rotl.i64.ll [deleted file]
test/CodeGen/R600/rotl.ll [deleted file]
test/CodeGen/R600/rotr.i64.ll [deleted file]
test/CodeGen/R600/rotr.ll [deleted file]
test/CodeGen/R600/rsq.ll [deleted file]
test/CodeGen/R600/rv7x0_count3.ll [deleted file]
test/CodeGen/R600/s_movk_i32.ll [deleted file]
test/CodeGen/R600/saddo.ll [deleted file]
test/CodeGen/R600/salu-to-valu.ll [deleted file]
test/CodeGen/R600/scalar_to_vector.ll [deleted file]
test/CodeGen/R600/schedule-fs-loop-nested-if.ll [deleted file]
test/CodeGen/R600/schedule-fs-loop-nested.ll [deleted file]
test/CodeGen/R600/schedule-fs-loop.ll [deleted file]
test/CodeGen/R600/schedule-global-loads.ll [deleted file]
test/CodeGen/R600/schedule-if-2.ll [deleted file]
test/CodeGen/R600/schedule-if.ll [deleted file]
test/CodeGen/R600/schedule-kernel-arg-loads.ll [deleted file]
test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll [deleted file]
test/CodeGen/R600/schedule-vs-if-nested-loop.ll [deleted file]
test/CodeGen/R600/scratch-buffer.ll [deleted file]
test/CodeGen/R600/sdiv.ll [deleted file]
test/CodeGen/R600/sdivrem24.ll [deleted file]
test/CodeGen/R600/sdivrem64.ll [deleted file]
test/CodeGen/R600/select-i1.ll [deleted file]
test/CodeGen/R600/select-vectors.ll [deleted file]
test/CodeGen/R600/select.ll [deleted file]
test/CodeGen/R600/select64.ll [deleted file]
test/CodeGen/R600/selectcc-cnd.ll [deleted file]
test/CodeGen/R600/selectcc-cnde-int.ll [deleted file]
test/CodeGen/R600/selectcc-icmp-select-float.ll [deleted file]
test/CodeGen/R600/selectcc-opt.ll [deleted file]
test/CodeGen/R600/selectcc.ll [deleted file]
test/CodeGen/R600/set-dx10.ll [deleted file]
test/CodeGen/R600/setcc-equivalent.ll [deleted file]
test/CodeGen/R600/setcc-opt.ll [deleted file]
test/CodeGen/R600/setcc.ll [deleted file]
test/CodeGen/R600/setcc64.ll [deleted file]
test/CodeGen/R600/seto.ll [deleted file]
test/CodeGen/R600/setuo.ll [deleted file]
test/CodeGen/R600/sext-eliminate.ll [deleted file]
test/CodeGen/R600/sext-in-reg.ll [deleted file]
test/CodeGen/R600/sgpr-control-flow.ll [deleted file]
test/CodeGen/R600/sgpr-copy-duplicate-operand.ll [deleted file]
test/CodeGen/R600/sgpr-copy.ll [deleted file]
test/CodeGen/R600/shared-op-cycle.ll [deleted file]
test/CodeGen/R600/shl.ll [deleted file]
test/CodeGen/R600/shl_add_constant.ll [deleted file]
test/CodeGen/R600/shl_add_ptr.ll [deleted file]
test/CodeGen/R600/si-annotate-cf-assertion.ll [deleted file]
test/CodeGen/R600/si-annotate-cf.ll [deleted file]
test/CodeGen/R600/si-lod-bias.ll [deleted file]
test/CodeGen/R600/si-sgpr-spill.ll [deleted file]
test/CodeGen/R600/si-spill-cf.ll [deleted file]
test/CodeGen/R600/si-triv-disjoint-mem-access.ll [deleted file]
test/CodeGen/R600/si-vector-hang.ll [deleted file]
test/CodeGen/R600/sign_extend.ll [deleted file]
test/CodeGen/R600/simplify-demanded-bits-build-pair.ll [deleted file]
test/CodeGen/R600/sint_to_fp.f64.ll [deleted file]
test/CodeGen/R600/sint_to_fp.ll [deleted file]
test/CodeGen/R600/smrd.ll [deleted file]
test/CodeGen/R600/split-scalar-i64-add.ll [deleted file]
test/CodeGen/R600/sra.ll [deleted file]
test/CodeGen/R600/srem.ll [deleted file]
test/CodeGen/R600/srl.ll [deleted file]
test/CodeGen/R600/ssubo.ll [deleted file]
test/CodeGen/R600/store-barrier.ll [deleted file]
test/CodeGen/R600/store-v3i32.ll [deleted file]
test/CodeGen/R600/store-v3i64.ll [deleted file]
test/CodeGen/R600/store-vector-ptrs.ll [deleted file]
test/CodeGen/R600/store.ll [deleted file]
test/CodeGen/R600/store.r600.ll [deleted file]
test/CodeGen/R600/structurize.ll [deleted file]
test/CodeGen/R600/structurize1.ll [deleted file]
test/CodeGen/R600/sub.ll [deleted file]
test/CodeGen/R600/subreg-coalescer-crash.ll [deleted file]
test/CodeGen/R600/subreg-eliminate-dead.ll [deleted file]
test/CodeGen/R600/swizzle-export.ll [deleted file]
test/CodeGen/R600/tex-clause-antidep.ll [deleted file]
test/CodeGen/R600/texture-input-merge.ll [deleted file]
test/CodeGen/R600/trunc-cmp-constant.ll [deleted file]
test/CodeGen/R600/trunc-store-f64-to-f16.ll [deleted file]
test/CodeGen/R600/trunc-store-i1.ll [deleted file]
test/CodeGen/R600/trunc-vector-store-assertion-failure.ll [deleted file]
test/CodeGen/R600/trunc.ll [deleted file]
test/CodeGen/R600/tti-unroll-prefs.ll [deleted file]
test/CodeGen/R600/uaddo.ll [deleted file]
test/CodeGen/R600/udiv.ll [deleted file]
test/CodeGen/R600/udivrem.ll [deleted file]
test/CodeGen/R600/udivrem24.ll [deleted file]
test/CodeGen/R600/udivrem64.ll [deleted file]
test/CodeGen/R600/uint_to_fp.f64.ll [deleted file]
test/CodeGen/R600/uint_to_fp.ll [deleted file]
test/CodeGen/R600/unaligned-load-store.ll [deleted file]
test/CodeGen/R600/unhandled-loop-condition-assertion.ll [deleted file]
test/CodeGen/R600/unroll.ll [deleted file]
test/CodeGen/R600/unsupported-cc.ll [deleted file]
test/CodeGen/R600/urecip.ll [deleted file]
test/CodeGen/R600/urem.ll [deleted file]
test/CodeGen/R600/use-sgpr-multiple-times.ll [deleted file]
test/CodeGen/R600/usubo.ll [deleted file]
test/CodeGen/R600/v1i64-kernel-arg.ll [deleted file]
test/CodeGen/R600/v_cndmask.ll [deleted file]
test/CodeGen/R600/valu-i1.ll [deleted file]
test/CodeGen/R600/vector-alloca.ll [deleted file]
test/CodeGen/R600/vertex-fetch-encoding.ll [deleted file]
test/CodeGen/R600/vop-shrink.ll [deleted file]
test/CodeGen/R600/vselect.ll [deleted file]
test/CodeGen/R600/vselect64.ll [deleted file]
test/CodeGen/R600/vtx-fetch-branch.ll [deleted file]
test/CodeGen/R600/vtx-schedule.ll [deleted file]
test/CodeGen/R600/wait.ll [deleted file]
test/CodeGen/R600/work-item-intrinsics.ll [deleted file]
test/CodeGen/R600/wrong-transalu-pos-fix.ll [deleted file]
test/CodeGen/R600/xor.ll [deleted file]
test/CodeGen/R600/zero_extend.ll [deleted file]
test/MC/AMDGPU/ds-err.s [new file with mode: 0644]
test/MC/AMDGPU/ds.s [new file with mode: 0644]
test/MC/AMDGPU/flat.s [new file with mode: 0644]
test/MC/AMDGPU/lit.local.cfg [new file with mode: 0644]
test/MC/AMDGPU/mubuf.s [new file with mode: 0644]
test/MC/AMDGPU/smrd.s [new file with mode: 0644]
test/MC/AMDGPU/sop1-err.s [new file with mode: 0644]
test/MC/AMDGPU/sop1.s [new file with mode: 0644]
test/MC/AMDGPU/sop2.s [new file with mode: 0644]
test/MC/AMDGPU/sopc.s [new file with mode: 0644]
test/MC/AMDGPU/sopk.s [new file with mode: 0644]
test/MC/AMDGPU/sopp.s [new file with mode: 0644]
test/MC/AMDGPU/vop1.s [new file with mode: 0644]
test/MC/AMDGPU/vop2-err.s [new file with mode: 0644]
test/MC/AMDGPU/vop2.s [new file with mode: 0644]
test/MC/AMDGPU/vop3-errs.s [new file with mode: 0644]
test/MC/AMDGPU/vop3.s [new file with mode: 0644]
test/MC/AMDGPU/vopc.s [new file with mode: 0644]
test/MC/R600/ds-err.s [deleted file]
test/MC/R600/ds.s [deleted file]
test/MC/R600/flat.s [deleted file]
test/MC/R600/lit.local.cfg [deleted file]
test/MC/R600/mubuf.s [deleted file]
test/MC/R600/smrd.s [deleted file]
test/MC/R600/sop1-err.s [deleted file]
test/MC/R600/sop1.s [deleted file]
test/MC/R600/sop2.s [deleted file]
test/MC/R600/sopc.s [deleted file]
test/MC/R600/sopk.s [deleted file]
test/MC/R600/sopp.s [deleted file]
test/MC/R600/vop1.s [deleted file]
test/MC/R600/vop2-err.s [deleted file]
test/MC/R600/vop2.s [deleted file]
test/MC/R600/vop3-errs.s [deleted file]
test/MC/R600/vop3.s [deleted file]
test/MC/R600/vopc.s [deleted file]

index 026fe47..da73149 100644 (file)
@@ -176,6 +176,7 @@ set(LLVM_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include)
 
 set(LLVM_ALL_TARGETS
   AArch64
+  AMDGPU
   ARM
   BPF
   CppBackend
@@ -184,7 +185,6 @@ set(LLVM_ALL_TARGETS
   MSP430
   NVPTX
   PowerPC
-  R600
   Sparc
   SystemZ
   X86
index 11ba051..5b70fbd 100644 (file)
@@ -1097,7 +1097,7 @@ if test "$llvm_cv_enable_crash_overrides" = "yes" ; then
 fi
 
 dnl List all possible targets
-ALL_TARGETS="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600 BPF"
+ALL_TARGETS="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ AMDGPU BPF"
 AC_SUBST(ALL_TARGETS,$ALL_TARGETS)
 
 dnl Allow specific targets to be specified for building (or not)
@@ -1132,7 +1132,8 @@ case "$enableval" in
         hexagon)  TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
         nvptx)    TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
         systemz)  TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
-        r600)     TARGETS_TO_BUILD="R600 $TARGETS_TO_BUILD" ;;
+        amdgpu)  ;&
+        r600)     TARGETS_TO_BUILD="AMDGPU $TARGETS_TO_BUILD" ;;
         host) case "$llvm_cv_target_arch" in
             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
index 6cb9f2d..73fce67 100755 (executable)
--- a/configure
+++ b/configure
@@ -5628,7 +5628,7 @@ _ACEOF
 
 fi
 
-ALL_TARGETS="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600 BPF"
+ALL_TARGETS="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ AMDGPU BPF"
 ALL_TARGETS=$ALL_TARGETS
 
 
@@ -5665,7 +5665,8 @@ case "$enableval" in
         hexagon)  TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
         nvptx)    TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
         systemz)  TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
-        r600)     TARGETS_TO_BUILD="R600 $TARGETS_TO_BUILD" ;;
+        amdgpu)  ;&
+        r600)     TARGETS_TO_BUILD="AMDGPU $TARGETS_TO_BUILD" ;;
         host) case "$llvm_cv_target_arch" in
             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst
new file mode 100644 (file)
index 0000000..3cb41ce
--- /dev/null
@@ -0,0 +1,94 @@
+==============================
+User Guide for AMDGPU Back-end
+==============================
+
+Introduction
+============
+
+The AMDGPU back-end provides ISA code generation for AMD GPUs, starting with
+the R600 family up until the current Volcanic Islands (GCN Gen 3).
+
+
+Assembler
+=========
+
+The assembler is currently considered experimental.
+
+For syntax examples look in test/MC/AMDGPU.
+
+Below are some of the currently supported features (modulo bugs).  These
+all apply to the Southern Islands ISA, Sea Islands and Volcanic Islands
+are also supported but may be missing some instructions and have more bugs:
+
+DS Instructions
+---------------
+All DS instructions are supported.
+
+FLAT Instructions
+------------------
+These instructions are only present in the Sea Islands and Volcanic Islands
+instruction set.  All FLAT instructions are supported for these architectures
+
+MUBUF Instructions
+------------------
+All non-atomic MUBUF instructions are supported.
+
+SMRD Instructions
+-----------------
+Only the s_load_dword* SMRD instructions are supported.
+
+SOP1 Instructions
+-----------------
+All SOP1 instructions are supported.
+
+SOP2 Instructions
+-----------------
+All SOP2 instructions are supported.
+
+SOPC Instructions
+-----------------
+All SOPC instructions are supported.
+
+SOPP Instructions
+-----------------
+
+Unless otherwise mentioned, all SOPP instructions that have one or more
+operands accept integer operands only.  No verification is performed
+on the operands, so it is up to the programmer to be familiar with the
+range of acceptable values.
+
+s_waitcnt
+^^^^^^^^^
+
+s_waitcnt accepts named arguments to specify which memory counter(s) to
+wait for.
+
+.. code-block:: nasm
+
+   // Wait for all counters to be 0
+   s_waitcnt 0
+
+   // Equivalent to s_waitcnt 0.  Counter names can also be delimited by
+   // '&' or ','.
+   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+
+   // Wait for vmcnt counter to be 1.
+   s_waitcnt vmcnt(1)
+
+VOP1, VOP2, VOP3, VOPC Instructions
+-----------------------------------
+
+All 32-bit and 64-bit encodings should work.
+
+The assembler will automatically detect which encoding size to use for
+VOP1, VOP2, and VOPC instructions based on the operands.  If you want to force
+a specific encoding size, you can add an _e32 (for 32-bit encoding) or
+_e64 (for 64-bit encoding) suffix to the instruction.  Most, but not all
+instructions support an explicit suffix.  These are all valid assembly
+strings:
+
+.. code-block:: nasm
+
+   v_mul_i32_i24 v1, v2, v3
+   v_mul_i32_i24_e32 v1, v2, v3
+   v_mul_i32_i24_e64 v1, v2, v3
index 2dfdc9b..900ba24 100644 (file)
@@ -68,8 +68,8 @@ Other documents, collections, notes
 * `PowerPC64 alignment of long doubles (from GCC) <http://gcc.gnu.org/ml/gcc-patches/2003-09/msg00997.html>`_
 * `Long branch stubs for powerpc64-linux (from binutils) <http://sources.redhat.com/ml/binutils/2002-04/msg00573.html>`_
 
-R600
-----
+AMDGPU
+------
 
 * `AMD R6xx shader ISA <http://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf>`_
 * `AMD R7xx shader ISA <http://developer.amd.com/wordpress/media/2012/10/R700-Family_Instruction_Set_Architecture.pdf>`_
index 18b3c1d..212fa0b 100644 (file)
@@ -711,7 +711,7 @@ used by people developing LLVM.
 |                         | as ``LLVM_ALL_TARGETS``, and can be set to include |
 |                         | out-of-tree targets. The default value includes:   |
 |                         | ``AArch64, ARM, CppBackend, Hexagon,               |
-|                         | Mips, MSP430, NVPTX, PowerPC, R600, Sparc,         |
+|                         | Mips, MSP430, NVPTX, PowerPC, AMDGPU, Sparc,       |
 |                         | SystemZ, X86, XCore``.                             |
 +-------------------------+----------------------------------------------------+
 | LLVM_ENABLE_DOXYGEN     | Build doxygen-based documentation from the source  |
diff --git a/docs/R600Usage.rst b/docs/R600Usage.rst
deleted file mode 100644 (file)
index 9bd16f4..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-============================
-User Guide for R600 Back-end
-============================
-
-Introduction
-============
-
-The R600 back-end provides ISA code generation for AMD GPUs, starting with
-the R600 family up until the current Volcanic Islands (GCN Gen 3).
-
-
-Assembler
-=========
-
-The assembler is currently considered experimental.
-
-For syntax examples look in test/MC/R600.
-
-Below some of the currently supported features (modulo bugs).  These
-all apply to the Southern Islands ISA, Sea Islands and Volcanic Islands
-are also supported but may be missing some instructions and have more bugs:
-
-DS Instructions
----------------
-All DS instructions are supported.
-
-FLAT Instructions
-------------------
-These instructions are only present in the Sea Islands and Volcanic Islands
-instruction set.  All FLAT instructions are supported for these architectures
-
-MUBUF Instructions
-------------------
-All non-atomic MUBUF instructions are supported.
-
-SMRD Instructions
------------------
-Only the s_load_dword* SMRD instructions are supported.
-
-SOP1 Instructions
------------------
-All SOP1 instructions are supported.
-
-SOP2 Instructions
------------------
-All SOP2 instructions are supported.
-
-SOPC Instructions
------------------
-All SOPC instructions are supported.
-
-SOPP Instructions
------------------
-
-Unless otherwise mentioned, all SOPP instructions that have one or more
-operands accept integer operands only.  No verification is performed
-on the operands, so it is up to the programmer to be familiar with the
-range or acceptable values.
-
-s_waitcnt
-^^^^^^^^^
-
-s_waitcnt accepts named arguments to specify which memory counter(s) to
-wait for.
-
-.. code-block:: nasm
-
-   // Wait for all counters to be 0
-   s_waitcnt 0
-
-   // Equivalent to s_waitcnt 0.  Counter names can also be delimited by
-   // '&' or ','.
-   s_waitcnt vmcnt(0) expcnt(0) lgkcmt(0)
-
-   // Wait for vmcnt counter to be 1.
-   s_waitcnt vmcnt(1)
-
-VOP1, VOP2, VOP3, VOPC Instructions
------------------------------------
-
-All 32-bit and 64-bit encodings should work.
-
-The assembler will automatically detect which encoding size to use for
-VOP1, VOP2, and VOPC instructions based on the operands.  If you want to force
-a specific encoding size, you can add an _e32 (for 32-bit encoding) or
-_e64 (for 64-bit encoding) suffix to the instruction.  Most, but not all
-instructions support an explicit suffix.  These are all valid assembly
-strings:
-
-.. code-block:: nasm
-
-   v_mul_i32_i24 v1, v2, v3
-   v_mul_i32_i24_e32 v1, v2, v3
-   v_mul_i32_i24_e64 v1, v2, v3
index 2cc5b8b..0b68118 100644 (file)
@@ -252,7 +252,7 @@ For API clients and LLVM developers.
    WritingAnLLVMPass
    HowToUseAttributes
    NVPTXUsage
-   R600Usage
+   AMDGPUUsage
    StackMaps
    InAlloca
    BigEndianNEON
@@ -338,8 +338,8 @@ For API clients and LLVM developers.
 :doc:`NVPTXUsage`
    This document describes using the NVPTX back-end to compile GPU kernels.
 
-:doc:`R600Usage`
-   This document describes how to use the R600 back-end.
+:doc:`AMDGPUUsage`
+   This document describes how to use the AMDGPU back-end.
 
 :doc:`StackMaps`
   LLVM support for mapping instruction addresses to the location of
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
new file mode 100644 (file)
index 0000000..0a05d25
--- /dev/null
@@ -0,0 +1,148 @@
+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPU_H
+#define LLVM_LIB_TARGET_R600_AMDGPU_H
+
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AMDGPUInstrPrinter;
+class AMDGPUSubtarget;
+class AMDGPUTargetMachine;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+// R600 Passes
+FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
+FunctionPass *createR600TextureIntrinsicsReplacer();
+FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600EmitClauseMarkers();
+FunctionPass *createR600ClauseMergePass(TargetMachine &tm);
+FunctionPass *createR600Packetizer(TargetMachine &tm);
+FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
+FunctionPass *createAMDGPUCFGStructurizerPass();
+
+// SI Passes
+FunctionPass *createSITypeRewriter();
+FunctionPass *createSIAnnotateControlFlowPass();
+FunctionPass *createSIFoldOperandsPass();
+FunctionPass *createSILowerI1CopiesPass();
+FunctionPass *createSIShrinkInstructionsPass();
+FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
+FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
+FunctionPass *createSIFixControlFlowLiveIntervalsPass();
+FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
+FunctionPass *createSIFixSGPRLiveRangesPass();
+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
+FunctionPass *createSIInsertWaits(TargetMachine &tm);
+FunctionPass *createSIPrepareScratchRegs();
+
+void initializeSIFoldOperandsPass(PassRegistry &);
+extern char &SIFoldOperandsID;
+
+void initializeSILowerI1CopiesPass(PassRegistry &);
+extern char &SILowerI1CopiesID;
+
+void initializeSILoadStoreOptimizerPass(PassRegistry &);
+extern char &SILoadStoreOptimizerID;
+
+// Passes common to R600 and SI
+FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
+Pass *createAMDGPUStructurizeCFGPass();
+FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
+ModulePass *createAMDGPUAlwaysInlinePass();
+
+void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
+extern char &SIFixControlFlowLiveIntervalsID;
+
+void initializeSIFixSGPRLiveRangesPass(PassRegistry&);
+extern char &SIFixSGPRLiveRangesID;
+
+
+extern Target TheAMDGPUTarget;
+extern Target TheGCNTarget;
+
+namespace AMDGPU {
+enum TargetIndex {
+  TI_CONSTDATA_START,
+  TI_SCRATCH_RSRC_DWORD0,
+  TI_SCRATCH_RSRC_DWORD1,
+  TI_SCRATCH_RSRC_DWORD2,
+  TI_SCRATCH_RSRC_DWORD3
+};
+}
+
+#define END_OF_TEXT_LABEL_NAME "EndOfTextLabel"
+
+} // End namespace llvm
+
+namespace ShaderType {
+  enum Type {
+    PIXEL = 0,
+    VERTEX = 1,
+    GEOMETRY = 2,
+    COMPUTE = 3
+  };
+}
+
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory,
+/// however on the GPU, each address space points to
+/// a separate piece of memory that is unique from other
+/// memory locations.
+namespace AMDGPUAS {
+enum AddressSpaces : unsigned {
+  PRIVATE_ADDRESS  = 0, ///< Address space for private memory.
+  GLOBAL_ADDRESS   = 1, ///< Address space for global memory (RAT0, VTX0).
+  CONSTANT_ADDRESS = 2, ///< Address space for constant memory
+  LOCAL_ADDRESS    = 3, ///< Address space for local memory.
+  FLAT_ADDRESS     = 4, ///< Address space for flat memory.
+  REGION_ADDRESS   = 5, ///< Address space for region memory.
+  PARAM_D_ADDRESS  = 6, ///< Address space for direct addressable parameter memory (CONST0)
+  PARAM_I_ADDRESS  = 7, ///< Address space for indirect addressable parameter memory (VTX1)
+
+  // Do not re-order the CONSTANT_BUFFER_* enums.  Several places depend on this
+  // order to be able to dynamically index a constant buffer, for example:
+  //
+  // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
+
+  CONSTANT_BUFFER_0 = 8,
+  CONSTANT_BUFFER_1 = 9,
+  CONSTANT_BUFFER_2 = 10,
+  CONSTANT_BUFFER_3 = 11,
+  CONSTANT_BUFFER_4 = 12,
+  CONSTANT_BUFFER_5 = 13,
+  CONSTANT_BUFFER_6 = 14,
+  CONSTANT_BUFFER_7 = 15,
+  CONSTANT_BUFFER_8 = 16,
+  CONSTANT_BUFFER_9 = 17,
+  CONSTANT_BUFFER_10 = 18,
+  CONSTANT_BUFFER_11 = 19,
+  CONSTANT_BUFFER_12 = 20,
+  CONSTANT_BUFFER_13 = 21,
+  CONSTANT_BUFFER_14 = 22,
+  CONSTANT_BUFFER_15 = 23,
+  ADDRESS_NONE = 24, ///< Address space for unknown memory.
+  LAST_ADDRESS = ADDRESS_NONE,
+
+  // Some places use this if the address space can't be determined.
+  UNKNOWN_ADDRESS_SPACE = ~0u
+};
+
+} // namespace AMDGPUAS
+
+#endif
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
new file mode 100644 (file)
index 0000000..2e7e39a
--- /dev/null
@@ -0,0 +1,266 @@
+//===-- AMDGPU.td - AMDGPU Tablegen files ------------------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+// Debugging Features
+
+def FeatureDumpCode : SubtargetFeature <"DumpCode",
+        "DumpCode",
+        "true",
+        "Dump MachineInstrs in the CodeEmitter">;
+
+def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
+        "DumpCode",
+        "true",
+        "Dump MachineInstrs in the CodeEmitter">;
+
+def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
+        "EnableIRStructurizer",
+        "false",
+        "Disable IR Structurizer">;
+
+def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
+        "EnablePromoteAlloca",
+        "true",
+        "Enable promote alloca pass">;
+
+// Target features
+
+def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
+        "EnableIfCvt",
+        "false",
+        "Disable the if conversion pass">;
+
+def FeatureFP64 : SubtargetFeature<"fp64",
+        "FP64",
+        "true",
+        "Enable double precision operations">;
+
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+        "FP64Denormals",
+        "true",
+        "Enable double precision denormal handling",
+        [FeatureFP64]>;
+
+def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
+        "FastFMAF32",
+        "true",
+        "Assuming f32 fma is at least as fast as mul + add",
+        []>;
+
+// Some instructions do not support denormals despite this flag. Using
+// fp32 denormals also causes instructions to run at the double
+// precision rate for the device.
+def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
+        "FP32Denormals",
+        "true",
+        "Enable single precision denormal handling">;
+
+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
+        "Is64bit",
+        "true",
+        "Specify if 64-bit addressing should be used">;
+
+def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
+        "R600ALUInst",
+        "false",
+        "Older version of ALU instructions encoding">;
+
+def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
+        "HasVertexCache",
+        "true",
+        "Specify use of dedicated vertex cache">;
+
+def FeatureCaymanISA : SubtargetFeature<"caymanISA",
+        "CaymanISA",
+        "true",
+        "Use Cayman ISA">;
+
+def FeatureCFALUBug : SubtargetFeature<"cfalubug",
+        "CFALUBug",
+        "true",
+        "GPU has CF_ALU bug">;
+
+// XXX - This should probably be removed once enabled by default
+def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
+        "EnableLoadStoreOpt",
+        "true",
+        "Enable SI load/store optimizer pass">;
+
+def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
+        "FlatAddressSpace",
+        "true",
+        "Support flat address space">;
+
+def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
+        "EnableVGPRSpilling",
+        "true",
+        "Enable spilling of VGPRs to scratch memory">;
+
+def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
+        "SGPRInitBug",
+        "true",
+        "VI SGPR initilization bug requiring a fixed SGPR allocation size">;
+
+class SubtargetFeatureFetchLimit <string Value> :
+                          SubtargetFeature <"fetch"#Value,
+        "TexVTXClauseSize",
+        Value,
+        "Limit the maximum number of fetches in a clause to "#Value>;
+
+def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
+def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
+
+class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
+        "wavefrontsize"#Value,
+        "WavefrontSize",
+        !cast<string>(Value),
+        "The number of threads per wavefront">;
+
+def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
+def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
+def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
+
+class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
+      "ldsbankcount"#Value,
+      "LDSBankCount",
+      !cast<string>(Value),
+      "The number of LDS banks per compute unit.">;
+
+def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
+def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
+
+class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
+        "localmemorysize"#Value,
+        "LocalMemorySize",
+        !cast<string>(Value),
+        "The size of local memory in bytes">;
+
+def FeatureGCN : SubtargetFeature<"gcn",
+        "IsGCN",
+        "true",
+        "GCN or newer GPU">;
+
+def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
+        "GCN1Encoding",
+        "true",
+        "Encoding format for SI and CI">;
+
+def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
+        "GCN3Encoding",
+        "true",
+        "Encoding format for VI">;
+
+def FeatureCIInsts : SubtargetFeature<"ci-insts",
+        "CIInsts",
+        "true",
+        "Additional intstructions for CI+">;
+
+// Dummy feature used to disable assembler instructions.
+def FeatureDisable : SubtargetFeature<"",
+                                      "FeatureDisable","true",
+                                      "Dummy feature to disable assembler"
+                                      " instructions">;
+
+class SubtargetFeatureGeneration <string Value,
+                                  list<SubtargetFeature> Implies> :
+        SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
+                          Value#" GPU generation", Implies>;
+
+def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
+def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
+def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
+
+def FeatureR600 : SubtargetFeatureGeneration<"R600",
+        [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]>;
+
+def FeatureR700 : SubtargetFeatureGeneration<"R700",
+        [FeatureFetchLimit16, FeatureLocalMemorySize0]>;
+
+def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
+        [FeatureFetchLimit16, FeatureLocalMemorySize32768]>;
+
+def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
+        [FeatureFetchLimit16, FeatureWavefrontSize64,
+         FeatureLocalMemorySize32768]
+>;
+
+def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
+        [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
+         FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
+         FeatureLDSBankCount32]>;
+
+def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
+        [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
+         FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
+         FeatureGCN1Encoding, FeatureCIInsts]>;
+
+def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
+        [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
+         FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
+         FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>;
+
+//===----------------------------------------------------------------------===//
+
+def AMDGPUInstrInfo : InstrInfo {
+  let guessInstructionProperties = 1;
+  let noNamedPositionallyEncodedOperands = 1;
+}
+
+def AMDGPUAsmParser : AsmParser {
+  // Some of the R600 registers have the same name, so this crashes.
+  // For example T0_XYZW and T0_XY both have the asm name T0.
+  let ShouldEmitMatchRegisterName = 0;
+}
+
+def AMDGPU : Target {
+  // Pull in Instruction Info:
+  let InstructionSet = AMDGPUInstrInfo;
+  let AssemblyParsers = [AMDGPUAsmParser];
+}
+
+// Dummy Instruction itineraries for pseudo instructions
+def ALU_NULL : FuncUnit;
+def NullALU : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Predicate helper class
+//===----------------------------------------------------------------------===//
+
+def TruePredicate : Predicate<"true">;
+def isSICI : Predicate<
+  "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
+>, AssemblerPredicate<"FeatureGCN1Encoding">;
+
+class PredicateControl {
+  Predicate SubtargetPredicate;
+  Predicate SIAssemblerPredicate = isSICI;
+  list<Predicate> AssemblerPredicates = [];
+  Predicate AssemblerPredicate = TruePredicate;
+  list<Predicate> OtherPredicates = [];
+  list<Predicate> Predicates = !listconcat([SubtargetPredicate, AssemblerPredicate],
+                                            AssemblerPredicates,
+                                            OtherPredicates);
+}
+
+// Include AMDGPU TD files
+include "R600Schedule.td"
+include "SISchedule.td"
+include "Processors.td"
+include "AMDGPUInstrInfo.td"
+include "AMDGPUIntrinsics.td"
+include "AMDGPURegisterInfo.td"
+include "AMDGPUInstructions.td"
+include "AMDGPUCallingConv.td"
diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
new file mode 100644 (file)
index 0000000..0b426bc
--- /dev/null
@@ -0,0 +1,67 @@
+//===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass marks all internal functions as always_inline and creates
+/// duplicates of all other functions and marks the duplicates as always_inline.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUAlwaysInline : public ModulePass {
+
+  static char ID;
+
+public:
+  AMDGPUAlwaysInline() : ModulePass(ID) { }
+  bool runOnModule(Module &M) override;
+  const char *getPassName() const override { return "AMDGPU Always Inline Pass"; }
+};
+
+} // End anonymous namespace
+
+char AMDGPUAlwaysInline::ID = 0;
+
+bool AMDGPUAlwaysInline::runOnModule(Module &M) {
+
+  std::vector<Function*> FuncsToClone;
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    Function &F = *I;
+    if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
+        !F.hasFnAttribute(Attribute::NoInline))
+      FuncsToClone.push_back(&F);
+  }
+
+  for (Function *F : FuncsToClone) {
+    ValueToValueMapTy VMap;
+    Function *NewFunc = CloneFunction(F, VMap, false);
+    NewFunc->setLinkage(GlobalValue::InternalLinkage);
+    F->getParent()->getFunctionList().push_back(NewFunc);
+    F->replaceAllUsesWith(NewFunc);
+  }
+
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    Function &F = *I;
+    if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) {
+      F.addFnAttr(Attribute::AlwaysInline);
+    }
+  }
+  return false;
+}
+
+ModulePass *llvm::createAMDGPUAlwaysInlinePass() {
+  return new AMDGPUAlwaysInline();
+}
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
new file mode 100644 (file)
index 0000000..29c2da6
--- /dev/null
@@ -0,0 +1,600 @@
+//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
+/// code.  When passed an MCAsmStreamer it prints assembly and when passed
+/// an MCObjectStreamer it outputs binary code.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUAsmPrinter.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "AMDGPU.h"
+#include "AMDKernelCodeT.h"
+#include "AMDGPUSubtarget.h"
+#include "R600Defines.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "SIDefines.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+// TODO: This should get the default rounding mode from the kernel. We just set
+// the default here, but this could change if the OpenCL rounding mode pragmas
+// are used.
+//
+// The denormal mode here should match what is reported by the OpenCL runtime
+// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
+// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
+//
+// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
+// precision, and leaves single precision to flush all and does not report
+// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
+// CL_FP_DENORM for both.
+//
+// FIXME: It seems some instructions do not support single precision denormals
+// regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
+// and sin_f32, cos_f32 on most parts).
+
+// We want to use these instructions, and using fp32 denormals also causes
+// instructions to run at the double precision rate for the device so it's
+// probably best to just report no single precision denormals.
+static uint32_t getFPMode(const MachineFunction &F) {
+  const AMDGPUSubtarget& ST = F.getSubtarget<AMDGPUSubtarget>();
+  // TODO: Is there any real use for the flush in only / flush out only modes?
+
+  uint32_t FP32Denormals =
+    ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
+  uint32_t FP64Denormals =
+    ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
+  return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
+         FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
+         FP_DENORM_MODE_SP(FP32Denormals) |
+         FP_DENORM_MODE_DP(FP64Denormals);
+}
+
+static AsmPrinter *
+createAMDGPUAsmPrinterPass(TargetMachine &tm,
+                           std::unique_ptr<MCStreamer> &&Streamer) {
+  return new AMDGPUAsmPrinter(tm, std::move(Streamer));
+}
+
+extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
+  TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
+  TargetRegistry::RegisterAsmPrinter(TheGCNTarget, createAMDGPUAsmPrinterPass);
+}
+
+AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
+                                   std::unique_ptr<MCStreamer> Streamer)
+    : AsmPrinter(TM, std::move(Streamer)) {}
+
+void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
+
+  // This label is used to mark the end of the .text section.
+  const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+  OutStreamer->SwitchSection(TLOF.getTextSection());
+  MCSymbol *EndOfTextLabel =
+      OutContext.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+  OutStreamer->EmitLabel(EndOfTextLabel);
+}
+
+bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+
+  // The starting address of all shader programs must be 256 bytes aligned.
+  MF.setAlignment(8);
+
+  SetupMachineFunction(MF);
+
+  MCContext &Context = getObjFileLowering().getContext();
+  MCSectionELF *ConfigSection =
+      Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
+  OutStreamer->SwitchSection(ConfigSection);
+
+  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+  SIProgramInfo KernelInfo;
+  if (STM.isAmdHsaOS()) {
+    getSIProgramInfo(KernelInfo, MF);
+    EmitAmdKernelCodeT(MF, KernelInfo);
+    OutStreamer->EmitCodeAlignment(2 << (MF.getAlignment() - 1));
+  } else if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+    getSIProgramInfo(KernelInfo, MF);
+    EmitProgramInfoSI(MF, KernelInfo);
+  } else {
+    EmitProgramInfoR600(MF);
+  }
+
+  DisasmLines.clear();
+  HexLines.clear();
+  DisasmLineMaxLen = 0;
+
+  EmitFunctionBody();
+
+  if (isVerbose()) {
+    MCSectionELF *CommentSection =
+        Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
+    OutStreamer->SwitchSection(CommentSection);
+
+    if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+      OutStreamer->emitRawComment(" Kernel info:", false);
+      OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
+                                  false);
+      OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
+                                  false);
+      OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
+                                  false);
+      OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
+                                  false);
+      OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
+                                  false);
+      OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
+                                  false);
+    } else {
+      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+      OutStreamer->emitRawComment(
+        Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
+    }
+  }
+
+  if (STM.dumpCode()) {
+
+    OutStreamer->SwitchSection(
+        Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
+
+    for (size_t i = 0; i < DisasmLines.size(); ++i) {
+      std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
+      Comment += " ; " + HexLines[i] + "\n";
+
+      OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
+      OutStreamer->EmitBytes(StringRef(Comment));
+    }
+  }
+
+  return false;
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
+  unsigned MaxGPR = 0;
+  bool killPixel = false;
+  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+  const R600RegisterInfo *RI =
+      static_cast<const R600RegisterInfo *>(STM.getRegisterInfo());
+  const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+
+  for (const MachineBasicBlock &MBB : MF) {
+    for (const MachineInstr &MI : MBB) {
+      if (MI.getOpcode() == AMDGPU::KILLGT)
+        killPixel = true;
+      unsigned numOperands = MI.getNumOperands();
+      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+        const MachineOperand &MO = MI.getOperand(op_idx);
+        if (!MO.isReg())
+          continue;
+        unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
+
+        // Register with value > 127 aren't GPR
+        if (HWReg > 127)
+          continue;
+        MaxGPR = std::max(MaxGPR, HWReg);
+      }
+    }
+  }
+
+  unsigned RsrcReg;
+  if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
+    // Evergreen / Northern Islands
+    switch (MFI->getShaderType()) {
+    default: // Fall through
+    case ShaderType::COMPUTE:  RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
+    case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
+    case ShaderType::PIXEL:    RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
+    case ShaderType::VERTEX:   RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
+    }
+  } else {
+    // R600 / R700
+    switch (MFI->getShaderType()) {
+    default: // Fall through
+    case ShaderType::GEOMETRY: // Fall through
+    case ShaderType::COMPUTE:  // Fall through
+    case ShaderType::VERTEX:   RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
+    case ShaderType::PIXEL:    RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
+    }
+  }
+
+  OutStreamer->EmitIntValue(RsrcReg, 4);
+  OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
+                           S_STACK_SIZE(MFI->StackSize), 4);
+  OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
+  OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
+
+  if (MFI->getShaderType() == ShaderType::COMPUTE) {
+    OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
+    OutStreamer->EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
+  }
+}
+
+void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
+                                        const MachineFunction &MF) const {
+  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  uint64_t CodeSize = 0;
+  unsigned MaxSGPR = 0;
+  unsigned MaxVGPR = 0;
+  bool VCCUsed = false;
+  bool FlatUsed = false;
+  const SIRegisterInfo *RI =
+      static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
+
+  for (const MachineBasicBlock &MBB : MF) {
+    for (const MachineInstr &MI : MBB) {
+      // TODO: CodeSize should account for multiple functions.
+      CodeSize += MI.getDesc().Size;
+
+      unsigned numOperands = MI.getNumOperands();
+      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+        const MachineOperand &MO = MI.getOperand(op_idx);
+        unsigned width = 0;
+        bool isSGPR = false;
+
+        if (!MO.isReg()) {
+          continue;
+        }
+        unsigned reg = MO.getReg();
+        if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
+           reg == AMDGPU::VCC_HI) {
+          VCCUsed = true;
+          continue;
+        } else if (reg == AMDGPU::FLAT_SCR ||
+                   reg == AMDGPU::FLAT_SCR_LO ||
+                   reg == AMDGPU::FLAT_SCR_HI) {
+          FlatUsed = true;
+          continue;
+        }
+
+        switch (reg) {
+        default: break;
+        case AMDGPU::SCC:
+        case AMDGPU::EXEC:
+        case AMDGPU::M0:
+          continue;
+        }
+
+        if (AMDGPU::SReg_32RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 1;
+        } else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 1;
+        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 2;
+        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 2;
+        } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 3;
+        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 4;
+        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 4;
+        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 8;
+        } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 8;
+        } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 16;
+        } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 16;
+        } else {
+          llvm_unreachable("Unknown register class");
+        }
+        unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
+        unsigned maxUsed = hwReg + width - 1;
+        if (isSGPR) {
+          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
+        } else {
+          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
+        }
+      }
+    }
+  }
+
+  if (VCCUsed)
+    MaxSGPR += 2;
+
+  if (FlatUsed)
+    MaxSGPR += 2;
+
+  // We found the maximum register index. They start at 0, so add one to get the
+  // number of registers.
+  ProgInfo.NumVGPR = MaxVGPR + 1;
+  ProgInfo.NumSGPR = MaxSGPR + 1;
+
+  if (STM.hasSGPRInitBug()) {
+    if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG)
+      llvm_unreachable("Too many SGPRs used with the SGPR init bug");
+
+    ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+  }
+
+  ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
+  ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
+  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
+  // register.
+  ProgInfo.FloatMode = getFPMode(MF);
+
+  // XXX: Not quite sure what this does, but sc seems to unset this.
+  ProgInfo.IEEEMode = 0;
+
+  // Do not clamp NAN to 0.
+  ProgInfo.DX10Clamp = 0;
+
+  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+  ProgInfo.ScratchSize = FrameInfo->estimateStackSize(MF);
+
+  ProgInfo.FlatUsed = FlatUsed;
+  ProgInfo.VCCUsed = VCCUsed;
+  ProgInfo.CodeLen = CodeSize;
+
+  unsigned LDSAlignShift;
+  if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+    // LDS is allocated in 64 dword blocks.
+    LDSAlignShift = 8;
+  } else {
+    // LDS is allocated in 128 dword blocks.
+    LDSAlignShift = 9;
+  }
+
+  unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
+                          MFI->getMaximumWorkGroupSize(MF);
+
+  ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
+  ProgInfo.LDSBlocks =
+     RoundUpToAlignment(ProgInfo.LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
+
+  // Scratch is allocated in 256 dword blocks.
+  unsigned ScratchAlignShift = 10;
+  // We need to program the hardware with the amount of scratch memory that
+  // is used by the entire wave.  ProgInfo.ScratchSize is the amount of
+  // scratch memory used per thread.
+  ProgInfo.ScratchBlocks =
+    RoundUpToAlignment(ProgInfo.ScratchSize * STM.getWavefrontSize(),
+                       1 << ScratchAlignShift) >> ScratchAlignShift;
+
+  ProgInfo.ComputePGMRSrc1 =
+      S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
+      S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
+      S_00B848_PRIORITY(ProgInfo.Priority) |
+      S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
+      S_00B848_PRIV(ProgInfo.Priv) |
+      S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
+      S_00B848_IEEE_MODE(ProgInfo.DebugMode) |
+      S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
+
+  ProgInfo.ComputePGMRSrc2 =
+      S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
+      S_00B84C_USER_SGPR(MFI->NumUserSGPRs) |
+      S_00B84C_TGID_X_EN(1) |
+      S_00B84C_TGID_Y_EN(1) |
+      S_00B84C_TGID_Z_EN(1) |
+      S_00B84C_TG_SIZE_EN(1) |
+      S_00B84C_TIDIG_COMP_CNT(2) |
+      S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks);
+}
+
+static unsigned getRsrcReg(unsigned ShaderType) {
+  switch (ShaderType) {
+  default: // Fall through
+  case ShaderType::COMPUTE:  return R_00B848_COMPUTE_PGM_RSRC1;
+  case ShaderType::GEOMETRY: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
+  case ShaderType::PIXEL:    return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
+  case ShaderType::VERTEX:   return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
+  }
+}
+
/// Emit SI program state as a stream of (register, value) dword pairs that
/// the GPU driver reads to program the hardware before launching the shader.
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
                                         const SIProgramInfo &KernelInfo) {
  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  unsigned RsrcReg = getRsrcReg(MFI->getShaderType());

  if (MFI->getShaderType() == ShaderType::COMPUTE) {
    // Compute kernels get both RSRC1 and RSRC2 plus the scratch ring size.
    OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);

    OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);

    OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
    OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);

    OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
    OutStreamer->EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);

    // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
    // 0" comment but I don't see a corresponding field in the register spec.
  } else {
    // Graphics shaders only get the register-count fields of RSRC1, and the
    // scratch ring size when VGPR spilling is enabled.
    OutStreamer->EmitIntValue(RsrcReg, 4);
    OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
                              S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
    if (STM.isVGPRSpillingEnabled(MFI)) {
      OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
      OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
    }
  }

  if (MFI->getShaderType() == ShaderType::PIXEL) {
    // Pixel shaders additionally report extra LDS usage and which PS inputs
    // must be enabled.
    OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
    OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
    OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
    OutStreamer->EmitIntValue(MFI->PSInputAddr, 4);
  }
}
+
/// Build and emit the HSA amd_kernel_code_t header for \p MF: an ".hsa.version"
/// string section, optional verbose raw comments describing the header fields,
/// and finally the raw header bytes in the text section.
void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
                                        const SIProgramInfo &KernelInfo) const {
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
  amd_kernel_code_t header;

  // Zero the whole struct; any field not set explicitly below stays 0.
  memset(&header, 0, sizeof(header));

  header.amd_code_version_major = AMD_CODE_VERSION_MAJOR;
  header.amd_code_version_minor = AMD_CODE_VERSION_MINOR;

  header.struct_byte_size = sizeof(amd_kernel_code_t);

  header.target_chip = STM.getAmdKernelCodeChipID();

  // The kernel code starts at the next alignment boundary after this header.
  header.kernel_code_entry_byte_offset = (1ULL << MF.getAlignment());

  // Pack RSRC1 into the low dword and RSRC2 into the high dword.
  header.compute_pgm_resource_registers =
      KernelInfo.ComputePGMRSrc1 |
      (KernelInfo.ComputePGMRSrc2 << 32);

  // Code Properties:
  header.code_properties = AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
                           AMD_CODE_PROPERTY_IS_PTR64;

  if (KernelInfo.FlatUsed)
    header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;

  if (KernelInfo.ScratchBlocks)
    header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;

  header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
  header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;

  // MFI->ABIArgOffset is the number of bytes for the kernel arguments
  // plus 36.  36 is the number of bytes reserved at the beginning of the
  // input buffer to store work-group size information.
  // FIXME: We should be adding the size of the implicit arguments
  // to this value.
  header.kernarg_segment_byte_size = MFI->ABIArgOffset;

  header.wavefront_sgpr_count = KernelInfo.NumSGPR;
  header.workitem_vgpr_count = KernelInfo.NumVGPR;

  // FIXME: What values do I put for these alignments
  header.kernarg_segment_alignment = 0;
  header.group_segment_alignment = 0;
  header.private_segment_alignment = 0;

  header.code_type = 1; // HSA_EXT_CODE_KERNEL

  header.wavefront_size = STM.getWavefrontSize();

  // Emit the HSA/AMD code-object version string in its own section, then
  // return to the text section for the comments and header bytes.
  MCSectionELF *VersionSection =
      OutContext.getELFSection(".hsa.version", ELF::SHT_PROGBITS, 0);
  OutStreamer->SwitchSection(VersionSection);
  OutStreamer->EmitBytes(Twine("HSA Code Unit:" +
                         Twine(header.hsail_version_major) + "." +
                         Twine(header.hsail_version_minor) + ":" +
                         "AMD:" +
                         Twine(header.amd_code_version_major) + "." +
                         Twine(header.amd_code_version_minor) +  ":" +
                         "GFX8.1:0").str());

  OutStreamer->SwitchSection(getObjFileLowering().getTextSection());

  if (isVerbose()) {
    OutStreamer->emitRawComment("amd_code_version_major = " +
                                Twine(header.amd_code_version_major), false);
    OutStreamer->emitRawComment("amd_code_version_minor = " +
                                Twine(header.amd_code_version_minor), false);
    OutStreamer->emitRawComment("struct_byte_size = " +
                                Twine(header.struct_byte_size), false);
    OutStreamer->emitRawComment("target_chip = " +
                                Twine(header.target_chip), false);
    OutStreamer->emitRawComment(" compute_pgm_rsrc1: " +
                                Twine::utohexstr(KernelInfo.ComputePGMRSrc1),
                                false);
    OutStreamer->emitRawComment(" compute_pgm_rsrc2: " +
                                Twine::utohexstr(KernelInfo.ComputePGMRSrc2),
                                false);
    // NOTE(review): the label says "private_segment_buffer" but the flag
    // tested is ..._ENABLE_SGPR_PRIVATE_SEGMENT_SIZE — confirm which one is
    // intended before relying on this comment in the output.
    OutStreamer->emitRawComment("enable_sgpr_private_segment_buffer = " +
      Twine((bool)(header.code_properties &
                   AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)), false);
    OutStreamer->emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
      Twine((bool)(header.code_properties &
                   AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR)), false);
    OutStreamer->emitRawComment("private_element_size = 2 ", false);
    OutStreamer->emitRawComment("is_ptr64 = " +
        Twine((bool)(header.code_properties & AMD_CODE_PROPERTY_IS_PTR64)), false);
    OutStreamer->emitRawComment("workitem_private_segment_byte_size = " +
                                Twine(header.workitem_private_segment_byte_size),
                                false);
    OutStreamer->emitRawComment("workgroup_group_segment_byte_size = " +
                                Twine(header.workgroup_group_segment_byte_size),
                                false);
    OutStreamer->emitRawComment("gds_segment_byte_size = " +
                                Twine(header.gds_segment_byte_size), false);
    OutStreamer->emitRawComment("kernarg_segment_byte_size = " +
                                Twine(header.kernarg_segment_byte_size), false);
    OutStreamer->emitRawComment("wavefront_sgpr_count = " +
                                Twine(header.wavefront_sgpr_count), false);
    OutStreamer->emitRawComment("workitem_vgpr_count = " +
                                Twine(header.workitem_vgpr_count), false);
    OutStreamer->emitRawComment("code_type = " + Twine(header.code_type), false);
    OutStreamer->emitRawComment("wavefront_size = " +
                                Twine((int)header.wavefront_size), false);
    OutStreamer->emitRawComment("optimization_level = " +
                                Twine(header.optimization_level), false);
    OutStreamer->emitRawComment("hsail_profile = " +
                                Twine(header.hsail_profile), false);
    OutStreamer->emitRawComment("hsail_machine_model = " +
                                Twine(header.hsail_machine_model), false);
    OutStreamer->emitRawComment("hsail_version_major = " +
                                Twine(header.hsail_version_major), false);
    OutStreamer->emitRawComment("hsail_version_minor = " +
                                Twine(header.hsail_version_minor), false);
  }

  // Finally, emit the raw header bytes themselves.
  OutStreamer->EmitBytes(StringRef((char*)&header, sizeof(header)));
}
+
+bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                       unsigned AsmVariant,
+                                       const char *ExtraCode, raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0)
+      return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default:
+      // See if this is a generic print operand
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+    case 'r':
+      break;
+    }
+  }
+
+  AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O,
+                   *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
+  return false;
+}
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
new file mode 100644 (file)
index 0000000..1acff3a
--- /dev/null
@@ -0,0 +1,113 @@
+//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUASMPRINTER_H
+#define LLVM_LIB_TARGET_R600_AMDGPUASMPRINTER_H
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include <vector>
+
+namespace llvm {
+
+class AMDGPUAsmPrinter : public AsmPrinter {
+private:
+  struct SIProgramInfo {
+    SIProgramInfo() :
+      VGPRBlocks(0),
+      SGPRBlocks(0),
+      Priority(0),
+      FloatMode(0),
+      Priv(0),
+      DX10Clamp(0),
+      DebugMode(0),
+      IEEEMode(0),
+      ScratchSize(0),
+      ComputePGMRSrc1(0),
+      LDSBlocks(0),
+      ScratchBlocks(0),
+      ComputePGMRSrc2(0),
+      NumVGPR(0),
+      NumSGPR(0),
+      FlatUsed(false),
+      VCCUsed(false),
+      CodeLen(0) {}
+
+    // Fields set in PGM_RSRC1 pm4 packet.
+    uint32_t VGPRBlocks;
+    uint32_t SGPRBlocks;
+    uint32_t Priority;
+    uint32_t FloatMode;
+    uint32_t Priv;
+    uint32_t DX10Clamp;
+    uint32_t DebugMode;
+    uint32_t IEEEMode;
+    uint32_t ScratchSize;
+
+    uint64_t ComputePGMRSrc1;
+
+    // Fields set in PGM_RSRC2 pm4 packet.
+    uint32_t LDSBlocks;
+    uint32_t ScratchBlocks;
+
+    uint64_t ComputePGMRSrc2;
+
+    uint32_t NumVGPR;
+    uint32_t NumSGPR;
+    uint32_t LDSSize;
+    bool FlatUsed;
+
+    // Bonus information for debugging.
+    bool VCCUsed;
+    uint64_t CodeLen;
+  };
+
+  void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const;
+  void findNumUsedRegistersSI(const MachineFunction &MF,
+                              unsigned &NumSGPR,
+                              unsigned &NumVGPR) const;
+
+  /// \brief Emit register usage information so that the GPU driver
+  /// can correctly setup the GPU state.
+  void EmitProgramInfoR600(const MachineFunction &MF);
+  void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
+  void EmitAmdKernelCodeT(const MachineFunction &MF,
+                          const SIProgramInfo &KernelInfo) const;
+
+public:
+  explicit AMDGPUAsmPrinter(TargetMachine &TM,
+                            std::unique_ptr<MCStreamer> Streamer);
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  const char *getPassName() const override {
+    return "AMDGPU Assembly Printer";
+  }
+
+  /// Implemented in AMDGPUMCInstLower.cpp
+  void EmitInstruction(const MachineInstr *MI) override;
+
+  void EmitEndOfAsmFile(Module &M) override;
+
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       unsigned AsmVariant, const char *ExtraCode,
+                       raw_ostream &O) override;
+
+protected:
+  std::vector<std::string> DisasmLines, HexLines;
+  size_t DisasmLineMaxLen;
+};
+
} // End namespace llvm
+
+#endif
diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td
new file mode 100644 (file)
index 0000000..6ffa7a0
--- /dev/null
@@ -0,0 +1,82 @@
//===--- AMDGPUCallingConv.td - Calling Conventions for Radeon GPUs -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the AMD Radeon GPUs.
+//
+//===----------------------------------------------------------------------===//
+
// Inversion of CCIfInReg: applies action A only to arguments NOT marked inreg.
class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}

// Calling convention for SI (Southern Islands / GCN) graphics shaders.
// inreg arguments go to SGPRs, everything else to VGPRs.
def CC_SI : CallingConv<[

  // 32-bit inreg values are passed in the first 22 SGPRs.
  CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
    SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
    SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21
  ]>>>,

  // 64-bit inreg values occupy an even/odd SGPR pair.
  CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
    [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
    [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
  >>>,

  // Non-inreg 32-bit values go to the first 32 VGPRs.
  CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
  ]>>>,

  // byval 64-bit values also use even/odd SGPR pairs.
  CCIfByVal<CCIfType<[i64] , CCAssignToRegWithShadow<
    [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
    [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
  >>>

]>;
+
// Calling convention for R600: inreg vector arguments are passed in the
// T0..T32 XYZW vector registers.
def CC_R600 : CallingConv<[
  CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
    T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
    T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
    T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
    T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
    T30_XYZW, T31_XYZW, T32_XYZW
  ]>>>
]>;

// Calling convention for compute kernels: all arguments are placed on a
// custom-allocated stack (kernel argument buffer).
def CC_AMDGPU_Kernel : CallingConv<[
  CCCustom<"allocateStack">
]>;

// Top-level dispatcher: routes to the kernel convention for COMPUTE shaders
// on either generation, otherwise to CC_SI (SI and newer) or CC_R600.
def CC_AMDGPU : CallingConv<[
  CCIf<"static_cast<const AMDGPUSubtarget&>"
        "(State.getMachineFunction().getSubtarget()).getGeneration() >="
          "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
        "State.getMachineFunction().getInfo<SIMachineFunctionInfo>()"
         "->getShaderType() == ShaderType::COMPUTE",
       CCDelegateTo<CC_AMDGPU_Kernel>>,
  CCIf<"static_cast<const AMDGPUSubtarget&>"
        "(State.getMachineFunction().getSubtarget()).getGeneration() < "
          "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
         "State.getMachineFunction().getInfo<R600MachineFunctionInfo>()"
          "->getShaderType() == ShaderType::COMPUTE",
        CCDelegateTo<CC_AMDGPU_Kernel>>,
   CCIf<"static_cast<const AMDGPUSubtarget&>"
         "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
           "AMDGPUSubtarget::SOUTHERN_ISLANDS",
        CCDelegateTo<CC_SI>>,
   CCIf<"static_cast<const AMDGPUSubtarget&>"
          "(State.getMachineFunction().getSubtarget()).getGeneration() < "
            "AMDGPUSubtarget::SOUTHERN_ISLANDS",
        CCDelegateTo<CC_R600>>
]>;
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
new file mode 100644 (file)
index 0000000..8175786
--- /dev/null
@@ -0,0 +1,112 @@
+//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on a AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
// Forward all frame-layout parameters (growth direction, stack alignment,
// local-area offset, transient alignment) to the TargetFrameLowering base.
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
    int LAO, unsigned TransAl)
  : TargetFrameLowering(D, StackAl, LAO, TransAl) { }

AMDGPUFrameLowering::~AMDGPUFrameLowering() { }
+
/// \returns The number of 32-bit sub-registers used per stack slot
/// (currently always 1; see the layout discussion below).
unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {

  // XXX: Hardcoding to 1 for now.
  //
  // I think the StackWidth should stored as metadata associated with the
  // MachineFunction.  This metadata can either be added by a frontend, or
  // calculated by a R600 specific LLVM IR pass.
  //
  // The StackWidth determines how stack objects are laid out in memory.
  // For a vector stack variable, like: int4 stack[2], the data will be stored
  // in the following ways depending on the StackWidth.
  //
  // StackWidth = 1:
  //
  // T0.X = stack[0].x
  // T1.X = stack[0].y
  // T2.X = stack[0].z
  // T3.X = stack[0].w
  // T4.X = stack[1].x
  // T5.X = stack[1].y
  // T6.X = stack[1].z
  // T7.X = stack[1].w
  //
  // StackWidth = 2:
  //
  // T0.X = stack[0].x
  // T0.Y = stack[0].y
  // T1.X = stack[0].z
  // T1.Y = stack[0].w
  // T2.X = stack[1].x
  // T2.Y = stack[1].y
  // T3.X = stack[1].z
  // T3.Y = stack[1].w
  //
  // StackWidth = 4:
  // T0.X = stack[0].x
  // T0.Y = stack[0].y
  // T0.Z = stack[0].z
  // T0.W = stack[0].w
  // T1.X = stack[1].x
  // T1.Y = stack[1].y
  // T1.Z = stack[1].z
  // T1.W = stack[1].w
  return 1;
}
+
+/// \returns The number of registers allocated for \p FI.
/// \returns The offset of frame index \p FI, measured in register units
/// (i.e. bytes divided by StackWidth * 4), computed by summing the aligned
/// sizes of all frame objects that precede it.
int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
                                         int FI) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  // Start the offset at 2 so we don't overwrite work group information.
  // XXX: We should only do this when the shader actually uses this
  // information.
  unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4);
  // FI == -1 means "total frame size": accumulate over every object.
  int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;

  for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
    OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(i));
    OffsetBytes += MFI->getObjectSize(i);
    // Each register holds 4 bytes, so we must always align the offset to at
    // least 4 bytes, so that 2 frame objects won't share the same register.
    OffsetBytes = RoundUpToAlignment(OffsetBytes, 4);
  }

  // Align the final offset to the requested object's own alignment.
  if (FI != -1)
    OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(FI));

  return OffsetBytes / (getStackWidth(MF) * 4);
}
+
+const TargetFrameLowering::SpillSlot *
+AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+  NumEntries = 0;
+  return nullptr;
+}
+void AMDGPUFrameLowering::emitPrologue(MachineFunction &MF,
+                                       MachineBasicBlock &MBB) const {}
+void
+AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
+                                  MachineBasicBlock &MBB) const {
+}
+
+bool
+AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
+  return false;
+}
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
new file mode 100644 (file)
index 0000000..9f31be1
--- /dev/null
@@ -0,0 +1,45 @@
+//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface to describe a layout of a stack frame on a AMDIL target
+/// machine.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
+#define LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+/// \brief Information about the stack frame layout on the AMDGPU targets.
+///
+/// It holds the direction of the stack growth, the known stack alignment on
+/// entry to each function, and the offset to the locals area.
+/// See TargetFrameInfo for more comments.
class AMDGPUFrameLowering : public TargetFrameLowering {
public:
  /// \p D stack growth direction, \p StackAl stack alignment on entry,
  /// \p LAO offset to the locals area, \p TransAl transient alignment.
  AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
                      unsigned TransAl = 1);
  virtual ~AMDGPUFrameLowering();

  /// \returns The number of 32-bit sub-registers that are used when storing
  /// values to the stack.
  unsigned getStackWidth(const MachineFunction &MF) const;
  /// \returns Offset of frame index \p FI in register units.
  int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
  /// \returns nullptr; these targets have no callee-saved registers.
  const SpillSlot *
    getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
  // Prologue/epilogue insertion is a no-op on these targets.
  void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
  /// \returns false; no frame pointer is used.
  bool hasFP(const MachineFunction &MF) const override;
};
+} // namespace llvm
+#endif
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
new file mode 100644 (file)
index 0000000..df4461e
--- /dev/null
@@ -0,0 +1,1371 @@
+//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Defines an instruction selector for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUISelLowering.h" // For AMDGPUISD
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
+#include "SIDefines.h"
+#include "SIISelLowering.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// AMDGPU specific code to select AMDGPU machine instructions for
+/// SelectionDAG operations.
+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
+  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
+  // make the right decision when generating code for different targets.
+  const AMDGPUSubtarget *Subtarget;
+public:
+  AMDGPUDAGToDAGISel(TargetMachine &TM);
+  virtual ~AMDGPUDAGToDAGISel();
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  SDNode *Select(SDNode *N) override;
+  const char *getPassName() const override;
+  void PostprocessISelDAG() override;
+
+private:
+  bool isInlineImmediate(SDNode *N) const;
+  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
+                   const R600InstrInfo *TII);
+  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
+  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
+
+  // Complex pattern selectors
+  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
+  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
+  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
+
+  static bool checkType(const Value *ptr, unsigned int addrspace);
+  static bool checkPrivateAddress(const MachineMemOperand *Op);
+
+  static bool isGlobalStore(const StoreSDNode *N);
+  static bool isFlatStore(const StoreSDNode *N);
+  static bool isPrivateStore(const StoreSDNode *N);
+  static bool isLocalStore(const StoreSDNode *N);
+  static bool isRegionStore(const StoreSDNode *N);
+
+  bool isCPLoad(const LoadSDNode *N) const;
+  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
+  bool isGlobalLoad(const LoadSDNode *N) const;
+  bool isFlatLoad(const LoadSDNode *N) const;
+  bool isParamLoad(const LoadSDNode *N) const;
+  bool isPrivateLoad(const LoadSDNode *N) const;
+  bool isLocalLoad(const LoadSDNode *N) const;
+  bool isRegionLoad(const LoadSDNode *N) const;
+
+  SDNode *glueCopyToM0(SDNode *N) const;
+
+  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
+  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
+  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
+                                       SDValue& Offset);
+  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+                       unsigned OffsetBits) const;
+  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
+  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+                                 SDValue &Offset1) const;
+  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
+                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
+                   SDValue &TFE) const;
+  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
+                         SDValue &SLC, SDValue &TFE) const;
+  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
+                         SDValue &SLC) const;
+  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
+                          SDValue &SOffset, SDValue &ImmOffset) const;
+  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
+                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
+                         SDValue &TFE) const;
+  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+                         SDValue &Offset, SDValue &GLC) const;
+  SDNode *SelectAddrSpaceCast(SDNode *N);
+  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+                       SDValue &Clamp, SDValue &Omod) const;
+
+  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
+                            SDValue &Omod) const;
+  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
+                                 SDValue &Clamp,
+                                 SDValue &Omod) const;
+
+  SDNode *SelectADD_SUB_I64(SDNode *N);
+  SDNode *SelectDIV_SCALE(SDNode *N);
+
+  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+                   uint32_t Offset, uint32_t Width);
+  SDNode *SelectS_BFEFromShifts(SDNode *N);
+  SDNode *SelectS_BFE(SDNode *N);
+
+  // Include the pieces autogenerated from the target description.
+#include "AMDGPUGenDAGISel.inc"
+};
+}  // end anonymous namespace
+
+/// \brief This pass converts a legalized DAG into an AMDGPU-specific
+/// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
+  return new AMDGPUDAGToDAGISel(TM);
+}
+
+// The Subtarget pointer is not set here; it is refreshed per function in
+// runOnMachineFunction.
+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
+    : SelectionDAGISel(TM) {}
+
+// Cache the subtarget for the current function before delegating to the
+// generic SelectionDAGISel driver.
+bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
+  return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
+// Out-of-line, empty destructor.
+AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
+}
+
+// \returns true if \p N can be encoded as an SI inline immediate; a result
+// of 0 from SITargetLowering::analyzeImmediate is the inlinable case.
+bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
+  const SITargetLowering *TL
+      = static_cast<const SITargetLowering *>(getTargetLowering());
+  return TL->analyzeImmediate(N) == 0;
+}
+
+/// \brief Determine the register class for \p OpNo
+/// \returns The register class of the virtual register that will be used for
+/// the given operand number \p OpNo or NULL if the register class cannot be
+/// determined.
+const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
+                                                          unsigned OpNo) const {
+  if (!N->isMachineOpcode())
+    return nullptr;
+
+  switch (N->getMachineOpcode()) {
+  default: {
+    // Generic case: read the operand's register class out of the MC
+    // instruction description.  OpNo counts uses only, so skip the defs.
+    const MCInstrDesc &Desc =
+        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
+    unsigned OpIdx = Desc.getNumDefs() + OpNo;
+    if (OpIdx >= Desc.getNumOperands())
+      return nullptr;
+    int RegClass = Desc.OpInfo[OpIdx].RegClass;
+    if (RegClass == -1)
+      return nullptr;
+
+    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
+  }
+  case AMDGPU::REG_SEQUENCE: {
+    // REG_SEQUENCE: operand 0 is the super register class ID, followed by
+    // (value, subreg index) pairs; the subreg index for this use is at
+    // OpNo + 1.
+    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+    const TargetRegisterClass *SuperRC =
+        Subtarget->getRegisterInfo()->getRegClass(RCID);
+
+    SDValue SubRegOp = N->getOperand(OpNo + 1);
+    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
+    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
+                                                              SubRegIdx);
+  }
+  }
+}
+
+// Split \p Addr into a base (R1) and a 32-bit offset (R2).  Frame indices are
+// converted to target frame indices with a zero offset; an ADD is split into
+// its two operands; anything else becomes base + zero offset.
+bool AMDGPUDAGToDAGISel::SelectADDRParam(
+  SDValue Addr, SDValue& R1, SDValue& R2) {
+
+  if (Addr.getOpcode() == ISD::FrameIndex) {
+    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+    } else {
+      // NOTE(review): opcode == FrameIndex should make the dyn_cast above
+      // always succeed, so this branch looks unreachable — confirm.
+      R1 = Addr;
+      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+    }
+  } else if (Addr.getOpcode() == ISD::ADD) {
+    R1 = Addr.getOperand(0);
+    R2 = Addr.getOperand(1);
+  } else {
+    R1 = Addr;
+    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+  }
+  return true;
+}
+
+// Like SelectADDRParam, but refuses symbolic addresses, which cannot be
+// matched by this pattern.
+bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
+  const unsigned Opc = Addr.getOpcode();
+  if (Opc == ISD::TargetExternalSymbol || Opc == ISD::TargetGlobalAddress)
+    return false;
+
+  return SelectADDRParam(Addr, R1, R2);
+}
+
+
+// 64-bit variant of SelectADDR: splits \p Addr into base (R1) and offset (R2)
+// using i64 constants, rejecting symbolic addresses.
+bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+      Addr.getOpcode() == ISD::TargetGlobalAddress) {
+    return false;
+  }
+
+  if (Addr.getOpcode() == ISD::FrameIndex) {
+    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
+    } else {
+      // NOTE(review): as in SelectADDRParam, this else branch looks
+      // unreachable when opcode == FrameIndex — confirm.
+      R1 = Addr;
+      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
+    }
+  } else if (Addr.getOpcode() == ISD::ADD) {
+    R1 = Addr.getOperand(0);
+    R2 = Addr.getOperand(1);
+  } else {
+    R1 = Addr;
+    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
+  }
+  return true;
+}
+
+// If \p N is an LDS (local address space) memory access on SI or newer, glue
+// a CopyToReg of m0 = -1 (the maximum value) in front of it; otherwise return
+// \p N unchanged.
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
+  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
+                 AMDGPUAS::LOCAL_ADDRESS))
+    return N;
+
+  const SITargetLowering& Lowering =
+      *static_cast<const SITargetLowering*>(getTargetLowering());
+
+  // Write max value to m0 before each load operation
+
+  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
+                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+
+  SDValue Glue = M0.getValue(1);
+
+  // Re-create N's operand list with the glue appended, then morph N in place.
+  SmallVector <SDValue, 8> Ops;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+     Ops.push_back(N->getOperand(i));
+  }
+  Ops.push_back(Glue);
+  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
+
+  return N;
+}
+
+// Main instruction selection entry point.  Handles a handful of AMDGPU
+// special cases directly and falls through to the TableGen-generated
+// SelectCode for everything else.
+SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
+  unsigned int Opc = N->getOpcode();
+  if (N->isMachineOpcode()) {
+    N->setNodeId(-1);
+    return nullptr;   // Already selected.
+  }
+
+  // LDS atomics need m0 initialized; glueCopyToM0 is a no-op for other nodes.
+  if (isa<AtomicSDNode>(N))
+    N = glueCopyToM0(N);
+
+  switch (Opc) {
+  default: break;
+  // We are selecting i64 ADD here instead of custom lower it during
+  // DAG legalization, so we can fold some i64 ADDs used for address
+  // calculation into the LOAD and STORE instructions.
+  case ISD::ADD:
+  case ISD::SUB: {
+    if (N->getValueType(0) != MVT::i64 ||
+        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+      break;
+
+    return SelectADD_SUB_I64(N);
+  }
+  case ISD::SCALAR_TO_VECTOR:
+  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
+  case ISD::BUILD_VECTOR: {
+    unsigned RegClassID;
+    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
+    EVT VT = N->getValueType(0);
+    unsigned NumVectorElts = VT.getVectorNumElements();
+    EVT EltVT = VT.getVectorElementType();
+    assert(EltVT.bitsEq(MVT::i32));
+    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+      // On SI+, prefer an SGPR class when every machine-opcode user accepts
+      // SGPRs; otherwise fall back to VGPRs.
+      bool UseVReg = true;
+      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
+                                                    U != E; ++U) {
+        if (!U->isMachineOpcode()) {
+          continue;
+        }
+        const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
+        if (!RC) {
+          continue;
+        }
+        if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
+          UseVReg = false;
+        }
+      }
+      switch(NumVectorElts) {
+      case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID :
+                                     AMDGPU::SReg_32RegClassID;
+        break;
+      case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
+                                     AMDGPU::SReg_64RegClassID;
+        break;
+      case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
+                                     AMDGPU::SReg_128RegClassID;
+        break;
+      case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
+                                     AMDGPU::SReg_256RegClassID;
+        break;
+      case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
+                                      AMDGPU::SReg_512RegClassID;
+        break;
+      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+      }
+    } else {
+      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+      // that adds a 128 bits reg copy when going through TwoAddressInstructions
+      // pass. We want to avoid 128 bits copies as much as possible because they
+      // can't be bundled by our scheduler.
+      switch(NumVectorElts) {
+      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
+      case 4:
+        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
+        else
+          RegClassID = AMDGPU::R600_Reg128RegClassID;
+        break;
+      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+      }
+    }
+
+    SDLoc DL(N);
+    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
+
+    if (NumVectorElts == 1) {
+      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
+                                  N->getOperand(0), RegClass);
+    }
+
+    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
+                                  "supported yet");
+    // 16 = Max Num Vector Elements
+    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
+    // 1 = Vector Register Class
+    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
+
+    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
+    bool IsRegSeq = true;
+    unsigned NOps = N->getNumOperands();
+    for (unsigned i = 0; i < NOps; i++) {
+      // XXX: Why is this here?
+      if (isa<RegisterSDNode>(N->getOperand(i))) {
+        IsRegSeq = false;
+        break;
+      }
+      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
+      RegSeqArgs[1 + (2 * i) + 1] =
+              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
+                                        MVT::i32);
+    }
+
+    if (NOps != NumVectorElts) {
+      // Fill in the missing undef elements if this was a scalar_to_vector.
+      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
+
+      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+                                                     DL, EltVT);
+      for (unsigned i = NOps; i < NumVectorElts; ++i) {
+        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
+        RegSeqArgs[1 + (2 * i) + 1] =
+          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
+      }
+    }
+
+    if (!IsRegSeq)
+      break;
+    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
+                                RegSeqArgs);
+  }
+  case ISD::BUILD_PAIR: {
+    // Combine two 32/64-bit halves into an i64/i128 via REG_SEQUENCE.
+    SDValue RC, SubReg0, SubReg1;
+    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+      break;
+    }
+    SDLoc DL(N);
+    if (N->getValueType(0) == MVT::i128) {
+      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
+      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
+      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
+    } else if (N->getValueType(0) == MVT::i64) {
+      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
+      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
+    } else {
+      llvm_unreachable("Unhandled value type for BUILD_PAIR");
+    }
+    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
+                            N->getOperand(1), SubReg1 };
+    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+                                  DL, N->getValueType(0), Ops);
+  }
+
+  case ISD::Constant:
+  case ISD::ConstantFP: {
+    // Materialize a non-inlinable 64-bit immediate on SI+ as two S_MOV_B32
+    // halves recombined with REG_SEQUENCE.
+    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
+      break;
+
+    uint64_t Imm;
+    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
+      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
+    else {
+      ConstantSDNode *C = cast<ConstantSDNode>(N);
+      Imm = C->getZExtValue();
+    }
+
+    SDLoc DL(N);
+    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
+                                                    MVT::i32));
+    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
+    const SDValue Ops[] = {
+      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
+      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
+    };
+
+    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
+                                  N->getValueType(0), Ops);
+  }
+
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(N);
+    SDLoc SL(N);
+    EVT VT = N->getValueType(0);
+
+    if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
+      N = glueCopyToM0(N);
+      break;
+    }
+
+    // To simplify the TableGen patterns, we replace all i64 loads with
+    // v2i32 loads.  Alternatively, we could promote i64 loads to v2i32
+    // during DAG legalization; however, some places (ExpandUnalignedLoad)
+    // in the DAG legalizer assume that i64 loads stay i64 when the type is
+    // legal, so doing this promotion early can cause problems.
+
+    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
+                                      LD->getBasePtr(), LD->getMemOperand());
+    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
+                                      MVT::i64, NewLoad);
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
+    SDNode *Load = glueCopyToM0(NewLoad.getNode());
+    SelectCode(Load);
+    N = BitCast.getNode();
+    break;
+  }
+
+  case ISD::STORE: {
+    // Handle i64 stores here for the same reason mentioned above for loads.
+    StoreSDNode *ST = cast<StoreSDNode>(N);
+    SDValue Value = ST->getValue();
+    if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
+
+      SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+                                        MVT::v2i32, Value);
+      SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
+                                          ST->getBasePtr(), ST->getMemOperand());
+
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
+
+      if (NewValue.getOpcode() == ISD::BITCAST) {
+        Select(NewStore.getNode());
+        return SelectCode(NewValue.getNode());
+      }
+
+      // getNode() may fold the bitcast if its input was another bitcast.  If that
+      // happens we should only select the new store.
+      N = NewStore.getNode();
+    }
+
+    N = glueCopyToM0(N);
+    break;
+  }
+
+  case AMDGPUISD::REGISTER_LOAD: {
+    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+      break;
+    SDValue Addr, Offset;
+
+    SDLoc DL(N);
+    SelectADDRIndirect(N->getOperand(1), Addr, Offset);
+    const SDValue Ops[] = {
+      Addr,
+      Offset,
+      CurDAG->getTargetConstant(0, DL, MVT::i32),
+      N->getOperand(0),
+    };
+    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
+                                  CurDAG->getVTList(MVT::i32, MVT::i64,
+                                                    MVT::Other),
+                                  Ops);
+  }
+  case AMDGPUISD::REGISTER_STORE: {
+    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+      break;
+    SDValue Addr, Offset;
+    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
+    SDLoc DL(N);
+    const SDValue Ops[] = {
+      N->getOperand(1),
+      Addr,
+      Offset,
+      CurDAG->getTargetConstant(0, DL, MVT::i32),
+      N->getOperand(0),
+    };
+    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
+                                        CurDAG->getVTList(MVT::Other),
+                                        Ops);
+  }
+
+  case AMDGPUISD::BFE_I32:
+  case AMDGPUISD::BFE_U32: {
+    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+      break;
+
+    // There is a scalar version available, but unlike the vector version which
+    // has a separate operand for the offset and width, the scalar version packs
+    // the width and offset into a single operand. Try to move to the scalar
+    // version if the offsets are constant, so that we can try to keep extended
+    // loads of kernel arguments in SGPRs.
+
+    // TODO: Technically we could try to pattern match scalar bitshifts of
+    // dynamic values, but it's probably not useful.
+    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (!Offset)
+      break;
+
+    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
+    if (!Width)
+      break;
+
+    bool Signed = Opc == AMDGPUISD::BFE_I32;
+
+    uint32_t OffsetVal = Offset->getZExtValue();
+    uint32_t WidthVal = Width->getZExtValue();
+
+    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
+                    N->getOperand(0), OffsetVal, WidthVal);
+
+  }
+  case AMDGPUISD::DIV_SCALE: {
+    return SelectDIV_SCALE(N);
+  }
+  case ISD::CopyToReg: {
+    const SITargetLowering& Lowering =
+      *static_cast<const SITargetLowering*>(getTargetLowering());
+    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
+    break;
+  }
+  case ISD::ADDRSPACECAST:
+    return SelectAddrSpaceCast(N);
+  case ISD::AND:
+  case ISD::SRL:
+  case ISD::SRA:
+    if (N->getValueType(0) != MVT::i32 ||
+        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+      break;
+
+    return SelectS_BFE(N);
+  }
+
+  return SelectCode(N);
+}
+
+
+// \returns true if \p Ptr is a non-null pointer into address space \p AS.
+// Address space 0 (private) must be queried via checkPrivateAddress.
+bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
+  assert(AS != 0 && "Use checkPrivateAddress instead.");
+  return Ptr && Ptr->getType()->getPointerAddressSpace() == AS;
+}
+
+// \returns true if the memory operand refers to a private (scratch) address.
+// Pseudo source values (e.g. stack accesses) are treated as private here.
+bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
+  if (Op->getPseudoValue())
+    return true;
+
+  const PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType());
+  return PT && PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+}
+
+// \returns true if \p N stores to the global address space.
+bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+// A store is considered private when it targets none of the explicitly
+// tracked address spaces (local, global, region).
+bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
+  const Value *MemVal = N->getMemOperand()->getValue();
+  if (checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS))
+    return false;
+  if (checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS))
+    return false;
+  return !checkType(MemVal, AMDGPUAS::REGION_ADDRESS);
+}
+
+// \returns true if \p N stores to the local (LDS) address space.
+bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
+  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+// \returns true if \p N stores through the flat address space.
+bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
+  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
+}
+
+// \returns true if \p N stores to the region (GDS) address space.
+bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
+  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+// \returns true if \p N loads from a constant address space.  A \p CbId of -1
+// matches the generic constant address space; otherwise the specific constant
+// buffer (CONSTANT_BUFFER_0 + CbId) is matched.
+bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
+  const Value *MemVal = N->getMemOperand()->getValue();
+  if (CbId == -1)
+    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
+
+  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
+}
+
+// \returns true if \p N should be selected as a global load.  Constant-address
+// loads are also treated as global on pre-SI targets and for sub-32-bit
+// accesses — presumably because the scalar memory path can't handle them;
+// confirm against the SMRD selection patterns.
+bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
+  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+        N->getMemoryVT().bitsLT(MVT::i32))
+      return true;
+
+  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+// \returns true if \p N loads from the kernel parameter address space.
+bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
+  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
+}
+
+// \returns true if \p N loads from the local (LDS) address space.
+bool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) const {
+  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+// \returns true if \p N loads through the flat address space.
+bool AMDGPUDAGToDAGISel::isFlatLoad(const  LoadSDNode *N) const {
+  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
+}
+
+// \returns true if \p N loads from the region (GDS) address space.
+bool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) const {
+  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+// \returns true if \p N is a constant-pool load: a private (pseudo-value)
+// access whose pseudo source value is the constant pool.
+bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
+  MachineMemOperand *MMO = N->getMemOperand();
+  if (!checkPrivateAddress(MMO))
+    return false;
+  if (!MMO)
+    return false;
+
+  const PseudoSourceValue *PSV = MMO->getPseudoValue();
+  return PSV && PSV == PseudoSourceValue::getConstantPool();
+}
+
+// \returns true if \p N should be selected as a private (scratch) load: a
+// private address that is neither a constant-pool load nor a constant load,
+// or a value in none of the explicitly tracked address spaces.
+bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
+  if (checkPrivateAddress(N->getMemOperand())) {
+    // Check to make sure we are not a constant pool load or a constant load
+    // that is marked as a private load
+    if (isCPLoad(N) || isConstantLoad(N, -1)) {
+      return false;
+    }
+  }
+
+  const Value *MemVal = N->getMemOperand()->getValue();
+  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
+      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
+      !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
+      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
+      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
+      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
+      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
+    return true;
+  }
+  return false;
+}
+
+// Human-readable name for this pass.
+const char *AMDGPUDAGToDAGISel::getPassName() const {
+  return "AMDGPU DAG->DAG Pattern Instruction Selection";
+}
+
+#ifdef DEBUGTMP
+#undef INT64_C
+#endif
+#undef DEBUGTMP
+
+//===----------------------------------------------------------------------===//
+// Complex Patterns
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+                                                         SDValue& IntPtr) {
+  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
+                                       true);
+    return true;
+  }
+  return false;
+}
+
+// Match any non-constant address as a base register with a zero offset.
+bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+    SDValue& BaseReg, SDValue &Offset) {
+  if (isa<ConstantSDNode>(Addr))
+    return false;
+
+  BaseReg = Addr;
+  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
+  return true;
+}
+
+// Select base + 16-bit immediate offset for a VTX read.  An ADD with an
+// in-range constant RHS splits into (base, imm); a bare in-range constant
+// becomes (ZERO register, imm); everything else is (addr, 0).
+bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+                                           SDValue &Offset) {
+  ConstantSDNode *IMMOffset;
+
+  if (Addr.getOpcode() == ISD::ADD
+      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+      && isInt<16>(IMMOffset->getZExtValue())) {
+
+      Base = Addr.getOperand(0);
+      Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+                                         MVT::i32);
+      return true;
+  // If the pointer address is constant, we can move it to the offset field.
+  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
+             && isInt<16>(IMMOffset->getZExtValue())) {
+    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+                                  SDLoc(CurDAG->getEntryNode()),
+                                  AMDGPU::ZERO, MVT::i32);
+    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+                                       MVT::i32);
+    return true;
+  }
+
+  // Default case, no offset
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+  return true;
+}
+
+// Select base + offset for indirect register access.  A bare constant uses
+// INDIRECT_BASE_ADDR as the base; ADD/OR with a constant RHS splits into
+// (base, imm); anything else is (addr, 0).
+bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+                                            SDValue &Offset) {
+  ConstantSDNode *C;
+  SDLoc DL(Addr);
+
+  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+            (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+    Base = Addr.getOperand(0);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else {
+    Base = Addr;
+    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+  }
+
+  return true;
+}
+
+// Lower a 64-bit ADD/SUB into 32-bit halves: S_ADD_U32/S_SUB_U32 on the low
+// half, then S_ADDC_U32/S_SUBB_U32 consuming the carry/borrow via glue on the
+// high half, recombined with REG_SEQUENCE.
+SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
+  SDLoc DL(N);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  bool IsAdd = (N->getOpcode() == ISD::ADD);
+
+  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
+
+  // Extract the 32-bit halves of both operands.
+  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+                                       DL, MVT::i32, LHS, Sub0);
+  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+                                       DL, MVT::i32, LHS, Sub1);
+
+  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+                                       DL, MVT::i32, RHS, Sub0);
+  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+                                       DL, MVT::i32, RHS, Sub1);
+
+  // The low op produces a glue result that carries the carry/borrow bit.
+  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
+  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
+
+
+  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
+  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
+
+  SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
+  SDValue Carry(AddLo, 1);
+  SDNode *AddHi
+    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
+                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
+
+  SDValue Args[5] = {
+    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
+    SDValue(AddLo,0),
+    Sub0,
+    SDValue(AddHi,0),
+    Sub1,
+  };
+  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
+}
+
+// We need to handle this here because tablegen doesn't support matching
+// instructions with multiple outputs.
+SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
+  SDLoc SL(N);
+  EVT VT = N->getValueType(0);
+
+  assert(VT == MVT::f32 || VT == MVT::f64);
+
+  unsigned Opc
+    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
+
+  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
+  SDValue Ops[8];
+
+  // The VOP3 selectors fill (mods, src) pairs; SelectVOP3Mods0 additionally
+  // produces the clamp (Ops[6]) and omod (Ops[7]) operands.
+  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
+  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
+  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
+  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+}
+
+// Return true if \p Offset can be encoded as an immediate DS offset of
+// \p OffsetBits bits (16 for single-offset DS ops, 8 for the per-slot
+// offsets used by the 64-bit/two-offset forms) given base address \p Base.
+bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+                                         unsigned OffsetBits) const {
+  // The offset must fit in the instruction's unsigned immediate field.
+  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
+      (OffsetBits == 8 && !isUInt<8>(Offset)))
+    return false;
+
+  // From Sea Islands on, any base value works with an immediate offset.
+  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
+    return true;
+
+  // On Southern Islands instruction with a negative base value and an offset
+  // don't seem to work.
+  return CurDAG->SignBitIsZero(Base);
+}
+
+// Match a DS memory address as (base register, 16-bit unsigned immediate
+// byte offset).  Always succeeds; falls back to (Addr, 0) when no offset
+// can be folded.
+bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
+                                              SDValue &Offset) const {
+  // (add n0, c0) --> base = n0, offset = c0, when c0 is a legal DS offset.
+  if (CurDAG->isBaseWithConstantOffset(Addr)) {
+    SDValue N0 = Addr.getOperand(0);
+    SDValue N1 = Addr.getOperand(1);
+    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
+      // (add n0, c0)
+      Base = N0;
+      Offset = N1;
+      return true;
+    }
+  }
+
+  SDLoc DL(Addr);
+
+  // If we have a constant address, prefer to put the constant into the
+  // offset. This can save moves to load the constant address since multiple
+  // operations can share the zero base address register, and enables merging
+  // into read2 / write2 instructions.
+  if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+    if (isUInt<16>(CAddr->getZExtValue())) {
+      // Materialize a zero base with v_mov_b32 and use the whole constant
+      // address as the immediate offset.
+      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+                                 DL, MVT::i32, Zero);
+      Base = SDValue(MovZero, 0);
+      Offset = Addr;
+      return true;
+    }
+  }
+
+  // default case
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+  return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
+                                                   SDValue &Offset0,
+                                                   SDValue &Offset1) const {
+  SDLoc DL(Addr);
+
+  if (CurDAG->isBaseWithConstantOffset(Addr)) {
+    SDValue N0 = Addr.getOperand(0);
+    SDValue N1 = Addr.getOperand(1);
+    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+    unsigned DWordOffset0 = C1->getZExtValue() / 4;
+    unsigned DWordOffset1 = DWordOffset0 + 1;
+    // (add n0, c0)
+    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
+      Base = N0;
+      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
+      return true;
+    }
+  }
+
+  if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
+    unsigned DWordOffset1 = DWordOffset0 + 1;
+    assert(4 * DWordOffset0 == CAddr->getZExtValue());
+
+    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
+      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+      MachineSDNode *MovZero
+        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+                                 DL, MVT::i32, Zero);
+