From: Matt Arsenault
Date: Mon, 24 Mar 2014 20:08:09 +0000 (+0000)
Subject: R600/SI: Sub-optimal fix for 64-bit immediates with SALU ops.
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=3a96e61469fd80bbb2c5bcf2b4dcee89e3a68ab3;p=oota-llvm.git

R600/SI: Sub-optimal fix for 64-bit immediates with SALU ops.

This no longer asserts; instead, moves are emitted to load legal
immediates into the split 32-bit operations.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204661 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 6cc4dee8271..eb5172c896e 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -591,6 +591,28 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
   return SubReg;
 }
 
+MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
+  MachineBasicBlock::iterator MII,
+  MachineRegisterInfo &MRI,
+  MachineOperand &Op,
+  const TargetRegisterClass *SuperRC,
+  unsigned SubIdx,
+  const TargetRegisterClass *SubRC) const {
+  if (Op.isImm()) {
+    // XXX - Is there a better way to do this?
+    if (SubIdx == AMDGPU::sub0)
+      return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
+    if (SubIdx == AMDGPU::sub1)
+      return MachineOperand::CreateImm(Op.getImm() >> 32);
+
+    llvm_unreachable("Unhandled register index for immediate");
+  }
+
+  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
+                                       SubIdx, SubRC);
+  return MachineOperand::CreateReg(SubReg, false);
+}
+
 unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
                                     MachineBasicBlock::iterator MI,
                                     MachineRegisterInfo &MRI,
@@ -998,7 +1020,6 @@ void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
   MachineBasicBlock &MBB = *Inst->getParent();
   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
 
-  // We shouldn't need to worry about immediate operands here.
   MachineOperand &Dest = Inst->getOperand(0);
   MachineOperand &Src0 = Inst->getOperand(1);
   MachineOperand &Src1 = Inst->getOperand(2);
@@ -1009,27 +1030,27 @@ void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
   const MCInstrDesc &InstDesc = get(Opcode);
   const TargetRegisterClass *RC = MRI.getRegClass(Src0.getReg());
   const TargetRegisterClass *SubRC = RI.getSubRegClass(RC, AMDGPU::sub0);
-  unsigned SrcReg0Sub0 = buildExtractSubReg(MII, MRI, Src0, RC,
-                                            AMDGPU::sub0, SubRC);
-  unsigned SrcReg1Sub0 = buildExtractSubReg(MII, MRI, Src1, RC,
-                                            AMDGPU::sub0, SubRC);
+  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, RC,
+                                                       AMDGPU::sub0, SubRC);
+  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, RC,
+                                                       AMDGPU::sub0, SubRC);
 
-  unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+  unsigned DestSub0 = MRI.createVirtualRegister(SubRC);
   MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
-    .addReg(SrcReg0Sub0)
-    .addReg(SrcReg1Sub0);
+    .addOperand(SrcReg0Sub0)
+    .addOperand(SrcReg1Sub0);
 
-  unsigned SrcReg0Sub1 = buildExtractSubReg(MII, MRI, Src0, RC,
-                                            AMDGPU::sub1, SubRC);
-  unsigned SrcReg1Sub1 = buildExtractSubReg(MII, MRI, Src1, RC,
-                                            AMDGPU::sub1, SubRC);
+  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, RC,
+                                                       AMDGPU::sub1, SubRC);
+  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, RC,
+                                                       AMDGPU::sub1, SubRC);
 
-  unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+  unsigned DestSub1 = MRI.createVirtualRegister(SubRC);
   MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
-    .addReg(SrcReg0Sub1)
-    .addReg(SrcReg1Sub1);
+    .addOperand(SrcReg0Sub1)
+    .addOperand(SrcReg1Sub1);
 
-  unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+  unsigned FullDestReg = MRI.createVirtualRegister(RC);
   BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
     .addReg(DestSub0)
     .addImm(AMDGPU::sub0)
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 6eefd3ac98c..7cfb655b36b 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -31,6 +31,12 @@ private:
                               const TargetRegisterClass *SuperRC,
                               unsigned SubIdx,
                               const TargetRegisterClass *SubRC) const;
+  MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
+                                         MachineRegisterInfo &MRI,
+                                         MachineOperand &SuperReg,
+                                         const TargetRegisterClass *SuperRC,
+                                         unsigned SubIdx,
+                                         const TargetRegisterClass *SubRC) const;
 
   unsigned split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
                          MachineBasicBlock::iterator MI,
@@ -38,7 +44,7 @@ private:
                          const TargetRegisterClass *RC,
                          const MachineOperand &Op) const;
 
-  void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
+  void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> & Worklist,
                           MachineInstr *Inst,
                           unsigned Opcode) const;
 
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll
index 05d1e0f0416..8e985c75cbd 100644
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -1,13 +1,13 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
 
-; EG-CHECK-LABEL: @or_v2i32
-; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG-LABEL: @or_v2i32
+; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK-LABEL: @or_v2i32
-;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI-LABEL: @or_v2i32
+; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -18,17 +18,17 @@ define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in)
   ret void
 }
 
-; EG-CHECK-LABEL: @or_v4i32
-; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG-LABEL: @or_v4i32
+; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK-LABEL: @or_v4i32
-;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI-LABEL: @or_v4i32
+; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -39,16 +39,16 @@ define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in)
   ret void
 }
 
-; SI-CHECK-LABEL: @scalar_or_i32
-; SI-CHECK: S_OR_B32
+; SI-LABEL: @scalar_or_i32
+; SI: S_OR_B32
 define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
   %or = or i32 %a, %b
   store i32 %or, i32 addrspace(1)* %out
   ret void
 }
 
-; SI-CHECK-LABEL: @vector_or_i32
-; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+; SI-LABEL: @vector_or_i32
+; SI: V_OR_B32_e32 v{{[0-9]}}
 define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
   %loada = load i32 addrspace(1)* %a
   %or = or i32 %loada, %b
@@ -56,20 +56,20 @@ define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b)
   ret void
 }
 
-; EG-CHECK-LABEL: @scalar_or_i64
-; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
-; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
-; SI-CHECK-LABEL: @scalar_or_i64
-; SI-CHECK: S_OR_B64
+; EG-LABEL: @scalar_or_i64
+; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
+; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
+; SI-LABEL: @scalar_or_i64
+; SI: S_OR_B64
 define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
   %or = or i64 %a, %b
   store i64 %or, i64 addrspace(1)* %out
   ret void
 }
 
-; SI-CHECK-LABEL: @vector_or_i64
-; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
-; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+; SI-LABEL: @vector_or_i64
+; SI: V_OR_B32_e32 v{{[0-9]}}
+; SI: V_OR_B32_e32 v{{[0-9]}}
 define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
   %loada = load i64 addrspace(1)* %a, align 8
   %loadb = load i64 addrspace(1)* %a, align 8
@@ -78,12 +78,39 @@ define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
   ret void
 }
 
-; SI-CHECK-LABEL: @scalar_vector_or_i64
-; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
-; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+; SI-LABEL: @scalar_vector_or_i64
+; SI: V_OR_B32_e32 v{{[0-9]}}
+; SI: V_OR_B32_e32 v{{[0-9]}}
 define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
   %loada = load i64 addrspace(1)* %a
   %or = or i64 %loada, %b
   store i64 %or, i64 addrspace(1)* %out
   ret void
 }
+
+; SI-LABEL: @vector_or_i64_loadimm
+; SI-DAG: S_MOV_B32
+; SI-DAG: S_MOV_B32
+; SI-DAG: BUFFER_LOAD_DWORDX2
+; SI: V_OR_B32_e32
+; SI: V_OR_B32_e32
+; SI: S_ENDPGM
+define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 8
+  %or = or i64 %loada, 22470723082367
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FIXME: The or 0 should really be removed.
+; SI-LABEL: @vector_or_i64_imm
+; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI: V_OR_B32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
+; SI: V_OR_B32_e32 {{v[0-9]+}}, 0, {{.*}}
+; SI: S_ENDPGM
+define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 8
+  %or = or i64 %loada, 8
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
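
Note on the immediate split: the heart of buildExtractSubRegOrImm's immediate
handling is cutting a 64-bit immediate into the low half (Imm & 0xFFFFFFFF) for
the sub0 operand and the high half (Imm >> 32) for the sub1 operand, which the
two new tests above exercise. A minimal standalone sketch of that split, with
no LLVM dependencies; the lo32/hi32 helper names are illustrative, not code
from the patch:

#include <cassert>
#include <cstdint>

// Mirrors the sub0/sub1 cases in buildExtractSubRegOrImm. The shift is
// arithmetic, matching the int64_t returned by Op.getImm() in the patch.
static int64_t lo32(int64_t Imm) { return Imm & 0xFFFFFFFF; }
static int64_t hi32(int64_t Imm) { return Imm >> 32; }

int main() {
  // Constant from the @vector_or_i64_loadimm test: both halves are nonzero,
  // so each 32-bit half of the OR needs an S_MOV_B32 of its immediate.
  int64_t Wide = 22470723082367;
  assert(((hi32(Wide) << 32) | lo32(Wide)) == Wide);

  // Constant from the @vector_or_i64_imm test: the high half is 0, which is
  // why the generated code still contains the "or 0" noted in the FIXME.
  int64_t Small = 8;
  assert(lo32(Small) == 8 && hi32(Small) == 0);
  return 0;
}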