From: Matt Arsenault
Date: Sun, 15 Jun 2014 21:08:58 +0000 (+0000)
Subject: R600: Remove a few more things from AMDILISelLowering
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=fa848ccd093a7a18e4fadcd1e6652188731d4f92;p=oota-llvm.git

R600: Remove a few more things from AMDILISelLowering

Try to keep all the setOperationActions for integer ops together.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211001 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 6a3ef750a28..86deae039fa 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -126,11 +126,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::FROUND, MVT::f32, Legal);
   setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
 
-  // The hardware supports 32-bit ROTR, but not ROTL.
-  setOperationAction(ISD::ROTL, MVT::i32, Expand);
-  setOperationAction(ISD::ROTL, MVT::i64, Expand);
-  setOperationAction(ISD::ROTR, MVT::i64, Expand);
-
   // Lower floating point store/load to integer store/load to reduce the number
   // of patterns in tablegen.
   setOperationAction(ISD::STORE, MVT::f32, Promote);
@@ -223,18 +218,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
 
   setOperationAction(ISD::BR_CC, MVT::i1, Expand);
 
-  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
-
-  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
-
-  setOperationAction(ISD::MUL, MVT::i64, Expand);
-  setOperationAction(ISD::SUB, MVT::i64, Expand);
-
-  setOperationAction(ISD::UDIV, MVT::i32, Expand);
-  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
-  setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
-  setOperationAction(ISD::UREM, MVT::i32, Expand);
-
   if (!Subtarget->hasBFI()) {
     // fcopysign can be done in a single instruction with BFI.
     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
@@ -243,8 +226,12 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
 
   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
   for (MVT VT : ScalarIntVTs) {
+    setOperationAction(ISD::SREM, VT, Expand);
+    setOperationAction(ISD::SDIV, VT, Custom);
+
     // GPU does not have divrem function for signed or unsigned.
     setOperationAction(ISD::SDIVREM, VT, Expand);
+    setOperationAction(ISD::UDIVREM, VT, Custom);
 
     // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
@@ -261,6 +248,19 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
 
   if (!Subtarget->hasBCNT(64))
     setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+  // The hardware supports 32-bit ROTR, but not ROTL.
+  setOperationAction(ISD::ROTL, MVT::i32, Expand);
+  setOperationAction(ISD::ROTL, MVT::i64, Expand);
+  setOperationAction(ISD::ROTR, MVT::i64, Expand);
+
+  setOperationAction(ISD::MUL, MVT::i64, Expand);
+  setOperationAction(ISD::MULHU, MVT::i64, Expand);
+  setOperationAction(ISD::MULHS, MVT::i64, Expand);
+  setOperationAction(ISD::SUB, MVT::i64, Expand);
+  setOperationAction(ISD::UDIV, MVT::i32, Expand);
+  setOperationAction(ISD::UREM, MVT::i32, Expand);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
 
   static const MVT::SimpleValueType VectorIntTypes[] = {
     MVT::v2i32, MVT::v4i32
@@ -280,15 +280,17 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
     setOperationAction(ISD::ROTL, VT, Expand);
     setOperationAction(ISD::ROTR, VT, Expand);
     setOperationAction(ISD::SUB, VT, Expand);
-    setOperationAction(ISD::UDIV, VT, Expand);
     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
     // TODO: Implement custom UREM / SREM routines.
+    setOperationAction(ISD::SDIV, VT, Custom);
+    setOperationAction(ISD::UDIV, VT, Expand);
     setOperationAction(ISD::SREM, VT, Expand);
     setOperationAction(ISD::UREM, VT, Expand);
-    setOperationAction(ISD::SDIVREM, VT, Expand);
     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+    setOperationAction(ISD::SDIVREM, VT, Expand);
+    setOperationAction(ISD::UDIVREM, VT, Custom);
     setOperationAction(ISD::SELECT, VT, Expand);
     setOperationAction(ISD::VSELECT, VT, Expand);
     setOperationAction(ISD::XOR, VT, Expand);
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
index 0ff11af89aa..fa48e65be4e 100644
--- a/lib/Target/R600/AMDILISelLowering.cpp
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -55,12 +55,6 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
     setOperationAction(ISD::BRCOND, VT, Custom);
     setOperationAction(ISD::BR_JT, VT, Expand);
     setOperationAction(ISD::BRIND, VT, Expand);
-    // TODO: Implement custom UREM/SREM routines
-    setOperationAction(ISD::SREM, VT, Expand);
-    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
-    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-    if (VT != MVT::i64)
-      setOperationAction(ISD::SDIV, VT, Custom);
   }
 
   for (MVT VT : FloatTypes) {
@@ -72,8 +66,6 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
     setOperationAction(ISD::SELECT_CC, VT, Expand);
   }
 
-  setOperationAction(ISD::MULHU, MVT::i64, Expand);
-  setOperationAction(ISD::MULHS, MVT::i64, Expand);
   if (STM.hasHWFP64()) {
     setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
     setOperationAction(ISD::FABS, MVT::f64, Expand);
diff --git a/test/CodeGen/R600/srem.ll b/test/CodeGen/R600/srem.ll
new file mode 100644
index 00000000000..65e33952d29
--- /dev/null
+++ b/test/CodeGen/R600/srem.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=r600 -mcpu=SI < %s
+; RUN: llc -march=r600 -mcpu=redwood < %s
+
+define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in
+  %den = load i32 addrspace(1) * %den_ptr
+  %result = srem i32 %num, %den
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %num = load i32 addrspace(1) * %in
+  %result = srem i32 %num, 4
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+define void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %den_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
+  %num = load <2 x i32> addrspace(1) * %in
+  %den = load <2 x i32> addrspace(1) * %den_ptr
+  %result = srem <2 x i32> %num, %den
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+define void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %num = load <2 x i32> addrspace(1) * %in
+  %result = srem <2 x i32> %num, <i32 4, i32 4>
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+define void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %den_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+  %num = load <4 x i32> addrspace(1) * %in
+  %den = load <4 x i32> addrspace(1) * %den_ptr
+  %result = srem <4 x i32> %num, %den
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+define void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %num = load <4 x i32> addrspace(1) * %in
+  %result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
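
Note (not part of the patch): with ISD::SREM marked Expand and ISD::SDIV marked Custom, the DAG legalizer is expected to rewrite a signed remainder in terms of the custom-lowered division, i.e. x - (x / y) * y, which is what the new srem.ll test ends up exercising. The snippet below is a hand-written LLVM IR sketch of that expansion for the scalar case; the function name is illustrative and the code is not llc output from this patch.

; Hand-written sketch of the ISD::SREM -> SDIV/MUL/SUB expansion performed
; when SREM is Expand; illustrative only, not generated by llc.
define i32 @srem_expanded(i32 %x, i32 %y) {
  %q = sdiv i32 %x, %y   ; truncating signed division
  %p = mul i32 %q, %y
  %r = sub i32 %x, %p    ; remainder keeps the sign of %x
  ret i32 %r
}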