From cfb1e1703130809043a7b020b4cdfa04b59fa8ec Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 24 Sep 2013 10:28:18 +0000 Subject: [PATCH] [mips][msa] Added support for matching slli, srai, and srli from normal IR (i.e. not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191285 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 92 +++++++++++--- lib/Target/Mips/MipsSEISelLowering.cpp | 18 +++ test/CodeGen/Mips/msa/bitwise.ll | 168 +++++++++++++++++++++++++ 3 files changed, 258 insertions(+), 20 deletions(-) diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index b1e2e579f3e..92dc046ae10 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -831,6 +831,50 @@ class MSA_BIT_D_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws, uimm3:$u3); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u3"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, + (vsplati8 immZExt3:$u3)))]; + InstrItinClass Itinerary = itin; +} + +class MSA_BIT_SPLATH_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws, uimm4:$u4); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u4"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, + (vsplati16 immZExt4:$u4)))]; + InstrItinClass Itinerary = itin; +} + +class MSA_BIT_SPLATW_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws, uimm5:$u5); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, + (vsplati32 immZExt5:$u5)))]; + InstrItinClass Itinerary = itin; +} + +class MSA_BIT_SPLATD_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws, uimm6:$u6); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u6"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, + (vsplati64 immZExt6:$u6)))]; + InstrItinClass Itinerary = itin; +} + class MSA_COPY_DESC_BASE { @@ -1713,15 +1757,23 @@ class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", int_mips_min_u_h, MSA128H>; class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", int_mips_min_u_w, MSA128W>; class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", int_mips_min_u_d, MSA128D>; -class MINI_S_B_DESC : MSA_I5_X_DESC_BASE<"mini_s.b", int_mips_mini_s_b, MSA128B>; -class MINI_S_H_DESC : MSA_I5_X_DESC_BASE<"mini_s.h", int_mips_mini_s_h, MSA128H>; -class MINI_S_W_DESC : MSA_I5_X_DESC_BASE<"mini_s.w", int_mips_mini_s_w, MSA128W>; -class MINI_S_D_DESC : MSA_I5_X_DESC_BASE<"mini_s.d", int_mips_mini_s_d, MSA128D>; +class MINI_S_B_DESC : MSA_I5_X_DESC_BASE<"mini_s.b", int_mips_mini_s_b, + MSA128B>; +class MINI_S_H_DESC : MSA_I5_X_DESC_BASE<"mini_s.h", int_mips_mini_s_h, + MSA128H>; +class MINI_S_W_DESC : MSA_I5_X_DESC_BASE<"mini_s.w", int_mips_mini_s_w, + MSA128W>; +class MINI_S_D_DESC : MSA_I5_X_DESC_BASE<"mini_s.d", int_mips_mini_s_d, + MSA128D>; -class MINI_U_B_DESC : MSA_I5_X_DESC_BASE<"mini_u.b", int_mips_mini_u_b, MSA128B>; -class MINI_U_H_DESC : MSA_I5_X_DESC_BASE<"mini_u.h", int_mips_mini_u_h, MSA128H>; -class MINI_U_W_DESC : MSA_I5_X_DESC_BASE<"mini_u.w", int_mips_mini_u_w, MSA128W>; -class MINI_U_D_DESC : MSA_I5_X_DESC_BASE<"mini_u.d", int_mips_mini_u_d, MSA128D>; +class MINI_U_B_DESC : MSA_I5_X_DESC_BASE<"mini_u.b", int_mips_mini_u_b, + MSA128B>; +class MINI_U_H_DESC : MSA_I5_X_DESC_BASE<"mini_u.h", int_mips_mini_u_h, + MSA128H>; +class MINI_U_W_DESC : MSA_I5_X_DESC_BASE<"mini_u.w", int_mips_mini_u_w, + MSA128W>; +class MINI_U_D_DESC : MSA_I5_X_DESC_BASE<"mini_u.d", int_mips_mini_u_d, + MSA128D>; class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, MSA128B>; class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, MSA128H>; @@ -1837,10 +1889,10 @@ class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128H>; class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", shl, MSA128W>; class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", shl, MSA128D>; -class SLLI_B_DESC : MSA_BIT_B_DESC_BASE<"slli.b", int_mips_slli_b, MSA128B>; -class SLLI_H_DESC : MSA_BIT_H_DESC_BASE<"slli.h", int_mips_slli_h, MSA128H>; -class SLLI_W_DESC : MSA_BIT_W_DESC_BASE<"slli.w", int_mips_slli_w, MSA128W>; -class SLLI_D_DESC : MSA_BIT_D_DESC_BASE<"slli.d", int_mips_slli_d, MSA128D>; +class SLLI_B_DESC : MSA_BIT_SPLATB_DESC_BASE<"slli.b", shl, MSA128B>; +class SLLI_H_DESC : MSA_BIT_SPLATH_DESC_BASE<"slli.h", shl, MSA128H>; +class SLLI_W_DESC : MSA_BIT_SPLATW_DESC_BASE<"slli.w", shl, MSA128W>; +class SLLI_D_DESC : MSA_BIT_SPLATD_DESC_BASE<"slli.d", shl, MSA128D>; class SPLAT_B_DESC : MSA_3R_DESC_BASE<"splat.b", int_mips_splat_b, MSA128B, MSA128B, GPR32>; @@ -1865,10 +1917,10 @@ class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", sra, MSA128H>; class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", sra, MSA128W>; class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", sra, MSA128D>; -class SRAI_B_DESC : MSA_BIT_B_DESC_BASE<"srai.b", int_mips_srai_b, MSA128B>; -class SRAI_H_DESC : MSA_BIT_H_DESC_BASE<"srai.h", int_mips_srai_h, MSA128H>; -class SRAI_W_DESC : MSA_BIT_W_DESC_BASE<"srai.w", int_mips_srai_w, MSA128W>; -class SRAI_D_DESC : MSA_BIT_D_DESC_BASE<"srai.d", int_mips_srai_d, MSA128D>; +class SRAI_B_DESC : MSA_BIT_SPLATB_DESC_BASE<"srai.b", sra, MSA128B>; +class SRAI_H_DESC : MSA_BIT_SPLATH_DESC_BASE<"srai.h", sra, MSA128H>; +class SRAI_W_DESC : MSA_BIT_SPLATW_DESC_BASE<"srai.w", sra, MSA128W>; +class SRAI_D_DESC : MSA_BIT_SPLATD_DESC_BASE<"srai.d", sra, MSA128D>; class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, MSA128B>; class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128H>; @@ -1885,10 +1937,10 @@ class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128H>; class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", srl, MSA128W>; class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", srl, MSA128D>; -class SRLI_B_DESC : MSA_BIT_B_DESC_BASE<"srli.b", int_mips_srli_b, MSA128B>; -class SRLI_H_DESC : MSA_BIT_H_DESC_BASE<"srli.h", int_mips_srli_h, MSA128H>; -class SRLI_W_DESC : MSA_BIT_W_DESC_BASE<"srli.w", int_mips_srli_w, MSA128W>; -class SRLI_D_DESC : MSA_BIT_D_DESC_BASE<"srli.d", int_mips_srli_d, MSA128D>; +class SRLI_B_DESC : MSA_BIT_SPLATB_DESC_BASE<"srli.b", srl, MSA128B>; +class SRLI_H_DESC : MSA_BIT_SPLATH_DESC_BASE<"srli.h", srl, MSA128H>; +class SRLI_W_DESC : MSA_BIT_SPLATW_DESC_BASE<"srli.w", srl, MSA128W>; +class SRLI_D_DESC : MSA_BIT_SPLATD_DESC_BASE<"srli.d", srl, MSA128D>; class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b, MSA128B>; class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128H>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 60960d67e5c..9bd3be2d431 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1187,16 +1187,34 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_sll_w: case Intrinsic::mips_sll_d: return lowerMSABinaryIntr(Op, DAG, ISD::SHL); + case Intrinsic::mips_slli_b: + case Intrinsic::mips_slli_h: + case Intrinsic::mips_slli_w: + case Intrinsic::mips_slli_d: + return lowerMSABinaryImmIntr(Op, DAG, ISD::SHL, + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_sra_b: case Intrinsic::mips_sra_h: case Intrinsic::mips_sra_w: case Intrinsic::mips_sra_d: return lowerMSABinaryIntr(Op, DAG, ISD::SRA); + case Intrinsic::mips_srai_b: + case Intrinsic::mips_srai_h: + case Intrinsic::mips_srai_w: + case Intrinsic::mips_srai_d: + return lowerMSABinaryImmIntr(Op, DAG, ISD::SRA, + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_srl_b: case Intrinsic::mips_srl_h: case Intrinsic::mips_srl_w: case Intrinsic::mips_srl_d: return lowerMSABinaryIntr(Op, DAG, ISD::SRL); + case Intrinsic::mips_srli_b: + case Intrinsic::mips_srli_h: + case Intrinsic::mips_srli_w: + case Intrinsic::mips_srli_d: + return lowerMSABinaryImmIntr(Op, DAG, ISD::SRL, + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_subv_b: case Intrinsic::mips_subv_h: case Intrinsic::mips_subv_w: diff --git a/test/CodeGen/Mips/msa/bitwise.ll b/test/CodeGen/Mips/msa/bitwise.ll index a388dc8b923..fefaca74e43 100644 --- a/test/CodeGen/Mips/msa/bitwise.ll +++ b/test/CodeGen/Mips/msa/bitwise.ll @@ -324,6 +324,62 @@ define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { ; CHECK: .size sll_v2i64 } +define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: sll_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = shl <16 x i8> %1, + ; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size sll_v16i8_i +} + +define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: sll_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = shl <8 x i16> %1, + ; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size sll_v8i16_i +} + +define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: sll_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = shl <4 x i32> %1, + ; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size sll_v4i32_i +} + +define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: sll_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = shl <2 x i64> %1, + ; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size sll_v2i64_i +} + define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { ; CHECK: sra_v16i8: @@ -388,6 +444,62 @@ define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { ; CHECK: .size sra_v2i64 } +define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: sra_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = ashr <16 x i8> %1, + ; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size sra_v16i8_i +} + +define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: sra_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = ashr <8 x i16> %1, + ; CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size sra_v8i16_i +} + +define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: sra_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = ashr <4 x i32> %1, + ; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size sra_v4i32_i +} + +define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: sra_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = ashr <2 x i64> %1, + ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size sra_v2i64_i +} + define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { ; CHECK: srl_v16i8: @@ -452,6 +564,62 @@ define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { ; CHECK: .size srl_v2i64 } +define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: srl_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = lshr <16 x i8> %1, + ; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size srl_v16i8_i +} + +define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: srl_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = lshr <8 x i16> %1, + ; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size srl_v8i16_i +} + +define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: srl_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = lshr <4 x i32> %1, + ; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size srl_v4i32_i +} + +define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: srl_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = lshr <2 x i64> %1, + ; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size srl_v2i64_i +} + define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { ; CHECK: ctpop_v16i8: -- 2.34.1