From aee7825762830536956b9e634fd7ffd59396984d Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 12 Nov 2013 10:31:49 +0000 Subject: [PATCH] [mips][msa] Added support for matching bset, bseti, bneg, and bnegi from normal IR (i.e. not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194469 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 217 +++++++++++++++------- lib/Target/Mips/MipsSEISelLowering.cpp | 69 +++++++ test/CodeGen/Mips/msa/bitwise.ll | 248 +++++++++++++++++++++++++ 3 files changed, 464 insertions(+), 70 deletions(-) diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index bfdba9ae839..028070097fe 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -327,6 +327,47 @@ def vsplat_maskr_bits : SplatComplexPattern; +// Any build_vector that is a constant splat with a value that equals 1 +// FIXME: This should be a ComplexPattern but we can't use them because the +// ISel generator requires the uses to have a name, but providing a name +// causes other errors ("used in pattern but not operand list") +def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{ + APInt Imm; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + return selectVSplat (N, Imm) && + Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; +}]>; + +def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{ + APInt Imm; + SDNode *BV = N->getOperand(0).getNode(); + EVT EltTy = N->getValueType(0).getVectorElementType(); + + return selectVSplat (BV, Imm) && + Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; +}]>; + +def vbneg_b : PatFrag<(ops node:$ws, node:$wt), + (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; +def vbneg_h : PatFrag<(ops node:$ws, node:$wt), + (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; +def vbneg_w : PatFrag<(ops node:$ws, node:$wt), + (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; +def vbneg_d : PatFrag<(ops node:$ws, node:$wt), + (xor node:$ws, (shl (v2i64 vsplati64_imm_eq_1), + node:$wt))>; + +def vbset_b : PatFrag<(ops node:$ws, node:$wt), + (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; +def vbset_h : PatFrag<(ops node:$ws, node:$wt), + (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; +def vbset_w : PatFrag<(ops node:$ws, node:$wt), + (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; +def vbset_d : PatFrag<(ops node:$ws, node:$wt), + (or node:$ws, (shl (v2i64 vsplati64_imm_eq_1), + node:$wt))>; + def fms : PatFrag<(ops node:$wd, node:$ws, node:$wt), (fsub node:$wd, (fmul node:$ws, node:$wt))>; @@ -1055,9 +1096,9 @@ class MSA_BIT_B_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm3:$m); + dag InOperandList = (ins ROWS:$ws, vsplat_uimm3:$m); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt3:$m))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, vsplat_uimm_pow2:$m))]; InstrItinClass Itinerary = itin; } @@ -1065,9 +1106,9 @@ class MSA_BIT_H_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm4:$m); + dag InOperandList = (ins ROWS:$ws, vsplat_uimm4:$m); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$m))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, vsplat_uimm_pow2:$m))]; InstrItinClass Itinerary = itin; } @@ -1075,9 +1116,9 @@ class MSA_BIT_W_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm5:$m); + dag InOperandList = (ins ROWS:$ws, vsplat_uimm5:$m); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt5:$m))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, vsplat_uimm_pow2:$m))]; InstrItinClass Itinerary = itin; } @@ -1085,6 +1126,50 @@ class MSA_BIT_D_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); + dag InOperandList = (ins ROWS:$ws, vsplat_uimm6:$m); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); + list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, vsplat_uimm_pow2:$m))]; + InstrItinClass Itinerary = itin; +} + +// This class is deprecated and will be removed soon. +class MSA_BIT_B_X_DESC_BASE { + dag OutOperandList = (outs ROWD:$wd); + dag InOperandList = (ins ROWS:$ws, uimm3:$m); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); + list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt3:$m))]; + InstrItinClass Itinerary = itin; +} + +// This class is deprecated and will be removed soon. +class MSA_BIT_H_X_DESC_BASE { + dag OutOperandList = (outs ROWD:$wd); + dag InOperandList = (ins ROWS:$ws, uimm4:$m); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); + list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$m))]; + InstrItinClass Itinerary = itin; +} + +// This class is deprecated and will be removed soon. +class MSA_BIT_W_X_DESC_BASE { + dag OutOperandList = (outs ROWD:$wd); + dag InOperandList = (ins ROWS:$ws, uimm5:$m); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); + list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt5:$m))]; + InstrItinClass Itinerary = itin; +} + +// This class is deprecated and will be removed soon. +class MSA_BIT_D_X_DESC_BASE { + dag OutOperandList = (outs ROWD:$wd); dag InOperandList = (ins ROWS:$ws, uimm6:$m); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt6:$m))]; @@ -1522,14 +1607,14 @@ class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", int_mips_bclr_h, MSA128HOpnd>; class BCLR_W_DESC : MSA_3R_DESC_BASE<"bclr.w", int_mips_bclr_w, MSA128WOpnd>; class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", int_mips_bclr_d, MSA128DOpnd>; -class BCLRI_B_DESC : MSA_BIT_B_DESC_BASE<"bclri.b", int_mips_bclri_b, - MSA128BOpnd>; -class BCLRI_H_DESC : MSA_BIT_H_DESC_BASE<"bclri.h", int_mips_bclri_h, - MSA128HOpnd>; -class BCLRI_W_DESC : MSA_BIT_W_DESC_BASE<"bclri.w", int_mips_bclri_w, - MSA128WOpnd>; -class BCLRI_D_DESC : MSA_BIT_D_DESC_BASE<"bclri.d", int_mips_bclri_d, - MSA128DOpnd>; +class BCLRI_B_DESC : MSA_BIT_B_X_DESC_BASE<"bclri.b", int_mips_bclri_b, + MSA128BOpnd>; +class BCLRI_H_DESC : MSA_BIT_H_X_DESC_BASE<"bclri.h", int_mips_bclri_h, + MSA128HOpnd>; +class BCLRI_W_DESC : MSA_BIT_W_X_DESC_BASE<"bclri.w", int_mips_bclri_w, + MSA128WOpnd>; +class BCLRI_D_DESC : MSA_BIT_D_X_DESC_BASE<"bclri.d", int_mips_bclri_d, + MSA128DOpnd>; class BINSL_B_DESC : MSA_3R_BINSX_DESC_BASE<"binsl.b", int_mips_binsl_b, MSA128BOpnd>; @@ -1607,19 +1692,15 @@ class BMZI_B_DESC { string Constraints = "$wd = $wd_in"; } -class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", int_mips_bneg_b, MSA128BOpnd>; -class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", int_mips_bneg_h, MSA128HOpnd>; -class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", int_mips_bneg_w, MSA128WOpnd>; -class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", int_mips_bneg_d, MSA128DOpnd>; +class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", vbneg_b, MSA128BOpnd>; +class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", vbneg_h, MSA128HOpnd>; +class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", vbneg_w, MSA128WOpnd>; +class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", vbneg_d, MSA128DOpnd>; -class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", int_mips_bnegi_b, - MSA128BOpnd>; -class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", int_mips_bnegi_h, - MSA128HOpnd>; -class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", int_mips_bnegi_w, - MSA128WOpnd>; -class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", int_mips_bnegi_d, - MSA128DOpnd>; +class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", xor, MSA128BOpnd>; +class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", xor, MSA128HOpnd>; +class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", xor, MSA128WOpnd>; +class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", xor, MSA128DOpnd>; class BNZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bnz.b", MSA128BOpnd>; class BNZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bnz.h", MSA128HOpnd>; @@ -1652,19 +1733,15 @@ class BSELI_B_DESC { string Constraints = "$wd = $wd_in"; } -class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", int_mips_bset_b, MSA128BOpnd>; -class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", int_mips_bset_h, MSA128HOpnd>; -class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", int_mips_bset_w, MSA128WOpnd>; -class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", int_mips_bset_d, MSA128DOpnd>; +class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", vbset_b, MSA128BOpnd>; +class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", vbset_h, MSA128HOpnd>; +class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", vbset_w, MSA128WOpnd>; +class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", vbset_d, MSA128DOpnd>; -class BSETI_B_DESC : MSA_BIT_B_DESC_BASE<"bseti.b", int_mips_bseti_b, - MSA128BOpnd>; -class BSETI_H_DESC : MSA_BIT_H_DESC_BASE<"bseti.h", int_mips_bseti_h, - MSA128HOpnd>; -class BSETI_W_DESC : MSA_BIT_W_DESC_BASE<"bseti.w", int_mips_bseti_w, - MSA128WOpnd>; -class BSETI_D_DESC : MSA_BIT_D_DESC_BASE<"bseti.d", int_mips_bseti_d, - MSA128DOpnd>; +class BSETI_B_DESC : MSA_BIT_B_DESC_BASE<"bseti.b", or, MSA128BOpnd>; +class BSETI_H_DESC : MSA_BIT_H_DESC_BASE<"bseti.h", or, MSA128HOpnd>; +class BSETI_W_DESC : MSA_BIT_W_DESC_BASE<"bseti.w", or, MSA128WOpnd>; +class BSETI_D_DESC : MSA_BIT_D_DESC_BASE<"bseti.d", or, MSA128DOpnd>; class BZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bz.b", MSA128BOpnd>; class BZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bz.h", MSA128HOpnd>; @@ -2356,23 +2433,23 @@ class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", ctpop, MSA128HOpnd>; class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128WOpnd>; class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>; -class SAT_S_B_DESC : MSA_BIT_B_DESC_BASE<"sat_s.b", int_mips_sat_s_b, - MSA128BOpnd>; -class SAT_S_H_DESC : MSA_BIT_H_DESC_BASE<"sat_s.h", int_mips_sat_s_h, - MSA128HOpnd>; -class SAT_S_W_DESC : MSA_BIT_W_DESC_BASE<"sat_s.w", int_mips_sat_s_w, - MSA128WOpnd>; -class SAT_S_D_DESC : MSA_BIT_D_DESC_BASE<"sat_s.d", int_mips_sat_s_d, - MSA128DOpnd>; +class SAT_S_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, + MSA128BOpnd>; +class SAT_S_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, + MSA128HOpnd>; +class SAT_S_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, + MSA128WOpnd>; +class SAT_S_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, + MSA128DOpnd>; -class SAT_U_B_DESC : MSA_BIT_B_DESC_BASE<"sat_u.b", int_mips_sat_u_b, - MSA128BOpnd>; -class SAT_U_H_DESC : MSA_BIT_H_DESC_BASE<"sat_u.h", int_mips_sat_u_h, - MSA128HOpnd>; -class SAT_U_W_DESC : MSA_BIT_W_DESC_BASE<"sat_u.w", int_mips_sat_u_w, - MSA128WOpnd>; -class SAT_U_D_DESC : MSA_BIT_D_DESC_BASE<"sat_u.d", int_mips_sat_u_d, - MSA128DOpnd>; +class SAT_U_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, + MSA128BOpnd>; +class SAT_U_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, + MSA128HOpnd>; +class SAT_U_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, + MSA128WOpnd>; +class SAT_U_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, + MSA128DOpnd>; class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>; class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>; @@ -2439,14 +2516,14 @@ class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128HOpnd>; class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>; class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>; -class SRARI_B_DESC : MSA_BIT_B_DESC_BASE<"srari.b", int_mips_srari_b, - MSA128BOpnd>; -class SRARI_H_DESC : MSA_BIT_H_DESC_BASE<"srari.h", int_mips_srari_h, - MSA128HOpnd>; -class SRARI_W_DESC : MSA_BIT_W_DESC_BASE<"srari.w", int_mips_srari_w, - MSA128WOpnd>; -class SRARI_D_DESC : MSA_BIT_D_DESC_BASE<"srari.d", int_mips_srari_d, - MSA128DOpnd>; +class SRARI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srari.b", int_mips_srari_b, + MSA128BOpnd>; +class SRARI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srari.h", int_mips_srari_h, + MSA128HOpnd>; +class SRARI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srari.w", int_mips_srari_w, + MSA128WOpnd>; +class SRARI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srari.d", int_mips_srari_d, + MSA128DOpnd>; class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>; class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>; @@ -2467,14 +2544,14 @@ class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128HOpnd>; class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>; class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>; -class SRLRI_B_DESC : MSA_BIT_B_DESC_BASE<"srlri.b", int_mips_srlri_b, - MSA128BOpnd>; -class SRLRI_H_DESC : MSA_BIT_H_DESC_BASE<"srlri.h", int_mips_srlri_h, - MSA128HOpnd>; -class SRLRI_W_DESC : MSA_BIT_W_DESC_BASE<"srlri.w", int_mips_srlri_w, - MSA128WOpnd>; -class SRLRI_D_DESC : MSA_BIT_D_DESC_BASE<"srlri.d", int_mips_srlri_d, - MSA128DOpnd>; +class SRLRI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srlri.b", int_mips_srlri_b, + MSA128BOpnd>; +class SRLRI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srlri.h", int_mips_srlri_h, + MSA128HOpnd>; +class SRLRI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srlri.w", int_mips_srlri_w, + MSA128WOpnd>; +class SRLRI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srlri.d", int_mips_srlri_d, + MSA128DOpnd>; class ST_DESC_BASEgetOperand(ImmOp), DAG); } +static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, + unsigned Opc, SDValue Imm, + bool BigEndian) { + EVT VecTy = Op->getValueType(0); + SDValue Exp2Imm; + SDLoc DL(Op); + + // The DAG Combiner can't constant fold bitcasted vectors so we must do it + // here. + if (VecTy == MVT::v2i64) { + if (ConstantSDNode *CImm = dyn_cast(Imm)) { + APInt BitImm = APInt(64, 1) << CImm->getAPIntValue(); + + SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), MVT::i32); + SDValue BitImmOp = DAG.getConstant(BitImm.trunc(32), MVT::i32); + Exp2Imm = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, + BitImmHiOp, BitImmOp, + BitImmHiOp, BitImmOp)); + } + } + + if (Exp2Imm.getNode() == NULL) { + // We couldnt constant fold, do a vector shift instead + SDValue One = lowerMSASplatImm(DL, VecTy, DAG.getConstant(1, MVT::i32), + DAG); + Exp2Imm = lowerMSASplatImm(DL, VecTy, Imm, DAG); + Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, One, Exp2Imm); + } + + return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); +} + SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -1383,6 +1416,24 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_bneg_b: + case Intrinsic::mips_bneg_h: + case Intrinsic::mips_bneg_w: + case Intrinsic::mips_bneg_d: { + EVT VecTy = Op->getValueType(0); + SDValue One = lowerMSASplatImm(DL, VecTy, DAG.getConstant(1, MVT::i32), + DAG); + + return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), + DAG.getNode(ISD::SHL, DL, VecTy, One, + Op->getOperand(2))); + } + case Intrinsic::mips_bnegi_b: + case Intrinsic::mips_bnegi_h: + case Intrinsic::mips_bnegi_w: + case Intrinsic::mips_bnegi_d: + return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), + !Subtarget->isLittle()); case Intrinsic::mips_bnz_b: case Intrinsic::mips_bnz_h: case Intrinsic::mips_bnz_w: @@ -1400,6 +1451,24 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), lowerMSASplatImm(Op, 3, DAG)); + case Intrinsic::mips_bset_b: + case Intrinsic::mips_bset_h: + case Intrinsic::mips_bset_w: + case Intrinsic::mips_bset_d: { + EVT VecTy = Op->getValueType(0); + SDValue One = lowerMSASplatImm(DL, VecTy, DAG.getConstant(1, MVT::i32), + DAG); + + return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), + DAG.getNode(ISD::SHL, DL, VecTy, One, + Op->getOperand(2))); + } + case Intrinsic::mips_bseti_b: + case Intrinsic::mips_bseti_h: + case Intrinsic::mips_bseti_w: + case Intrinsic::mips_bseti_d: + return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2), + !Subtarget->isLittle()); case Intrinsic::mips_bz_b: case Intrinsic::mips_bz_h: case Intrinsic::mips_bz_w: diff --git a/test/CodeGen/Mips/msa/bitwise.ll b/test/CodeGen/Mips/msa/bitwise.ll index 48e3db1df91..2104921ebe1 100644 --- a/test/CodeGen/Mips/msa/bitwise.ll +++ b/test/CodeGen/Mips/msa/bitwise.ll @@ -1243,6 +1243,254 @@ define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind ; CHECK: .size binsr_v2i64_i } +define void @bset_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: bset_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shl <16 x i8> , %2 + %4 = or <16 x i8> %1, %3 + ; CHECK-DAG: bset.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bset_v16i8 +} + +define void @bset_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: bset_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shl <8 x i16> , %2 + %4 = or <8 x i16> %1, %3 + ; CHECK-DAG: bset.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size bset_v8i16 +} + +define void @bset_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: bset_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shl <4 x i32> , %2 + %4 = or <4 x i32> %1, %3 + ; CHECK-DAG: bset.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size bset_v4i32 +} + +define void @bset_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: bset_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shl <2 x i64> , %2 + %4 = or <2 x i64> %1, %3 + ; CHECK-DAG: bset.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size bset_v2i64 +} + +define void @bneg_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: bneg_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shl <16 x i8> , %2 + %4 = xor <16 x i8> %1, %3 + ; CHECK-DAG: bneg.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bneg_v16i8 +} + +define void @bneg_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: bneg_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shl <8 x i16> , %2 + %4 = xor <8 x i16> %1, %3 + ; CHECK-DAG: bneg.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size bneg_v8i16 +} + +define void @bneg_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: bneg_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shl <4 x i32> , %2 + %4 = xor <4 x i32> %1, %3 + ; CHECK-DAG: bneg.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size bneg_v4i32 +} + +define void @bneg_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: bneg_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shl <2 x i64> , %2 + %4 = xor <2 x i64> %1, %3 + ; CHECK-DAG: bneg.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size bneg_v2i64 +} + +define void @bseti_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: bseti_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = or <16 x i8> %1, + ; CHECK-DAG: bseti.b [[R3:\$w[0-9]+]], [[R1]], 3 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bseti_v16i8 +} + +define void @bseti_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: bseti_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = or <8 x i16> %1, + ; CHECK-DAG: bseti.h [[R3:\$w[0-9]+]], [[R1]], 3 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size bseti_v8i16 +} + +define void @bseti_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: bseti_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = or <4 x i32> %1, + ; CHECK-DAG: bseti.w [[R3:\$w[0-9]+]], [[R1]], 3 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size bseti_v4i32 +} + +define void @bseti_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: bseti_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = or <2 x i64> %1, + ; CHECK-DAG: bseti.d [[R3:\$w[0-9]+]], [[R1]], 3 + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size bseti_v2i64 +} + +define void @bnegi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: bnegi_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = xor <16 x i8> %1, + ; CHECK-DAG: bnegi.b [[R3:\$w[0-9]+]], [[R1]], 3 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bnegi_v16i8 +} + +define void @bnegi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: bnegi_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = xor <8 x i16> %1, + ; CHECK-DAG: bnegi.h [[R3:\$w[0-9]+]], [[R1]], 3 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size bnegi_v8i16 +} + +define void @bnegi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: bnegi_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = xor <4 x i32> %1, + ; CHECK-DAG: bnegi.w [[R3:\$w[0-9]+]], [[R1]], 3 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size bnegi_v4i32 +} + +define void @bnegi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: bnegi_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = xor <2 x i64> %1, + ; CHECK-DAG: bnegi.d [[R3:\$w[0-9]+]], [[R1]], 3 + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size bnegi_v2i64 +} + declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val) declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val) declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) -- 2.34.1