From: Daniel Sanders Date: Mon, 23 Sep 2013 14:03:12 +0000 (+0000) Subject: [mips][msa] Added support for matching insert and copy from normal IR (i.e. not intri... X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=9a1aaeb012e593fba977015c5d8b6b1aa41a908c;p=oota-llvm.git [mips][msa] Added support for matching insert and copy from normal IR (i.e. not intrinsics) Changes to MIPS SelectionDAG: * Added nodes VEXTRACT_[SZ]EXT_ELT to represent extract and extend in a single operation and implemented the DAG combines necessary to fold sign/zero extends into the extract. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191199 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td index 5bec80294c8..055a8cf9d2c 100644 --- a/include/llvm/IR/IntrinsicsMips.td +++ b/include/llvm/IR/IntrinsicsMips.td @@ -779,18 +779,18 @@ def int_mips_clti_u_d : GCCBuiltin<"__builtin_msa_clti_u_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_copy_s_b : GCCBuiltin<"__builtin_msa_copy_s_b">, - Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], []>; + Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_copy_s_h : GCCBuiltin<"__builtin_msa_copy_s_h">, - Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], []>; + Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_copy_s_w : GCCBuiltin<"__builtin_msa_copy_s_w">, - Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], []>; + Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_copy_u_b : GCCBuiltin<"__builtin_msa_copy_u_b">, - Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], []>; + Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_copy_u_h : GCCBuiltin<"__builtin_msa_copy_u_h">, - Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], []>; + Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_copy_u_w : GCCBuiltin<"__builtin_msa_copy_u_w">, - Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], []>; + Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_ctcmsa : GCCBuiltin<"__builtin_msa_ctcmsa">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 443653f5f98..443ce192ff1 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -214,6 +214,8 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VANY_NONZERO: return "MipsISD::VANY_NONZERO"; case MipsISD::VSPLAT: return "MipsISD::VSPLAT"; case MipsISD::VSPLATD: return "MipsISD::VSPLATD"; + case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT"; + case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT"; case MipsISD::VNOR: return "MipsISD::VNOR"; default: return NULL; } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index b56ac41955c..71a2ddc2d49 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -167,6 +167,10 @@ namespace llvm { // Combined (XOR (OR $a, $b), -1) VNOR, + // Extended vector element extraction + VEXTRACT_SEXT_ELT, + VEXTRACT_ZEXT_ELT, + // Load/Store Left/Right nodes. LWL = ISD::FIRST_TARGET_MEMORY_OPCODE, LWR, diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 4f875ea158c..e30cfc5268e 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -23,6 +23,33 @@ def MipsVSplatD : SDNode<"MipsISD::VSPLATD", SDT_MipsSplat>; def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; +def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT", + SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; +def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT", + SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; + +// Pattern fragments +def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx), + (MipsVExtractSExt node:$vec, node:$idx, i8)>; +def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx), + (MipsVExtractSExt node:$vec, node:$idx, i16)>; +def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx), + (MipsVExtractSExt node:$vec, node:$idx, i32)>; + +def vextract_zext_i8 : PatFrag<(ops node:$vec, node:$idx), + (MipsVExtractZExt node:$vec, node:$idx, i8)>; +def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx), + (MipsVExtractZExt node:$vec, node:$idx, i16)>; +def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx), + (MipsVExtractZExt node:$vec, node:$idx, i32)>; + +def vinsert_v16i8 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v16i8 (vector_insert node:$vec, node:$val, node:$idx))>; +def vinsert_v8i16 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v8i16 (vector_insert node:$vec, node:$val, node:$idx))>; +def vinsert_v4i32 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v4i32 (vector_insert node:$vec, node:$val, node:$idx))>; + def vsplati8 : PatFrag<(ops node:$in), (v16i8 (MipsVSplat (i32 node:$in)))>; def vsplati16 : PatFrag<(ops node:$in), (v8i16 (MipsVSplat (i32 node:$in)))>; def vsplati32 : PatFrag<(ops node:$in), (v4i32 (MipsVSplat (i32 node:$in)))>; @@ -805,12 +832,12 @@ class MSA_BIT_D_DESC_BASE { dag OutOperandList = (outs RCD:$rd); - dag InOperandList = (ins RCWS:$ws, uimm6:$n); + dag InOperandList = (ins RCWS:$ws, uimm4:$n); string AsmString = !strconcat(instr_asm, "\t$rd, $ws[$n]"); - list Pattern = [(set RCD:$rd, (OpNode RCWS:$ws, immZExt6:$n))]; + list Pattern = [(set RCD:$rd, (OpNode (VecTy RCWS:$ws), immZExt4:$n))]; InstrItinClass Itinerary = itin; } @@ -922,11 +949,11 @@ class MSA_INSERT_DESC_BASE { dag OutOperandList = (outs RCD:$wd); - dag InOperandList = (ins RCD:$wd_in, uimm6:$n, RCWS:$rs); + dag InOperandList = (ins RCD:$wd_in, RCWS:$rs, uimm6:$n); string AsmString = !strconcat(instr_asm, "\t$wd[$n], $rs"); list Pattern = [(set RCD:$wd, (OpNode RCD:$wd_in, - immZExt6:$n, - RCWS:$rs))]; + RCWS:$rs, + immZExt6:$n))]; InstrItinClass Itinerary = itin; string Constraints = "$wd = $wd_in"; } @@ -1222,18 +1249,18 @@ class CLTI_U_W_DESC : MSA_SI5_DESC_BASE<"clti_u.w", int_mips_clti_u_w, class CLTI_U_D_DESC : MSA_SI5_DESC_BASE<"clti_u.d", int_mips_clti_u_d, MSA128D>; -class COPY_S_B_DESC : MSA_COPY_DESC_BASE<"copy_s.b", int_mips_copy_s_b, +class COPY_S_B_DESC : MSA_COPY_DESC_BASE<"copy_s.b", vextract_sext_i8, v16i8, GPR32, MSA128B>; -class COPY_S_H_DESC : MSA_COPY_DESC_BASE<"copy_s.h", int_mips_copy_s_h, +class COPY_S_H_DESC : MSA_COPY_DESC_BASE<"copy_s.h", vextract_sext_i16, v8i16, GPR32, MSA128H>; -class COPY_S_W_DESC : MSA_COPY_DESC_BASE<"copy_s.w", int_mips_copy_s_w, +class COPY_S_W_DESC : MSA_COPY_DESC_BASE<"copy_s.w", vextract_sext_i32, v4i32, GPR32, MSA128W>; -class COPY_U_B_DESC : MSA_COPY_DESC_BASE<"copy_u.b", int_mips_copy_u_b, +class COPY_U_B_DESC : MSA_COPY_DESC_BASE<"copy_u.b", vextract_zext_i8, v16i8, GPR32, MSA128B>; -class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", int_mips_copy_u_h, +class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", vextract_zext_i16, v8i16, GPR32, MSA128H>; -class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", int_mips_copy_u_w, +class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", vextract_zext_i32, v4i32, GPR32, MSA128W>; class CTCMSA_DESC { @@ -1561,12 +1588,12 @@ class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", int_mips_ilvr_h, MSA128H>; class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", int_mips_ilvr_w, MSA128W>; class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", int_mips_ilvr_d, MSA128D>; -class INSERT_B_DESC : MSA_INSERT_DESC_BASE<"insert.b", int_mips_insert_b, - MSA128B, GPR32>; -class INSERT_H_DESC : MSA_INSERT_DESC_BASE<"insert.h", int_mips_insert_h, - MSA128H, GPR32>; -class INSERT_W_DESC : MSA_INSERT_DESC_BASE<"insert.w", int_mips_insert_w, - MSA128W, GPR32>; +class INSERT_B_DESC : MSA_INSERT_DESC_BASE<"insert.b", vinsert_v16i8, MSA128B, + GPR32>; +class INSERT_H_DESC : MSA_INSERT_DESC_BASE<"insert.h", vinsert_v8i16, MSA128H, + GPR32>; +class INSERT_W_DESC : MSA_INSERT_DESC_BASE<"insert.w", vinsert_v4i32, MSA128W, + GPR32>; class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", int_mips_insve_b, MSA128B>; class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", int_mips_insve_h, MSA128H>; @@ -2689,6 +2716,9 @@ def XORI_B : XORI_B_ENC, XORI_B_DESC; class MSAPat pred = [HasMSA]> : Pat, Requires; +def : MSAPat<(extractelt (v4i32 MSA128W:$ws), immZExt4:$idx), + (COPY_S_W MSA128W:$ws, immZExt4:$idx)>; + def : MSAPat<(v16i8 (load addr:$addr)), (LD_B addr:$addr)>; def : MSAPat<(v8i16 (load addr:$addr)), (LD_H addr:$addr)>; def : MSAPat<(v4i32 (load addr:$addr)), (LD_W addr:$addr)>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 610b8bf3853..732b68b5a64 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -91,6 +91,8 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass); addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::XOR); } @@ -161,6 +163,8 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::BITCAST, Ty, Legal); setOperationAction(ISD::LOAD, Ty, Legal); setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); setOperationAction(ISD::ADD, Ty, Legal); @@ -190,6 +194,7 @@ addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::LOAD, Ty, Legal); setOperationAction(ISD::STORE, Ty, Legal); setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); if (Ty != MVT::v8f16) { setOperationAction(ISD::FADD, Ty, Legal); @@ -233,6 +238,7 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); } @@ -404,6 +410,56 @@ static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT +// +// Performs the following transformations: +// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its +// sign/zero-extension is completely overwritten by the new one performed by +// the ISD::AND. +// - Removes redundant zero extensions performed by an ISD::AND. +static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + if (!Subtarget->hasMSA()) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + unsigned Op0Opcode = Op0->getOpcode(); + + // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d) + // where $d + 1 == 2^n and n == 32 + // or $d + 1 == 2^n and n <= 32 and ZExt + // -> (MipsVExtractZExt $a, $b, $c) + if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT || + Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) { + ConstantSDNode *Mask = dyn_cast(Op1); + + if (!Mask) + return SDValue(); + + int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); + + if (Log2IfPositive <= 0) + return SDValue(); // Mask+1 is not a power of 2 + + SDValue Op0Op2 = Op0->getOperand(2); + EVT ExtendTy = cast(Op0Op2)->getVT(); + unsigned ExtendTySize = ExtendTy.getSizeInBits(); + unsigned Log2 = Log2IfPositive; + + if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || + Log2 == ExtendTySize) { + SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 }; + DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT, + Op0->getVTList(), Ops, Op0->getNumOperands()); + return Op0; + } + } + + return SDValue(); +} + static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { @@ -500,11 +556,53 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); } +// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold +// constant splats into MipsISD::SHRA_DSP for DSPr2. +// +// Performs the following transformations: +// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its +// sign/zero-extension is completely overwritten by the new one performed by +// the ISD::SRA and ISD::SHL nodes. +// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL +// sequence. +// +// See performDSPShiftCombine for more information about the transformation +// used for DSPr2. static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { EVT Ty = N->getValueType(0); + if (Subtarget->hasMSA()) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) + // where $d + sizeof($c) == 32 + // or $d + sizeof($c) <= 32 and SExt + // -> (MipsVExtractSExt $a, $b, $c) + if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { + SDValue Op0Op0 = Op0->getOperand(0); + ConstantSDNode *ShAmount = dyn_cast(Op1); + + if (!ShAmount) + return SDValue(); + + EVT ExtendTy = cast(Op0Op0->getOperand(2))->getVT(); + unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); + + if (TotalBits == 32 || + (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && + TotalBits <= 32)) { + SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), + Op0Op0->getOperand(2) }; + DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT, + Op0Op0->getVTList(), Ops, Op0Op0->getNumOperands()); + return Op0Op0; + } + } + } + if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2())) return SDValue(); @@ -616,6 +714,9 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { case ISD::ADDE: return performADDECombine(N, DAG, DCI, Subtarget); + case ISD::AND: + Val = performANDCombine(N, DAG, DCI, Subtarget); + break; case ISD::SUBE: return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::MUL: @@ -889,6 +990,33 @@ static SDValue lowerMSABranchIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { return Result; } +// Lower an MSA copy intrinsic into the specified SelectionDAG node +static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { + SDLoc DL(Op); + SDValue Vec = Op->getOperand(1); + SDValue Idx = Op->getOperand(2); + EVT ResTy = Op->getValueType(0); + EVT EltTy = Vec->getValueType(0).getVectorElementType(); + + SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx, + DAG.getValueType(EltTy)); + + return Result; +} + +// Lower an MSA insert intrinsic into the specified SelectionDAG node +static SDValue lowerMSAInsertIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { + SDLoc DL(Op); + SDValue Op0 = Op->getOperand(1); + SDValue Op1 = Op->getOperand(2); + SDValue Op2 = Op->getOperand(3); + EVT ResTy = Op->getValueType(0); + + SDValue Result = DAG.getNode(Opc, DL, ResTy, Op0, Op2, Op1); + + return Result; +} + static SDValue lowerMSAUnaryIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { SDLoc DL(Op); SDValue Value = Op->getOperand(1); @@ -957,6 +1085,14 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return lowerMSABranchIntr(Op, DAG, MipsISD::VALL_ZERO); case Intrinsic::mips_bz_v: return lowerMSABranchIntr(Op, DAG, MipsISD::VANY_ZERO); + case Intrinsic::mips_copy_s_b: + case Intrinsic::mips_copy_s_h: + case Intrinsic::mips_copy_s_w: + return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); + case Intrinsic::mips_copy_u_b: + case Intrinsic::mips_copy_u_h: + case Intrinsic::mips_copy_u_w: + return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); case Intrinsic::mips_div_s_b: case Intrinsic::mips_div_s_h: case Intrinsic::mips_div_s_w: @@ -992,6 +1128,10 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_fsub_w: case Intrinsic::mips_fsub_d: return lowerMSABinaryIntr(Op, DAG, ISD::FSUB); + case Intrinsic::mips_insert_b: + case Intrinsic::mips_insert_h: + case Intrinsic::mips_insert_w: + return lowerMSAInsertIntr(Op, DAG, ISD::INSERT_VECTOR_ELT); case Intrinsic::mips_ldi_b: case Intrinsic::mips_ldi_h: case Intrinsic::mips_ldi_w: @@ -1165,6 +1305,24 @@ static bool isSplatVector(const BuildVectorSDNode *N) { return true; } +// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. +// +// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We +// choose to sign-extend but we could have equally chosen zero-extend. The +// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT +// result into this node later (possibly changing it to a zero-extend in the +// process). +SDValue MipsSETargetLowering:: +lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT ResTy = Op->getValueType(0); + SDValue Op0 = Op->getOperand(0); + SDValue Op1 = Op->getOperand(1); + EVT EltTy = Op0->getValueType(0).getVectorElementType(); + return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, + DAG.getValueType(EltTy)); +} + // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the // backend. // diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index 909ab7dfde1..644fe02665c 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -73,6 +73,7 @@ namespace llvm { SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, diff --git a/test/CodeGen/Mips/msa/basic_operations.ll b/test/CodeGen/Mips/msa/basic_operations.ll index 4cec6aa28aa..8a86d35c70c 100644 --- a/test/CodeGen/Mips/msa/basic_operations.ll +++ b/test/CodeGen/Mips/msa/basic_operations.ll @@ -118,3 +118,229 @@ define void @const_v2i64() nounwind { ret void ; MIPS32: .size const_v2i64 } + +define i32 @extract_sext_v16i8() nounwind { + ; MIPS32: extract_sext_v16i8: + + %1 = load <16 x i8>* @v16i8 + ; MIPS32-DAG: ld.b [[R1:\$w[0-9]+]], + + %2 = add <16 x i8> %1, %1 + ; MIPS32-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <16 x i8> %2, i32 1 + %4 = sext i8 %3 to i32 + ; MIPS32-DAG: copy_s.b [[R3:\$[0-9]+]], [[R1]][1] + ; MIPS32-NOT: sll + ; MIPS32-NOT: sra + + ret i32 %4 + ; MIPS32: .size extract_sext_v16i8 +} + +define i32 @extract_sext_v8i16() nounwind { + ; MIPS32: extract_sext_v8i16: + + %1 = load <8 x i16>* @v8i16 + ; MIPS32-DAG: ld.h [[R1:\$w[0-9]+]], + + %2 = add <8 x i16> %1, %1 + ; MIPS32-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <8 x i16> %2, i32 1 + %4 = sext i16 %3 to i32 + ; MIPS32-DAG: copy_s.h [[R3:\$[0-9]+]], [[R1]][1] + ; MIPS32-NOT: sll + ; MIPS32-NOT: sra + + ret i32 %4 + ; MIPS32: .size extract_sext_v8i16 +} + +define i32 @extract_sext_v4i32() nounwind { + ; MIPS32: extract_sext_v4i32: + + %1 = load <4 x i32>* @v4i32 + ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], + + %2 = add <4 x i32> %1, %1 + ; MIPS32-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <4 x i32> %2, i32 1 + ; MIPS32-DAG: copy_s.w [[R3:\$[0-9]+]], [[R1]][1] + + ret i32 %3 + ; MIPS32: .size extract_sext_v4i32 +} + +define i64 @extract_sext_v2i64() nounwind { + ; MIPS32: extract_sext_v2i64: + + %1 = load <2 x i64>* @v2i64 + ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]], + + %2 = add <2 x i64> %1, %1 + ; MIPS32-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <2 x i64> %2, i32 1 + ; MIPS32-DAG: copy_s.w [[R3:\$[0-9]+]], [[R1]][2] + ; MIPS32-DAG: copy_s.w [[R4:\$[0-9]+]], [[R1]][3] + ; MIPS32-NOT: sll + ; MIPS32-NOT: sra + + ret i64 %3 + ; MIPS32: .size extract_sext_v2i64 +} + +define i32 @extract_zext_v16i8() nounwind { + ; MIPS32: extract_zext_v16i8: + + %1 = load <16 x i8>* @v16i8 + ; MIPS32-DAG: ld.b [[R1:\$w[0-9]+]], + + %2 = add <16 x i8> %1, %1 + ; MIPS32-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <16 x i8> %2, i32 1 + %4 = zext i8 %3 to i32 + ; MIPS32-DAG: copy_u.b [[R3:\$[0-9]+]], [[R1]][1] + ; MIPS32-NOT: andi + + ret i32 %4 + ; MIPS32: .size extract_zext_v16i8 +} + +define i32 @extract_zext_v8i16() nounwind { + ; MIPS32: extract_zext_v8i16: + + %1 = load <8 x i16>* @v8i16 + ; MIPS32-DAG: ld.h [[R1:\$w[0-9]+]], + + %2 = add <8 x i16> %1, %1 + ; MIPS32-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <8 x i16> %2, i32 1 + %4 = zext i16 %3 to i32 + ; MIPS32-DAG: copy_u.h [[R3:\$[0-9]+]], [[R1]][1] + ; MIPS32-NOT: andi + + ret i32 %4 + ; MIPS32: .size extract_zext_v8i16 +} + +define i32 @extract_zext_v4i32() nounwind { + ; MIPS32: extract_zext_v4i32: + + %1 = load <4 x i32>* @v4i32 + ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], + + %2 = add <4 x i32> %1, %1 + ; MIPS32-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <4 x i32> %2, i32 1 + ; MIPS32-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R1]][1] + + ret i32 %3 + ; MIPS32: .size extract_zext_v4i32 +} + +define i64 @extract_zext_v2i64() nounwind { + ; MIPS32: extract_zext_v2i64: + + %1 = load <2 x i64>* @v2i64 + ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]], + + %2 = add <2 x i64> %1, %1 + ; MIPS32-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <2 x i64> %2, i32 1 + ; MIPS32-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R1]][2] + ; MIPS32-DAG: copy_{{[su]}}.w [[R4:\$[0-9]+]], [[R1]][3] + ; MIPS32-NOT: andi + + ret i64 %3 + ; MIPS32: .size extract_zext_v2i64 +} + +define void @insert_v16i8(i32 %a) nounwind { + ; MIPS32: insert_v16i8: + + %1 = load <16 x i8>* @v16i8 + ; MIPS32-DAG: ld.b [[R1:\$w[0-9]+]], + + %a2 = trunc i32 %a to i8 + %a3 = sext i8 %a2 to i32 + %a4 = trunc i32 %a3 to i8 + ; MIPS32-NOT: andi + ; MIPS32-NOT: sra + + %2 = insertelement <16 x i8> %1, i8 %a4, i32 1 + ; MIPS32-DAG: insert.b [[R1]][1], $4 + + store <16 x i8> %2, <16 x i8>* @v16i8 + ; MIPS32-DAG: st.b [[R1]] + + ret void + ; MIPS32: .size insert_v16i8 +} + +define void @insert_v8i16(i32 %a) nounwind { + ; MIPS32: insert_v8i16: + + %1 = load <8 x i16>* @v8i16 + ; MIPS32-DAG: ld.h [[R1:\$w[0-9]+]], + + %a2 = trunc i32 %a to i16 + %a3 = sext i16 %a2 to i32 + %a4 = trunc i32 %a3 to i16 + ; MIPS32-NOT: andi + ; MIPS32-NOT: sra + + %2 = insertelement <8 x i16> %1, i16 %a4, i32 1 + ; MIPS32-DAG: insert.h [[R1]][1], $4 + + store <8 x i16> %2, <8 x i16>* @v8i16 + ; MIPS32-DAG: st.h [[R1]] + + ret void + ; MIPS32: .size insert_v8i16 +} + +define void @insert_v4i32(i32 %a) nounwind { + ; MIPS32: insert_v4i32: + + %1 = load <4 x i32>* @v4i32 + ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], + + ; MIPS32-NOT: andi + ; MIPS32-NOT: sra + + %2 = insertelement <4 x i32> %1, i32 %a, i32 1 + ; MIPS32-DAG: insert.w [[R1]][1], $4 + + store <4 x i32> %2, <4 x i32>* @v4i32 + ; MIPS32-DAG: st.w [[R1]] + + ret void + ; MIPS32: .size insert_v4i32 +} + +define void @insert_v2i64(i64 %a) nounwind { + ; MIPS32: insert_v2i64: + + %1 = load <2 x i64>* @v2i64 + ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], + + ; MIPS32-NOT: andi + ; MIPS32-NOT: sra + + %2 = insertelement <2 x i64> %1, i64 %a, i32 1 + ; MIPS32-DAG: insert.w [[R1]][2], $4 + ; MIPS32-DAG: insert.w [[R1]][3], $5 + + store <2 x i64> %2, <2 x i64>* @v2i64 + ; MIPS32-DAG: st.w [[R1]] + + ret void + ; MIPS32: .size insert_v2i64 +}