From 2d7d45bb2078150697d626ce3c242cf519b435f4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 31 Oct 2015 17:27:52 +0000 Subject: [PATCH] [X86] Reduce math for index calculation for inserting and extracting subvectors and elements by exploiting the fact that all supported vector types have a power 2 number of elements. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251740 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e8b1b8bb5d3..3910c5b7e65 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4373,19 +4373,18 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal, // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits(); + assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); // This is the index of the first element of the vectorWidth-bit chunk - // we want. - unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth) - * ElemsPerChunk); + // we want. Since ElemsPerChunk is a power of 2 just need to clear bits. + IdxVal &= ~(ElemsPerChunk - 1); // If the input is a buildvector just emit a smaller one. if (Vec.getOpcode() == ISD::BUILD_VECTOR) return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT, - makeArrayRef(Vec->op_begin() + NormalizedIdxVal, - ElemsPerChunk)); + makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk)); - SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl); + SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); } @@ -4423,13 +4422,13 @@ static SDValue InsertSubVector(SDValue Result, SDValue Vec, // Insert the relevant vectorWidth bits. unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits(); + assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); // This is the index of the first element of the vectorWidth-bit chunk - // we want. - unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth) - * ElemsPerChunk); + // we want. Since ElemsPerChunk is a power of 2 just need to clear bits. + IdxVal &= ~(ElemsPerChunk - 1); - SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl); + SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); } @@ -11390,10 +11389,11 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MVT EltVT = VecVT.getVectorElementType(); unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits(); + assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); - //if (IdxVal >= NumElems/2) - // IdxVal -= NumElems/2; - IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk; + // Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2 + // this can be done with a mask. + IdxVal &= ElemsPerChunk - 1; return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, DAG.getConstant(IdxVal, dl, MVT::i32)); } @@ -11529,7 +11529,9 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // Insert the element into the desired chunk. unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits(); - unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128; + assert(isPowerOf2_32(NumEltsIn128)); + // Since NumEltsIn128 is a power of 2 we can use mask instead of modulo. + unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1); V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1, DAG.getConstant(IdxIn128, dl, MVT::i32)); -- 2.34.1