From 52c50f5ec5f5fd35f88c955edeacd5e81e8e025f Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Fri, 11 Jul 2014 12:08:23 +0000 Subject: [PATCH] [X86] Fix the inversion of low and high bits for the lowering of MUL_LOHI. Also add a few comments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212808 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 50 ++++++++++++++++++++++++------ test/CodeGen/X86/vector-idiv.ll | 3 -- 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5ccff20e294..a5c560ab835 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15156,10 +15156,23 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) || (VT == MVT::v8i32 && Subtarget->hasInt256())); - // Get the high parts. + // PMULxD operations multiply each even value (starting at 0) of LHS with + // the related value of RHS and produce a widened result. + // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h> + // => <2 x i64> <ae|cg> + // + // In other words, to have all the results, we need to perform two PMULxD: + // 1. one with the even values. + // 2. one with the odd values. + // To achieve #2, we need to place the odd values at an even position. + // + // Place the odd value at an even position (basically, shift all values 1 + // step to the left): const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1}; - SDValue Hi0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask); - SDValue Hi1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask); + // <a|b|c|d> => <b|undef|d|undef> + SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask); + // <e|f|g|h> => <f|undef|h|undef> + SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask); // Emit two multiplies, one for the lower 2 ints and one for the higher 2 // ints. 
@@ -15167,22 +15180,39 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI; unsigned Opcode = (!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ; + // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h> + // => <2 x i64> <ae|cg> SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(Opcode, dl, MulVT, Op0, Op1)); + // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef> + // => <2 x i64> <bf|dh> SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getNode(Opcode, dl, MulVT, Hi0, Hi1)); + DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1)); // Shuffle it back into the right order. + // The internal representation is big endian. + // In other words, an i64 bitcast to 2 x i32 has its high part at index 0 + // and its low part at index 1. + // Moreover, we have: Mul1 = <ae|cg> ; Mul2 = <bf|dh> + // Vector index 0 1 ; 2 3 + // We want <ae|bf|cg|dh> + // Vector index 0 2 1 3 + // Since each element is seen as 2 x i32, we get: + // high_mask[i] = 2 x vector_index[i] + // low_mask[i] = 2 x vector_index[i] + 1 + // where vector_index = {0, Size/2, 1, Size/2 + 1, ..., + // Size/2 - 1, Size/2 + Size/2 - 1} + // where Size is the number of elements of the final vector. 
SDValue Highs, Lows; if (VT == MVT::v8i32) { - const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15}; + const int HighMask[] = {0, 8, 2, 10, 4, 12, 6, 14}; Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask); - const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14}; + const int LowMask[] = {1, 9, 3, 11, 5, 13, 7, 15}; Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); } else { - const int HighMask[] = {1, 5, 3, 7}; + const int HighMask[] = {0, 4, 2, 6}; Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask); - const int LowMask[] = {0, 4, 2, 6}; + const int LowMask[] = {1, 5, 3, 7}; Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); } @@ -15200,7 +15230,9 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup); } - return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Highs, Lows); + // The low part of a MUL_LOHI is supposed to be the first value and the + // high part the second value. + return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Lows, Highs); } static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll index b6d43e985f0..a3229073751 100644 --- a/test/CodeGen/X86/vector-idiv.ll +++ b/test/CodeGen/X86/vector-idiv.ll @@ -132,9 +132,6 @@ define <4 x i32> @test8(<4 x i32> %a) { ; SSE41: padd ; SSE-LABEL: test8: -; SSE: psrad $31 -; SSE: pand -; SSE: paddd ; SSE: pmuludq ; SSE: pshufd $49 ; SSE-NOT: pshufd $49 -- 2.34.1