TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
SDLoc dl(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// Don't create instructions with illegal types after legalize types has run.
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
+ // During Type Legalization, when promoting illegal vector types,
+ // the backend might introduce new shuffle dag nodes and bitcasts.
+ //
+ // This code performs the following transformation:
+ // fold: (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) ->
+ // (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>)
+ //
+ // We do this only if both the bitcast and the BINOP dag nodes have
+ // one use. Also, perform this transformation only if the new binary
+ // operation is legal. This is to avoid introducing dag nodes that
+ // potentially need to be further expanded (or custom lowered) into a
+ // less optimal sequence of dag nodes.
+ if (!DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() &&
+ N1.getOpcode() == ISD::UNDEF && N0.hasOneUse() &&
+ N0.getOpcode() == ISD::BITCAST) {
+ SDValue BC0 = N0.getOperand(0);
+ EVT SVT = BC0.getValueType();
+ unsigned Opcode = BC0.getOpcode();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ if (BC0.hasOneUse() && SVT.isVector() &&
+ SVT.getVectorNumElements() * 2 == NumElts &&
+ TLI.isOperationLegal(Opcode, VT)) {
+ bool CanFold = false;
+ switch (Opcode) {
+ default : break;
+ case ISD::ADD :
+ case ISD::FADD :
+ case ISD::SUB :
+ case ISD::FSUB :
+ case ISD::MUL :
+ case ISD::FMUL :
+ CanFold = true;
+ }
+
+ unsigned SVTNumElts = SVT.getVectorNumElements();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i)
+ CanFold = SVOp->getMaskElt(i) == (int)(i * 2);
+ for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i)
+ CanFold = SVOp->getMaskElt(i) < 0;
+
+ if (CanFold) {
+ SDValue BC00 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(0));
+ SDValue BC01 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(1));
+ SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01);
+ return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, &SVOp->getMask()[0]);
+ }
+ }
+ }
+
// Only handle 128 wide vector from here on.
if (!VT.is128BitVector())
return SDValue();
--- /dev/null
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+
+
+define double @test1_add(double %A, double %B) {
+ %1 = bitcast double %A to <2 x i32>
+ %2 = bitcast double %B to <2 x i32>
+ %add = add <2 x i32> %1, %2
+ %3 = bitcast <2 x i32> %add to double
+ ret double %3
+}
+; CHECK-LABEL: test1_add
+; SSE41: paddd
+; AVX: vpaddd
+; CHECK-NEXT: ret
+
+
+define double @test2_add(double %A, double %B) {
+ %1 = bitcast double %A to <4 x i16>
+ %2 = bitcast double %B to <4 x i16>
+ %add = add <4 x i16> %1, %2
+ %3 = bitcast <4 x i16> %add to double
+ ret double %3
+}
+; CHECK-LABEL: test2_add
+; SSE41: paddw
+; AVX: vpaddw
+; CHECK-NEXT: ret
+
+define double @test3_add(double %A, double %B) {
+ %1 = bitcast double %A to <8 x i8>
+ %2 = bitcast double %B to <8 x i8>
+ %add = add <8 x i8> %1, %2
+ %3 = bitcast <8 x i8> %add to double
+ ret double %3
+}
+; CHECK-LABEL: test3_add
+; SSE41: paddb
+; AVX: vpaddb
+; CHECK-NEXT: ret
+
+
+define double @test1_sub(double %A, double %B) {
+ %1 = bitcast double %A to <2 x i32>
+ %2 = bitcast double %B to <2 x i32>
+ %sub = sub <2 x i32> %1, %2
+ %3 = bitcast <2 x i32> %sub to double
+ ret double %3
+}
+; CHECK-LABEL: test1_sub
+; SSE41: psubd
+; AVX: vpsubd
+; CHECK-NEXT: ret
+
+
+define double @test2_sub(double %A, double %B) {
+ %1 = bitcast double %A to <4 x i16>
+ %2 = bitcast double %B to <4 x i16>
+ %sub = sub <4 x i16> %1, %2
+ %3 = bitcast <4 x i16> %sub to double
+ ret double %3
+}
+; CHECK-LABEL: test2_sub
+; SSE41: psubw
+; AVX: vpsubw
+; CHECK-NEXT: ret
+
+
+define double @test3_sub(double %A, double %B) {
+ %1 = bitcast double %A to <8 x i8>
+ %2 = bitcast double %B to <8 x i8>
+ %sub = sub <8 x i8> %1, %2
+ %3 = bitcast <8 x i8> %sub to double
+ ret double %3
+}
+; CHECK-LABEL: test3_sub
+; SSE41: psubb
+; AVX: vpsubb
+; CHECK-NEXT: ret
+
+
+define double @test1_mul(double %A, double %B) {
+ %1 = bitcast double %A to <2 x i32>
+ %2 = bitcast double %B to <2 x i32>
+ %mul = mul <2 x i32> %1, %2
+ %3 = bitcast <2 x i32> %mul to double
+ ret double %3
+}
+; CHECK-LABEL: test1_mul
+; SSE41: pmulld
+; AVX: vpmulld
+; CHECK-NEXT: ret
+
+
+define double @test2_mul(double %A, double %B) {
+ %1 = bitcast double %A to <4 x i16>
+ %2 = bitcast double %B to <4 x i16>
+ %mul = mul <4 x i16> %1, %2
+ %3 = bitcast <4 x i16> %mul to double
+ ret double %3
+}
+; CHECK-LABEL: test2_mul
+; SSE41: pmullw
+; AVX: vpmullw
+; CHECK-NEXT: ret
+
+; There is no legal ISD::MUL with type MVT::v8i16.
+define double @test3_mul(double %A, double %B) {
+ %1 = bitcast double %A to <8 x i8>
+ %2 = bitcast double %B to <8 x i8>
+ %mul = mul <8 x i8> %1, %2
+ %3 = bitcast <8 x i8> %mul to double
+ ret double %3
+}
+; CHECK-LABEL: test3_mul
+; CHECK: pmullw
+; CHECK-NEXT: pshufb
+; CHECK-NEXT: ret
+
+
+define double @test1_and(double %A, double %B) {
+ %1 = bitcast double %A to <2 x i32>
+ %2 = bitcast double %B to <2 x i32>
+ %and = and <2 x i32> %1, %2
+ %3 = bitcast <2 x i32> %and to double
+ ret double %3
+}
+; CHECK-LABEL: test1_and
+; SSE41: andps
+; AVX: vandps
+; CHECK-NEXT: ret
+
+
+define double @test2_and(double %A, double %B) {
+ %1 = bitcast double %A to <4 x i16>
+ %2 = bitcast double %B to <4 x i16>
+ %and = and <4 x i16> %1, %2
+ %3 = bitcast <4 x i16> %and to double
+ ret double %3
+}
+; CHECK-LABEL: test2_and
+; SSE41: andps
+; AVX: vandps
+; CHECK-NEXT: ret
+
+
+define double @test3_and(double %A, double %B) {
+ %1 = bitcast double %A to <8 x i8>
+ %2 = bitcast double %B to <8 x i8>
+ %and = and <8 x i8> %1, %2
+ %3 = bitcast <8 x i8> %and to double
+ ret double %3
+}
+; CHECK-LABEL: test3_and
+; SSE41: andps
+; AVX: vandps
+; CHECK-NEXT: ret
+
+
+define double @test1_or(double %A, double %B) {
+ %1 = bitcast double %A to <2 x i32>
+ %2 = bitcast double %B to <2 x i32>
+ %or = or <2 x i32> %1, %2
+ %3 = bitcast <2 x i32> %or to double
+ ret double %3
+}
+; CHECK-LABEL: test1_or
+; SSE41: orps
+; AVX: vorps
+; CHECK-NEXT: ret
+
+
+define double @test2_or(double %A, double %B) {
+ %1 = bitcast double %A to <4 x i16>
+ %2 = bitcast double %B to <4 x i16>
+ %or = or <4 x i16> %1, %2
+ %3 = bitcast <4 x i16> %or to double
+ ret double %3
+}
+; CHECK-LABEL: test2_or
+; SSE41: orps
+; AVX: vorps
+; CHECK-NEXT: ret
+
+
+define double @test3_or(double %A, double %B) {
+ %1 = bitcast double %A to <8 x i8>
+ %2 = bitcast double %B to <8 x i8>
+ %or = or <8 x i8> %1, %2
+ %3 = bitcast <8 x i8> %or to double
+ ret double %3
+}
+; CHECK-LABEL: test3_or
+; SSE41: orps
+; AVX: vorps
+; CHECK-NEXT: ret
+
+
+define double @test1_xor(double %A, double %B) {
+ %1 = bitcast double %A to <2 x i32>
+ %2 = bitcast double %B to <2 x i32>
+ %xor = xor <2 x i32> %1, %2
+ %3 = bitcast <2 x i32> %xor to double
+ ret double %3
+}
+; CHECK-LABEL: test1_xor
+; SSE41: xorps
+; AVX: vxorps
+; CHECK-NEXT: ret
+
+
+define double @test2_xor(double %A, double %B) {
+ %1 = bitcast double %A to <4 x i16>
+ %2 = bitcast double %B to <4 x i16>
+ %xor = xor <4 x i16> %1, %2
+ %3 = bitcast <4 x i16> %xor to double
+ ret double %3
+}
+; CHECK-LABEL: test2_xor
+; SSE41: xorps
+; AVX: vxorps
+; CHECK-NEXT: ret
+
+
+define double @test3_xor(double %A, double %B) {
+ %1 = bitcast double %A to <8 x i8>
+ %2 = bitcast double %B to <8 x i8>
+ %xor = xor <8 x i8> %1, %2
+ %3 = bitcast <8 x i8> %xor to double
+ ret double %3
+}
+; CHECK-LABEL: test3_xor
+; SSE41: xorps
+; AVX: vxorps
+; CHECK-NEXT: ret
+
+
+define double @test_fadd(double %A, double %B) {
+ %1 = bitcast double %A to <2 x float>
+ %2 = bitcast double %B to <2 x float>
+ %add = fadd <2 x float> %1, %2
+ %3 = bitcast <2 x float> %add to double
+ ret double %3
+}
+; CHECK-LABEL: test_fadd
+; SSE41: addps
+; AVX: vaddps
+; CHECK-NEXT: ret
+
+define double @test_fsub(double %A, double %B) {
+ %1 = bitcast double %A to <2 x float>
+ %2 = bitcast double %B to <2 x float>
+ %sub = fsub <2 x float> %1, %2
+ %3 = bitcast <2 x float> %sub to double
+ ret double %3
+}
+; CHECK-LABEL: test_fsub
+; SSE41: subps
+; AVX: vsubps
+; CHECK-NEXT: ret
+
+define double @test_fmul(double %A, double %B) {
+ %1 = bitcast double %A to <2 x float>
+ %2 = bitcast double %B to <2 x float>
+ %mul = fmul <2 x float> %1, %2
+ %3 = bitcast <2 x float> %mul to double
+ ret double %3
+}
+; CHECK-LABEL: test_fmul
+; SSE41: mulps
+; AVX: vmulps
+; CHECK-NEXT: ret
+