(VT == MVT::v8i32 && Subtarget->hasInt256()));
// Get the high parts.
- const int Mask[] = {1, 2, 3, 4, 5, 6, 7, 8};
+ const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1};
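+ // The -1 lanes are undef: (V)PMULUDQ and (V)PMULDQ read only the
+ // even-indexed 32-bit lanes of each operand, so the shuffle just has
+ // to move the odd (high) source elements down into even positions.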
SDValue Hi0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask);
SDValue Hi1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask);
DAG.getNode(Opcode, dl, MulVT, Hi0, Hi1));
// Shuffle it back into the right order.
- const int HighMask[] = {1, 5, 3, 7, 9, 13, 11, 15};
- SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
- const int LowMask[] = {0, 4, 2, 6, 8, 12, 10, 14};
- SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+ SDValue Highs, Lows;
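+ // A two-input shuffle of v8i32 vectors numbers Mul1's lanes 0-7 and
+ // Mul2's lanes 8-15, so the v8i32 masks differ from the v4i32 ones:
+ // Mul1 holds the products of the even source elements, Mul2 those of
+ // the odd ones, and their halves must be interleaved back in source
+ // order.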
+ if (VT == MVT::v8i32) {
+ const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
+ Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
+ const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
+ Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+ } else {
+ const int HighMask[] = {1, 5, 3, 7};
+ Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
+ const int LowMask[] = {0, 4, 2, 6};
+ Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+ }
// If we have a signed multiply but no PMULDQ, fix up the high parts of
// an unsigned multiply.
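+ // (The standard fixup identity is
+ //   mulhs(a,b) = mulhu(a,b) - (a < 0 ? b : 0) - (b < 0 ? a : 0).)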
; SSE41-LABEL: test1:
; SSE41: pmuludq
-; SSE41: pshufd $57
+; SSE41: pshufd $49
; SSE41: pmuludq
; SSE41: shufps $-35
; SSE41: psubd
; AVX-LABEL: test1:
; AVX: vpmuludq
-; AVX: vpshufd $57
+; AVX: vpshufd $49
; AVX: vpmuludq
; AVX: vshufps $-35
; AVX: vpsubd
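+; ($49 = 0x31 shuffles dwords <1,u,3,u>, moving the odd source elements
+; into the even lanes that pmuludq multiplies; $-35 = 0xDD keeps the odd
+; dwords, i.e. the high 32 bits of each 64-bit product.)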
ret <8 x i32> %div
; AVX-LABEL: test2:
-; AVX: vpermd
+; AVX: vpbroadcastd
+; AVX: vpalignr $4
; AVX: vpmuludq
-; AVX: vshufps $-35
; AVX: vpmuludq
-; AVX: vshufps $-35
+; AVX: vpblendd $170
; AVX: vpsubd
; AVX: vpsrld $1
; AVX: vpadd
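+; (vpalignr $4 shifts each 128-bit lane down one dword, putting the odd
+; elements into the even positions pmuludq reads; vpblendd $170 = 0xAA
+; then takes the even dwords from one product and the odd dwords from
+; the other, replacing the old vpermd/vshufps sequence.)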
define <16 x i8> @test7(<16 x i8> %a) {
%div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
ret <16 x i8> %div
+
+; FIXME: scalarized
+; SSE41-LABEL: test7:
+; SSE41: pext
+; AVX-LABEL: test7:
+; AVX: pext
}
define <4 x i32> @test8(<4 x i32> %a) {
; SSE41-LABEL: test8:
; SSE41: pmuldq
-; SSE41: pshufd $57
-; SSE41-NOT: pshufd $57
+; SSE41: pshufd $49
+; SSE41-NOT: pshufd $49
; SSE41: pmuldq
; SSE41: shufps $-35
; SSE41: pshufd $-40
; SSE: pand
; SSE: paddd
; SSE: pmuludq
-; SSE: pshufd $57
-; SSE-NOT: pshufd $57
+; SSE: pshufd $49
+; SSE-NOT: pshufd $49
; SSE: pmuludq
; SSE: shufps $-35
; SSE: pshufd $-40
; AVX-LABEL: test8:
; AVX: vpmuldq
-; AVX: vpshufd $57
-; AVX-NOT: vpshufd $57
+; AVX: vpshufd $49
+; AVX-NOT: vpshufd $49
; AVX: vpmuldq
; AVX: vshufps $-35
; AVX: vpshufd $-40
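+; ($-40 = 0xD8 reorders dwords <0,2,1,3>, interleaving the high halves
+; gathered by the preceding shufps back into source order.)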
ret <8 x i32> %div
; AVX-LABEL: test9:
+; AVX: vpalignr $4
; AVX: vpbroadcastd
; AVX: vpmuldq
-; AVX: vshufps $-35
; AVX: vpmuldq
-; AVX: vshufps $-35
-; AVX: vpshufd $-40
+; AVX: vpblendd $170
; AVX: vpadd
; AVX: vpsrld $31
; AVX: vpsrad $2
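+; (This is the standard signed magic-number sequence: add the dividend
+; to the high half, shift right arithmetically by 2, and add back the
+; sign bit extracted by the logical shift by 31.)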
; AVX-LABEL: test10:
; AVX: vpbroadcastd
+; AVX: vpalignr $4
; AVX: vpmuludq
-; AVX: vshufps $-35
; AVX: vpmuludq
-; AVX: vshufps $-35
+; AVX: vpblendd $170
; AVX: vpsubd
; AVX: vpsrld $1
; AVX: vpadd
ret <8 x i32> %rem
; AVX-LABEL: test11:
+; AVX: vpalignr $4
; AVX: vpbroadcastd
; AVX: vpmuldq
-; AVX: vshufps $-35
; AVX: vpmuldq
-; AVX: vshufps $-35
-; AVX: vpshufd $-40
+; AVX: vpblendd $170
; AVX: vpadd
; AVX: vpsrld $31
; AVX: vpsrad $2