From be45e0e6696d73f45c2dbd9c2a4ba8b422de08ad Mon Sep 17 00:00:00 2001
From: Quentin Colombet
Date: Thu, 12 Mar 2015 19:34:12 +0000
Subject: [PATCH] [X86] Fix a regression introduced by r223641.

The permps and permd instructions have their operands swapped compared to
the intrinsic definition. Therefore, they do not fall into the
INTR_TYPE_2OP category. I did not create a new category for those two, as
AFAICT they are the only ones in that situation.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232085 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp         |  7 +++++++
 lib/Target/X86/X86IntrinsicsInfo.h         |  2 --
 test/CodeGen/X86/avx2-intrinsics-x86.ll    | 10 ++++++++--
 test/CodeGen/X86/stack-folding-int-avx2.ll |  4 ++--
 4 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 366e4b9e073..0533afc59f6 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -14711,6 +14711,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
   switch (IntNo) {
   default: return SDValue();    // Don't custom lower most intrinsics.
 
+  case Intrinsic::x86_avx2_permd:
+  case Intrinsic::x86_avx2_permps:
+    // Operands intentionally swapped. Mask is last operand to intrinsic,
+    // but second operand for node/instruction.
+    return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
+                       Op.getOperand(2), Op.getOperand(1));
+
   case Intrinsic::x86_avx512_mask_valign_q_512:
   case Intrinsic::x86_avx512_mask_valign_d_512:
     // Vector source operands are swapped.
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index e4368116a67..42256b2c5eb 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -175,8 +175,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
   X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
-  X86_INTRINSIC_DATA(avx2_permd, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
-  X86_INTRINSIC_DATA(avx2_permps, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
   X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
   X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
   X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index b7e2b7412ff..7b2d3585df9 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -746,7 +746,10 @@ declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
 
 
 define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
-  ; CHECK: vpermd
+  ; Check that the arguments are swapped between the intrinsic definition
+  ; and its lowering. Indeed, the offsets are the first source operand of
+  ; the instruction.
+  ; CHECK: vpermd %ymm0, %ymm1, %ymm0
   %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
   ret <8 x i32> %res
 }
@@ -754,7 +757,10 @@ declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
 
 
 define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) {
-  ; CHECK: vpermps
+  ; Check that the arguments are swapped between the intrinsic definition
+  ; and its lowering. Indeed, the offsets are the first source operand of
+  ; the instruction.
+  ; CHECK: vpermps %ymm0, %ymm1, %ymm0
   %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
 }
diff --git a/test/CodeGen/X86/stack-folding-int-avx2.ll b/test/CodeGen/X86/stack-folding-int-avx2.ll
index fa81bf104b4..7f384940d85 100644
--- a/test/CodeGen/X86/stack-folding-int-avx2.ll
+++ b/test/CodeGen/X86/stack-folding-int-avx2.ll
@@ -442,7 +442,7 @@ define <8 x i32> @stack_fold_permd(<8 x i32> %a0, <8 x i32> %a1) {
   ;CHECK-LABEL: stack_fold_permd
   ;CHECK:   vpermd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1)
+  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
   ret <8 x i32> %2
 }
 declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
@@ -461,7 +461,7 @@ define <8 x float> @stack_fold_permps(<8 x float> %a0, <8 x float> %a1) {
   ;CHECK-LABEL: stack_fold_permps
   ;CHECK:   vpermps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1)
+  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x float> %a0)
   ret <8 x float> %2
 }
 declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly
-- 
2.34.1
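[Editor's note, not part of the patch: below is a minimal, self-contained C
sketch of the operand-order mismatch this patch fixes. The Intel intrinsic
_mm256_permutevar8x32_epi32 (which clang typically lowers through
@llvm.x86.avx2.permd) takes the index vector as its *last* argument, while
the VPERMD instruction takes it as its *first* source operand (Intel
syntax: vpermd dst, indices, data), so the lowering must swap the two
operands. Variable names are illustrative only; build with -mavx2 on a
compiler that supports AVX2.]

  #include <immintrin.h>
  #include <stdio.h>

  int main(void) {
    /* Data vector: element i holds 10 + i. */
    __m256i a   = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
    /* Index vector: reverse the eight 32-bit lanes. */
    __m256i idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);

    /* Intrinsic argument order is (data, indices); the instruction wants
     * the indices first, which is exactly the swap performed in
     * LowerINTRINSIC_WO_CHAIN above. Result: r[i] = a[idx[i]]. */
    __m256i r = _mm256_permutevar8x32_epi32(a, idx);

    int out[8];
    _mm256_storeu_si256((__m256i *)out, r);
    for (int i = 0; i < 8; ++i)
      printf("%d ", out[i]); /* prints: 17 16 15 14 13 12 11 10 */
    putchar('\n');
    return 0;
  }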