From 405e5f276bde439f0c100f542c5ed854a929f1d9 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Fri, 8 May 2015 15:11:07 +0000 Subject: [PATCH] [X86] Teach 'getTargetShuffleMask' how to look through ISD::WrapperRIP when decoding a PSHUFB mask. The function 'getTargetShuffleMask' already knows how to deal with PSHUFB nodes where the mask node is a load from constant pool, and the constant pool node is wrapped by a X86ISD::Wrapper node. This patch extends that logic by teaching it how to also look through X86ISD::WrapperRIP. This helps function combineX86ShufflesRecusively to combine more shuffle sequences containing PSHUFB nodes if we are in RIPRel PIC mode. Before this change, llc (with -relocation-model=pic -march=x86-64) was unable to decode a pshufb where the mask was loaded from a constant pool. For example, the no-op shuffle from test 'x86-fold-pshufb.ll' was not folded into its operand, so instead of generating a single 'movaps' the backend always generated a sub-optimal 'movdqa + pshufb' sequence. Added test x86-fold-pshufb.ll. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236863 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 3 ++- test/CodeGen/X86/x86-fold-pshufb.ll | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/X86/x86-fold-pshufb.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cac5b54187b..35517212acc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4352,7 +4352,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, return false; SDValue Ptr = MaskLoad->getBasePtr(); - if (Ptr->getOpcode() == X86ISD::Wrapper) + if (Ptr->getOpcode() == X86ISD::Wrapper || + Ptr->getOpcode() == X86ISD::WrapperRIP) Ptr = Ptr->getOperand(0); auto *MaskCP = dyn_cast(Ptr); diff --git a/test/CodeGen/X86/x86-fold-pshufb.ll b/test/CodeGen/X86/x86-fold-pshufb.ll new file mode 100644 index 00000000000..c29e592bfe8 --- /dev/null +++ b/test/CodeGen/X86/x86-fold-pshufb.ll @@ -0,0 +1,17 @@ +; RUN: llc -relocation-model=pic -march=x86-64 -mtriple=x86_64-unknown-unknown -mattr=+ssse3 < %s | FileCheck %s + +; Verify that the backend correctly folds the shuffle in function 'fold_pshufb' +; into a simple load from constant pool. + +define <2 x i64> @fold_pshufb() { +; CHECK-LABEL: fold_pshufb: +; CHECK: # BB#0: +; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0] +; CHECK-NEXT: retq +entry: + %0 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> , <16 x i8> ) + %1 = bitcast <16 x i8> %0 to <2 x i64> + ret <2 x i64> %1 +} + +declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) -- 2.34.1