From 2e363ece75c005db692582c543c2759ffe0ab3ca Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Tue, 16 Sep 2014 00:39:08 +0000 Subject: [PATCH] [x86] Remove the last vestiges of the BLENDI-based ADDSUB pattern matching. This design just fundamentally didn't work because ADDSUB is available prior to any legal lowerings of BLENDI nodes. Instead, we have a dedicated ADDSUB synthetic ISD node which is pattern matched trivially into the instructions. These nodes are then recognized by both the existing and a trivial new lowering combine in the backend. Removing these patterns required adding 2 missing shuffle masks to the DAG combine, without which tests would have failed. Added the masks and a helpful assert as well to catch if anything ever goes wrong here. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217851 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 11 ++++++- lib/Target/X86/X86InstrSSE.td | 49 ------------------------------ 2 files changed, 10 insertions(+), 50 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6465fececc8..f7c08a292da 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -19990,6 +19990,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, /// they're unused. static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); + EVT VT = N->getValueType(0); // We only handle target-independent shuffles. // FIXME: It would be easy and harmless to use the target shuffle mask @@ -20021,9 +20022,17 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) { // We're looking for blends between FADD and FSUB nodes. We insist on these // nodes being lined up in a specific expected pattern. - if (!isShuffleEquivalent(Mask, 0, 5, 2, 7)) + if (!(isShuffleEquivalent(Mask, 0, 3) || + isShuffleEquivalent(Mask, 0, 5, 2, 7) || + isShuffleEquivalent(Mask, 0, 9, 2, 11, 4, 13, 6, 15))) return SDValue(); + // Only specific types are legal at this point, assert so we notice if and + // when these change. + assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v8f32 || + VT == MVT::v4f64) && + "Unknown vector type encountered!"); + // FIXME: Munge the inputs through no-op shuffles that drop the undef lanes to // allow nuking any instructions that feed only those lanes. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 54eb6f7d4c0..9030119edef 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5387,39 +5387,6 @@ let Predicates = [HasAVX] in { (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>; def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 (memop addr:$rhs)))), (VADDSUBPDYrm VR256:$lhs, f256mem:$rhs)>; - - // Constant 170 corresponds to the binary mask '10101010'. - // When used as a blend mask, it allows selecting eight elements from two - // input vectors as follow: - // - Even-numbered values in the destination are copied from - // the corresponding elements in the first input vector; - // - Odd-numbered values in the destination are copied from - // the corresponding elements in the second input vector. - - def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)), - (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i8 170))), - (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>; - - // Constant 10 corresponds to the binary mask '1010'. 
- // In the two pattens below, constant 10 is used as a blend mask to select - // - the 1st and 3rd element from the first input vector (the 'fsub' node); - // - the 2nd and 4th element from the second input vector (the 'fadd' node). - - def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)), - (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))), - (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>; - def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)), - (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))), - (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>; - def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)), - (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i8 10))), - (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>; - def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)), - (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i8 2))), - (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>; - def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)), - (v2f64 (fsub VR128:$lhs, VR128:$rhs)))), - (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>; } let Predicates = [UseSSE3] in { @@ -5431,22 +5398,6 @@ let Predicates = [UseSSE3] in { (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>; def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))), (ADDSUBPDrm VR128:$lhs, f128mem:$rhs)>; - - // Constant 10 corresponds to the binary mask '1010'. - // In the pattern below, it is used as a blend mask to select: - // - the 1st and 3rd element from the first input vector (the fsub node); - // - the 2nd and 4th element from the second input vector (the fadd node). - - def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)), - (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i8 10))), - (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>; - - def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)), - (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i8 2))), - (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>; - def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)), - (v2f64 (fsub VR128:$lhs, VR128:$rhs)))), - (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>; } //===---------------------------------------------------------------------===// -- 2.34.1
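
Illustrative note (not part of the patch): the sketch below models the blend-mask shape that combineShuffleToAddSub accepts after this change. Even result lanes are taken in place from the FSUB operand (mask[i] == i) and odd result lanes come from the matching lane of the FADD operand (mask[i] == NumElts + i); that is exactly what the masks {0, 3}, {0, 5, 2, 7} and {0, 9, 2, 11, 4, 13, 6, 15} encode for 2-, 4- and 8-element vectors. The helper name isAddSubBlendMask is hypothetical; the real combine compares against the hard-coded masks via isShuffleEquivalent and handles undef lanes per the FIXME quoted above.

// Standalone sketch only -- not LLVM code. It models the mask test that
// combineShuffleToAddSub performs: even result lanes must come from the
// FSUB operand in place (mask[i] == i) and odd result lanes from the same
// lane of the FADD operand (mask[i] == NumElts + i). The three masks used
// by the combine -- {0,3}, {0,5,2,7} and {0,9,2,11,4,13,6,15} -- all
// satisfy this rule for 2-, 4- and 8-element vectors respectively.
#include <cassert>
#include <cstdio>
#include <vector>

// Hypothetical helper: the real combine compares against the hard-coded
// masks with isShuffleEquivalent rather than a generic predicate like this.
static bool isAddSubBlendMask(const std::vector<int> &Mask) {
  const int NumElts = static_cast<int>(Mask.size());
  if (NumElts < 2 || (NumElts & (NumElts - 1)) != 0)
    return false; // only power-of-two element counts make sense here
  for (int i = 0; i < NumElts; ++i) {
    // Even lanes come from operand 0 in place; odd lanes from operand 1.
    const int Expected = (i % 2 == 0) ? i : NumElts + i;
    if (Mask[i] != Expected)
      return false;
  }
  return true;
}

int main() {
  // The masks recognized after this patch.
  assert(isAddSubBlendMask({0, 3}));                      // v2f64
  assert(isAddSubBlendMask({0, 5, 2, 7}));                // v4f32 / v4f64
  assert(isAddSubBlendMask({0, 9, 2, 11, 4, 13, 6, 15})); // v8f32
  // A plain identity shuffle is not an ADDSUB blend.
  assert(!isAddSubBlendMask({0, 1, 2, 3}));
  std::puts("all mask checks passed");
  return 0;
}

Any C++11 compiler should build this standalone (e.g. g++ -std=c++11 addsub_mask_sketch.cpp, where the file name is just an example); it exercises only the mask rule, not the actual DAG combine or the FADD/FSUB operand checks.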