From 8c20ec54d98176d31f310e4684d1d7f2ea0639bc Mon Sep 17 00:00:00 2001
From: Nadav Rotem
Date: Thu, 24 Feb 2011 21:01:34 +0000
Subject: [PATCH] Enable support for vector sext and trunc:

Limit the folding of any_ext and sext into the load operation to scalars.
Limit the active-bits trunc optimization to scalars.
Document vector trunc and vector sext in LangRef.
Similar to commit 126080 (for enabling zext).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126424 91177308-0d34-0410-b5e6-96231b3b80d8
---
 docs/LangRef.html                        | 29 +++++----
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 27 ++++++---
 test/CodeGen/X86/vec_anyext.ll           | 77 ++++++++++++++++++++++++
 test/CodeGen/X86/vec_sext.ll             | 69 +++++++++++++++++++++
 4 files changed, 179 insertions(+), 23 deletions(-)
 create mode 100644 test/CodeGen/X86/vec_anyext.ll
 create mode 100644 test/CodeGen/X86/vec_sext.ll

diff --git a/docs/LangRef.html b/docs/LangRef.html
index 81fa8cc6e89..fc581f1322d 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -4575,12 +4575,12 @@ entry:
type ty2.

Arguments:
-
-The 'trunc' instruction takes a value to trunc, which must
-be an integer type, and a type that specifies the
-size and type of the result, which must be
-an integer type. The bit size of value must
-be larger than the bit size of ty2. Equal sized types are not
-allowed.
+
+The 'trunc' instruction takes a value to truncate, and a type to
+truncate it to. Both types must be integer types, or vectors
+with the same number of integer elements.
+The bit size of the value must be larger than
+the bit size of the destination type, ty2.
+Equal sized types are not allowed.

Semantics:

The 'trunc' instruction truncates the high order bits
@@ -4590,9 +4590,9 @@ entry:

Example:
-  %X = trunc i32 257 to i8              ; yields i8:1
-  %Y = trunc i32 123 to i1              ; yields i1:true
-  %Z = trunc i32 122 to i1              ; yields i1:false
+  %X = trunc i32 257 to i8                        ; yields i8:1
+  %Y = trunc i32 123 to i1                        ; yields i1:true
+  %Z = trunc i32 122 to i1                        ; yields i1:false
+  %W = trunc <2 x i16> <i16 8, i16 7> to <2 x i8> ; yields <i8 8, i8 7>
 
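On vectors, truncation applies to each element independently: the high-order
bits of every lane are dropped. One further line in the style of the examples
above, illustrative only and not part of the hunk itself:

  %V = trunc <2 x i16> <i16 257, i16 123> to <2 x i8>   ; yields <i8 1, i8 123>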
@@ -4651,10 +4652,11 @@ entry:

The 'sext' sign extends value to the type ty2.

Arguments:
-
-The 'sext' instruction takes a value to cast, which must be of
-integer type, and a type to cast it to, which must
-also be of integer type. The bit size of the
-value must be smaller than the bit size of the destination type,
+
+The 'sext' instruction takes a value to cast, and a type to cast it to.
+Both types must be integer types, or vectors
+with the same number of integer elements.
+The bit size of the value must be smaller than
+the bit size of the destination type,
ty2.

Semantics:
@@ -4668,6 +4670,7 @@ entry:
  %X = sext i8  -1 to i16              ; yields i16:65535
   %Y = sext i1 true to i32             ; yields i32:-1
+  %Z = sext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
 
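Vector sign extension is likewise element-wise: the sign bit of each lane is
replicated into that lane's new high-order bits. One further line in the same
style, illustrative only and not part of the hunk itself:

  %V = sext <2 x i8> <i8 -1, i8 3> to <2 x i16>   ; yields <i16 -1, i16 3>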
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c5f0324ac41..911dbfd40f7 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3685,7 +3685,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
   }

   // fold (sext (load x)) -> (sext (truncate (sextload x)))
-  if (ISD::isNON_EXTLoad(N0.getNode()) &&
+  // None of the supported targets knows how to perform load and sign extend
+  // in one instruction. We only perform this transformation on scalars.
+  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
     bool DoXform = true;
@@ -4096,7 +4098,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
   }

   // fold (aext (load x)) -> (aext (truncate (extload x)))
-  if (ISD::isNON_EXTLoad(N0.getNode()) &&
+  // None of the supported targets knows how to perform load and any_ext
+  // in one instruction. We only perform this transformation on scalars.
+  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
     bool DoXform = true;
@@ -4506,14 +4510,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   }

   // See if we can simplify the input to this truncate through knowledge that
-  // only the low bits are being used.  For example "trunc (or (shl x, 8), y)"
-  // -> trunc y
-  SDValue Shorter =
-    GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
-                                             VT.getSizeInBits()));
-  if (Shorter.getNode())
-    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
-
+  // only the low bits are being used.
+  // For example "trunc (or (shl x, 8), y)" -> trunc y
+  // Currently we only perform this optimization on scalars because vectors
+  // may have different active low bits.
+  if (!VT.isVector()) {
+    SDValue Shorter =
+      GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+                                               VT.getSizeInBits()));
+    if (Shorter.getNode())
+      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+  }
   // fold (truncate (load x)) -> (smaller load x)
   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
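The !VT.isVector() guards above matter because a scalar sext/aext of a load
can be folded into a single extending load node (SEXTLOAD/EXTLOAD), while at
this point no supported target can sign- or any-extend a vector as part of
the load itself, so the combine would create a node that cannot be selected.
A minimal illustrative sketch of the two shapes, in the same era-appropriate
IR syntax as the tests below (function names are hypothetical):

  ; Scalar: the combiner may turn this pair into one extending load
  ; (e.g. a movswl on x86).
  define i32 @scalar_fold(i16* %p) {
    %a = load i16* %p
    %b = sext i16 %a to i32
    ret i32 %b
  }

  ; Vector: with this patch the fold is skipped, and the extension is
  ; lowered separately from the load.
  define <4 x i32> @vector_no_fold(<4 x i16>* %p) {
    %a = load <4 x i16>* %p
    %b = sext <4 x i16> %a to <4 x i32>
    ret <4 x i32> %b
  }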
diff --git a/test/CodeGen/X86/vec_anyext.ll b/test/CodeGen/X86/vec_anyext.ll
new file mode 100644
index 00000000000..d2a4c7f60dd
--- /dev/null
+++ b/test/CodeGen/X86/vec_anyext.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -march=x86-64
+; PR 9267
+
+define <4 x i16> @func_16_32() {
+  %F = load <4 x i32>* undef
+  %G = trunc <4 x i32> %F to <4 x i16>
+  %H = load <4 x i32>* undef
+  %Y = trunc <4 x i32> %H to <4 x i16>
+  %T = add <4 x i16> %Y, %G
+  store <4 x i16> %T, <4 x i16>* undef
+  ret <4 x i16> %T
+}
+
+define <4 x i16> @func_16_64() {
+  %F = load <4 x i64>* undef
+  %G = trunc <4 x i64> %F to <4 x i16>
+  %H = load <4 x i64>* undef
+  %Y = trunc <4 x i64> %H to <4 x i16>
+  %T = xor <4 x i16> %Y, %G
+  store <4 x i16> %T, <4 x i16>* undef
+  ret <4 x i16> %T
+}
+
+define <4 x i32> @func_32_64() {
+  %F = load <4 x i64>* undef
+  %G = trunc <4 x i64> %F to <4 x i32>
+  %H = load <4 x i64>* undef
+  %Y = trunc <4 x i64> %H to <4 x i32>
+  %T = or <4 x i32> %Y, %G
+  ret <4 x i32> %T
+}
+
+define <4 x i8> @func_8_16() {
+  %F = load <4 x i16>* undef
+  %G = trunc <4 x i16> %F to <4 x i8>
+  %H = load <4 x i16>* undef
+  %Y = trunc <4 x i16> %H to <4 x i8>
+  %T = add <4 x i8> %Y, %G
+  ret <4 x i8> %T
+}
+
+define <4 x i8> @func_8_32() {
+  %F = load <4 x i32>* undef
+  %G = trunc <4 x i32> %F to <4 x i8>
+  %H = load <4 x i32>* undef
+  %Y = trunc <4 x i32> %H to <4 x i8>
+  %T = sub <4 x i8> %Y, %G
+  ret <4 x i8> %T
+}
+
+define <4 x i8> @func_8_64() {
+  %F = load <4 x i64>* undef
+  %G = trunc <4 x i64> %F to <4 x i8>
+  %H = load <4 x i64>* undef
+  %Y = trunc <4 x i64> %H to <4 x i8>
+  %T = add <4 x i8> %Y, %G
+  ret <4 x i8> %T
+}
+
+define <4 x i16> @const_16_32() {
+  %G = trunc <4 x i32> <i32 0, i32 3, i32 8, i32 7> to <4 x i16> ; constant operand lost in the patch text; values illustrative
+  ret <4 x i16> %G
+}
+
+define <4 x i16> @const_16_64() {
+  %G = trunc <4 x i64> <i64 0, i64 3, i64 8, i64 7> to <4 x i16> ; constant operand lost in the patch text; values illustrative
+  ret <4 x i16> %G
+}
+
+define void @bugOnTruncBitwidthReduce() nounwind {
+meh:
+  %0 = xor <4 x i64> zeroinitializer, zeroinitializer
+  %1 = trunc <4 x i64> %0 to <4 x i32>
+  %2 = lshr <4 x i32> %1, <i32 18, i32 18, i32 18, i32 18> ; shift amounts lost in the patch text; values illustrative
+  %3 = xor <4 x i32> %2, %1
+  ret void
+}
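The @bugOnTruncBitwidthReduce test above pins down why the active-bits
truncate combine is now scalar-only: for a <4 x i64> to <4 x i32> truncate,
N0.getValueSizeInBits() is 256 and VT.getSizeInBits() is 128, so
APInt::getLowBitsSet(256, 128) describes the low two lanes of the packed
value rather than the low 32 bits of each lane, and simplifying the input
against that mask miscompiles per-lane code. A minimal sketch of the shape
involved (function name and shift amounts are illustrative):

  define <4 x i32> @demanded_bits_shape(<4 x i64> %x) {
    %t = trunc <4 x i64> %x to <4 x i32>
    %s = lshr <4 x i32> %t, <i32 18, i32 18, i32 18, i32 18>
    ret <4 x i32> %s
  }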
diff --git a/test/CodeGen/X86/vec_sext.ll b/test/CodeGen/X86/vec_sext.ll
new file mode 100644
index 00000000000..776ddec2e63
--- /dev/null
+++ b/test/CodeGen/X86/vec_sext.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -march=x86-64
+; PR 9267
+
+define <4 x i32> @func_16_32() {
+  %F = load <4 x i16>* undef
+  %G = sext <4 x i16> %F to <4 x i32>
+  %H = load <4 x i16>* undef
+  %Y = sext <4 x i16> %H to <4 x i32>
+  %T = add <4 x i32> %Y, %G
+  store <4 x i32> %T, <4 x i32>* undef
+  ret <4 x i32> %T
+}
+
+define <4 x i64> @func_16_64() {
+  %F = load <4 x i16>* undef
+  %G = sext <4 x i16> %F to <4 x i64>
+  %H = load <4 x i16>* undef
+  %Y = sext <4 x i16> %H to <4 x i64>
+  %T = xor <4 x i64> %Y, %G
+  store <4 x i64> %T, <4 x i64>* undef
+  ret <4 x i64> %T
+}
+
+define <4 x i64> @func_32_64() {
+  %F = load <4 x i32>* undef
+  %G = sext <4 x i32> %F to <4 x i64>
+  %H = load <4 x i32>* undef
+  %Y = sext <4 x i32> %H to <4 x i64>
+  %T = or <4 x i64> %Y, %G
+  ret <4 x i64> %T
+}
+
+define <4 x i16> @func_8_16() {
+  %F = load <4 x i8>* undef
+  %G = sext <4 x i8> %F to <4 x i16>
+  %H = load <4 x i8>* undef
+  %Y = sext <4 x i8> %H to <4 x i16>
+  %T = add <4 x i16> %Y, %G
+  ret <4 x i16> %T
+}
+
+define <4 x i32> @func_8_32() {
+  %F = load <4 x i8>* undef
+  %G = sext <4 x i8> %F to <4 x i32>
+  %H = load <4 x i8>* undef
+  %Y = sext <4 x i8> %H to <4 x i32>
+  %T = sub <4 x i32> %Y, %G
+  ret <4 x i32> %T
+}
+
+define <4 x i64> @func_8_64() {
+  %F = load <4 x i8>* undef
+  %G = sext <4 x i8> %F to <4 x i64>
+  %H = load <4 x i8>* undef
+  %Y = sext <4 x i8> %H to <4 x i64>
+  %T = add <4 x i64> %Y, %G
+  ret <4 x i64> %T
+}
+
+define <4 x i32> @const_16_32() {
+  %G = sext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i32> ; constant operand lost in the patch text; values illustrative
+  ret <4 x i32> %G
+}
+
+define <4 x i64> @const_16_64() {
+  %G = sext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i64> ; constant operand lost in the patch text; values illustrative
+  ret <4 x i64> %G
+}
-- 
2.34.1