From: Manuel Jacob Date: Thu, 13 Mar 2014 13:36:25 +0000 (+0000) Subject: CodeGenPrep: sink extends of illegal types into use block. X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=f8909fa1406eba023f9fadfa7e8182630d3ea14a CodeGenPrep: sink extends of illegal types into use block. Summary: This helps the instruction selector to lower an i64 * i64 -> i128 multiplication into a single instruction on targets which support it. This is an update of D2973 which was reverted because of a bug reported as PR19084. Reviewers: t.p.northover, chapuni Reviewed By: t.p.northover CC: llvm-commits, alex, chapuni Differential Revision: http://llvm-reviews.chandlerc.com/D3021 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203797 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index d53cdb97d9b..428d8af359e 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -464,40 +464,8 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } -/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop -/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), -/// sink it into user blocks to reduce the number of virtual -/// registers that must be created and coalesced. -/// -/// Return true if any changes are made. -/// -static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ - // If this is a noop copy, - EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(CI->getType()); - - // This is an fp<->int conversion? - if (SrcVT.isInteger() != DstVT.isInteger()) - return false; - - // If this is an extension, it will be a zero or sign extension, which - // isn't a noop. - if (SrcVT.bitsLT(DstVT)) return false; - - // If these values will be promoted, find out what they will be promoted - // to. 
This helps us consider truncates on PPC as noop copies when they - // are. - if (TLI.getTypeAction(CI->getContext(), SrcVT) == - TargetLowering::TypePromoteInteger) - SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); - if (TLI.getTypeAction(CI->getContext(), DstVT) == - TargetLowering::TypePromoteInteger) - DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); - - // If, after promotion, these are the same types, this is a noop copy. - if (SrcVT != DstVT) - return false; - +/// SinkCast - Sink the specified cast instruction into its user blocks +static bool SinkCast(CastInst *CI) { BasicBlock *DefBB = CI->getParent(); /// InsertedCasts - Only insert a cast in each block once. @@ -547,6 +515,43 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ return MadeChange; } +/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop +/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), +/// sink it into user blocks to reduce the number of virtual +/// registers that must be created and coalesced. +/// +/// Return true if any changes are made. +/// +static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ + // If this is a noop copy, + EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(CI->getType()); + + // This is an fp<->int conversion? + if (SrcVT.isInteger() != DstVT.isInteger()) + return false; + + // If this is an extension, it will be a zero or sign extension, which + // isn't a noop. + if (SrcVT.bitsLT(DstVT)) return false; + + // If these values will be promoted, find out what they will be promoted + // to. This helps us consider truncates on PPC as noop copies when they + // are. 
+ if (TLI.getTypeAction(CI->getContext(), SrcVT) == + TargetLowering::TypePromoteInteger) + SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); + if (TLI.getTypeAction(CI->getContext(), DstVT) == + TargetLowering::TypePromoteInteger) + DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); + + // If, after promotion, these are the same types, this is a noop copy. + if (SrcVT != DstVT) + return false; + + return SinkCast(CI); +} + /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce /// the number of virtual registers that must be created and coalesced. This is /// a clear win except on targets with multiple condition code registers @@ -2811,8 +2816,16 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { return true; if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { - bool MadeChange = MoveExtToFormExtLoad(I); - return MadeChange | OptimizeExtUses(I); + /// Sink a zext or sext into its user blocks if the target type doesn't + /// fit in one register + if (TLI && TLI->getTypeAction(CI->getContext(), + TLI->getValueType(CI->getType())) == + TargetLowering::TypeExpandInteger) { + return SinkCast(CI); + } else { + bool MadeChange = MoveExtToFormExtLoad(I); + return MadeChange | OptimizeExtUses(I); + } } return false; } diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index a30f8c7d985..52d3c01076d 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1444,54 +1444,6 @@ it would be nice to produce "into" someday. 
//===---------------------------------------------------------------------===// -This code: - -void vec_mpys1(int y[], const int x[], int scaler) { -int i; -for (i = 0; i < 150; i++) - y[i] += (((long long)scaler * (long long)x[i]) >> 31); -} - -Compiles to this loop with GCC 3.x: - -.L5: - movl %ebx, %eax - imull (%edi,%ecx,4) - shrdl $31, %edx, %eax - addl %eax, (%esi,%ecx,4) - incl %ecx - cmpl $149, %ecx - jle .L5 - -llvm-gcc compiles it to the much uglier: - -LBB1_1: ## bb1 - movl 24(%esp), %eax - movl (%eax,%edi,4), %ebx - movl %ebx, %ebp - imull %esi, %ebp - movl %ebx, %eax - mull %ecx - addl %ebp, %edx - sarl $31, %ebx - imull %ecx, %ebx - addl %edx, %ebx - shldl $1, %eax, %ebx - movl 20(%esp), %eax - addl %ebx, (%eax,%edi,4) - incl %edi - cmpl $150, %edi - jne LBB1_1 ## bb1 - -The issue is that we hoist the cast of "scaler" to long long outside of the -loop, the value comes into the loop as two values, and -RegsForValue::getCopyFromRegs doesn't know how to put an AssertSext on the -constructed BUILD_PAIR which represents the cast value. - -This can be handled by making CodeGenPrepare sink the cast. - -//===---------------------------------------------------------------------===// - Test instructions can be eliminated by using EFLAGS values from arithmetic instructions. This is currently not done for mul, and, or, xor, neg, shl, sra, srl, shld, shrd, atomic ops, and others. It is also currently not done diff --git a/test/CodeGen/X86/mul128_sext_loop.ll b/test/CodeGen/X86/mul128_sext_loop.ll new file mode 100644 index 00000000000..a516f03cbc3 --- /dev/null +++ b/test/CodeGen/X86/mul128_sext_loop.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +define void @test(i64* nocapture %arr, i64 %arrsize, i64 %factor) nounwind uwtable { + %1 = icmp sgt i64 %arrsize, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0 + %2 = sext i64 %factor to i128 + br label %3 + +;