From: Philip Reames Date: Sat, 14 Feb 2015 00:05:36 +0000 (+0000) Subject: [InstCombine] When canonicalizing gep indices, prefer zext when possible X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=d777c2c0c09f20bf2decf763bb2946291f28960f;hp=c4300b9c0147889e77d2a51ded9fa025545d27e2;p=oota-llvm.git [InstCombine] When canonicalizing gep indices, prefer zext when possible If we know that the sign bit of a value being sign extended is zero, we can use a zero extension instead. This is motivated by the fact that zero extensions are generally cheaper on x86 (and most other architectures?). We already apply a similar transform in DAGCombine; this just extends that to the IR level. This comes up when we eagerly canonicalize gep indices to the width of a machine register (i64 on x86_64). To do so, we insert sign extensions (sext) to promote smaller types. Differential Revision: http://reviews.llvm.org/D7255 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229189 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 3c11c7c3529..9db4e7d2a4b 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1064,6 +1064,15 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + // If we know that the value being extended is non-negative, we can use a zext + // instead. + bool KnownZero, KnownOne; + ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI); + if (KnownZero) { + Value *ZExt = Builder->CreateZExt(Src, DestTy); + return ReplaceInstUsesWith(CI, ZExt); + } + // Attempt to extend the entire input expression tree to the destination // type. 
Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also diff --git a/test/Transforms/InstCombine/gep-sext.ll b/test/Transforms/InstCombine/gep-sext.ll new file mode 100644 index 00000000000..3d23dab4879 --- /dev/null +++ b/test/Transforms/InstCombine/gep-sext.ll @@ -0,0 +1,61 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-win32" + +declare void @use(i32) readonly + +; We prefer to canonicalize the machine width gep indices early +define void @test(i32* %p, i32 %index) { +; CHECK-LABEL: @test +; CHECK-NEXT: %1 = sext i32 %index to i64 +; CHECK-NEXT: %addr = getelementptr i32* %p, i64 %1 + %addr = getelementptr i32* %p, i32 %index + %val = load i32* %addr + call void @use(i32 %val) + ret void +} +; If they've already been canonicalized via zext, that's fine +define void @test2(i32* %p, i32 %index) { +; CHECK-LABEL: @test2 +; CHECK-NEXT: %i = zext i32 %index to i64 +; CHECK-NEXT: %addr = getelementptr i32* %p, i64 %i + %i = zext i32 %index to i64 + %addr = getelementptr i32* %p, i64 %i + %val = load i32* %addr + call void @use(i32 %val) + ret void +} +; If we can use a zext, we prefer that. This requires +; knowing that the index is non-negative. 
+define void @test3(i32* %p, i32 %index) { +; CHECK-LABEL: @test3 +; CHECK: zext +; CHECK-NOT: sext + %addr_begin = getelementptr i32* %p, i64 40 + %addr_fixed = getelementptr i32* %addr_begin, i64 48 + %val_fixed = load i32* %addr_fixed, !range !0 + %addr = getelementptr i32* %addr_begin, i32 %val_fixed + %val = load i32* %addr + call void @use(i32 %val) + ret void +} +; Replace sext with zext where possible +define void @test4(i32* %p, i32 %index) { +; CHECK-LABEL: @test4 +; CHECK: zext +; CHECK-NOT: sext + %addr_begin = getelementptr i32* %p, i64 40 + %addr_fixed = getelementptr i32* %addr_begin, i64 48 + %val_fixed = load i32* %addr_fixed, !range !0 + %i = sext i32 %val_fixed to i64 + %addr = getelementptr i32* %addr_begin, i64 %i + %val = load i32* %addr + call void @use(i32 %val) + ret void +} + +;; !range !0 +!0 = !{i32 0, i32 2147483647} + + +