From 1670bbc481e066c1fc98e9ebb507b6ca12206a60 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Thu, 20 Aug 2015 20:59:02 +0000 Subject: [PATCH] [NVPTX] truncating 64-bit to 32-bit is free Summary: Add an LSR test that exercises isTruncateFree. Without this change, LSR creates another indvar representing the truncated value. Reviewers: jholewinski, eliben Subscribers: jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D12058 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245611 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.h | 8 ++++ .../LoopStrengthReduce/NVPTX/lit.local.cfg | 2 + .../LoopStrengthReduce/NVPTX/trunc.ll | 45 +++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg create mode 100644 test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index e5c37321a33..be9a9e0369f 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -459,6 +459,14 @@ public: bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + bool isTruncateFree(Type *SrcTy, Type *DstTy) const override { + // Truncating 64-bit to 32-bit is free in SASS. + if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) + return false; + return SrcTy->getPrimitiveSizeInBits() == 64 && + DstTy->getPrimitiveSizeInBits() == 32; + } + /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned getFunctionAlignment(const Function *F) const; diff --git a/test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg b/test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg new file mode 100644 index 00000000000..2cb98eb371b --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'NVPTX' in config.root.targets: + config.unsupported = True diff --git a/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll b/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll new file mode 100644 index 00000000000..a16065b4dfb --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +; This confirms that NVPTXTTI considers a 64-to-32 integer trunc free. If such +; truncs were not considered free, LSR would promote (int)i as a separate +; induction variable in the following example. +; +; for (long i = begin; i != end; i += stride) +; use((int)i); +; +; That would be worthless, because "i" is simulated by two 32-bit registers and +; truncating it to 32-bit is as simple as directly using the register that +; contains the low bits. +define void @trunc_is_free(i64 %begin, i64 %stride, i64 %end) { +; CHECK-LABEL: @trunc_is_free( +entry: + %cmp.4 = icmp eq i64 %begin, %end + br i1 %cmp.4, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body +; CHECK: for.body: + %i.05 = phi i64 [ %add, %for.body ], [ %begin, %for.body.preheader ] + %conv = trunc i64 %i.05 to i32 +; CHECK: trunc i64 %{{[^ ]+}} to i32 + tail call void @_Z3usei(i32 %conv) #2 + %add = add nsw i64 %i.05, %stride + %cmp = icmp eq i64 %add, %end + br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body +} + +declare void @_Z3usei(i32) + +!nvvm.annotations = !{!0} +!0 = !{void (i64, i64, i64)* @trunc_is_free, !"kernel", i32 1} -- 2.34.1