From: Mehdi Amini <mehdi.amini@apple.com>
Date: Mon, 16 Feb 2015 21:47:54 +0000 (+0000)
Subject: InstCombine: fold more cases of (fp_to_u/sint (u/sint_to_fp val))
X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=e97c675022d7aba58992d6fb0e2a5f139f0db0a2

InstCombine: fold more cases of (fp_to_u/sint (u/sint_to_fp val))

Fixes radar 15486701.

From: Fiona Glaser <fglaser@apple.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229437 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 9db4e7d2a4b..3e2b71945a9 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1341,22 +1341,57 @@ Instruction *InstCombiner::visitFPExt(CastInst &CI) {
   return commonCastTransforms(CI);
 }
 
+// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
+// Returns the replacement for FI, or nullptr if FI's operand is not an
+// int-to-FP cast or the FP mantissa is too narrow to hold every relevant
+// value of X exactly (e.g. i64 -> float -> i64).
+Instruction *InstCombiner::FoldItoFPtoI(Instruction &FI) {
+  if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
+    return nullptr;
+  Instruction *OpI = cast<Instruction>(FI.getOperand(0));
+
+  Value *SrcI = OpI->getOperand(0);
+  Type *FITy = FI.getType();
+  Type *OpITy = OpI->getType();
+  Type *SrcTy = SrcI->getType();
+  bool IsInputSigned = isa<SIToFPInst>(OpI);
+  bool IsOutputSigned = isa<FPToSIInst>(FI);
+
+  // We can safely assume the conversion won't overflow the output range,
+  // because (for example) (uint8_t)18293.f is undefined behavior.  So X only
+  // has to convert exactly when it lies in both the input and output ranges;
+  // a negative input with an unsigned output is undefined too, hence safe.
+
+  // A signed input spends one bit on its sign, which the FP sign bit absorbs.
+  // The output width is NOT reduced for a signed output: -2^(N-1) is exactly
+  // representable, so an out-of-range input like -2^53-1 (i64->double->i54)
+  // would round into range and the fold would change the result.
+  int InputSize = (int)SrcTy->getScalarSizeInBits() - IsInputSigned;
+  int OutputSize = (int)FITy->getScalarSizeInBits();
+  int ActualSize = std::min(InputSize, OutputSize);
+
+  if (ActualSize <= OpITy->getFPMantissaWidth()) {
+    if (FITy->getScalarSizeInBits() > SrcTy->getScalarSizeInBits()) {
+      if (IsInputSigned && IsOutputSigned)
+        return new SExtInst(SrcI, FITy);
+      return new ZExtInst(SrcI, FITy);
+    }
+    if (FITy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits())
+      return new TruncInst(SrcI, FITy);
+    if (SrcTy == FITy)
+      return ReplaceInstUsesWith(FI, SrcI);
+    return new BitCastInst(SrcI, FITy);
+  }
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
   Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
   if (!OpI)
     return commonCastTransforms(FI);
 
-  // fptoui(uitofp(X)) --> X
-  // fptoui(sitofp(X)) --> X
-  // This is safe if the intermediate type has enough bits in its mantissa to
-  // accurately represent all values of X.  For example, do not do this with
-  // i64->float->i64.  This is also safe for sitofp case, because any negative
-  // 'X' value would cause an undefined result for the fptoui.
-  if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
-      OpI->getOperand(0)->getType() == FI.getType() &&
-      (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
-                    OpI->getType()->getFPMantissaWidth())
-    return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+  if (Instruction *I = FoldItoFPtoI(FI))
+    return I;
 
   return commonCastTransforms(FI);
 }
@@ -1366,17 +1401,8 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
   if (!OpI)
     return commonCastTransforms(FI);
 
-  // fptosi(sitofp(X)) --> X
-  // fptosi(uitofp(X)) --> X
-  // This is safe if the intermediate type has enough bits in its mantissa to
-  // accurately represent all values of X.  For example, do not do this with
-  // i64->float->i64.  This is also safe for sitofp case, because any negative
-  // 'X' value would cause an undefined result for the fptoui.
-  if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
-      OpI->getOperand(0)->getType() == FI.getType() &&
-      (int)FI.getType()->getScalarSizeInBits() <=
-                    OpI->getType()->getFPMantissaWidth())
-    return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+  if (Instruction *I = FoldItoFPtoI(FI))
+    return I;
 
   return commonCastTransforms(FI);
 }
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index d12ee09d52f..1a929349601 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -245,6 +245,7 @@ public:
   Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
                             Value *A, Value *B, Instruction &Outer,
                             SelectPatternFlavor SPF2, Value *C);
+  Instruction *FoldItoFPtoI(Instruction &FI);
   Instruction *visitSelectInst(SelectInst &SI);
   Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
   Instruction *visitCallInst(CallInst &CI);
diff --git a/test/Transforms/InstCombine/sitofp.ll b/test/Transforms/InstCombine/sitofp.ll
index 48c0d71e783..82097783883 100644
--- a/test/Transforms/InstCombine/sitofp.ll
+++ b/test/Transforms/InstCombine/sitofp.ll
@@ -72,3 +72,113 @@ define i32 @test8(i32 %A) nounwind {
   ret i32 %C
 }
 
+; CHECK-LABEL: test9
+; CHECK: zext i8
+; CHECK-NEXT: ret i32
+define i32 @test9(i8 %A) nounwind {
+  %B = sitofp i8 %A to float
+  %C = fptoui float %B to i32
+  ret i32 %C
+}
+
+; CHECK-LABEL: test10
+; CHECK: sext i8
+; CHECK-NEXT: ret i32
+define i32 @test10(i8 %A) nounwind {
+  %B = sitofp i8 %A to float
+  %C = fptosi float %B to i32
+  ret i32 %C
+}
+
+; If the input value is outside of the range of the output cast, it's
+; undefined behavior, so we can assume it fits.
+; CHECK-LABEL: test11
+; CHECK: trunc
+; CHECK-NEXT: ret i8
+define i8 @test11(i32 %A) nounwind {
+  %B = sitofp i32 %A to float
+  %C = fptosi float %B to i8
+  ret i8 %C
+}
+
+; If the input value is negative, it'll be outside the range of the
+; output cast, and thus undefined behavior.
+; CHECK-LABEL: test12
+; CHECK: zext i8
+; CHECK-NEXT: ret i32
+define i32 @test12(i8 %A) nounwind {
+  %B = sitofp i8 %A to float
+  %C = fptoui float %B to i32
+  ret i32 %C
+}
+
+; This can't fold because the 25-bit input doesn't fit in the mantissa.
+; CHECK-LABEL: test13
+; CHECK: uitofp
+; CHECK-NEXT: fptoui
+define i32 @test13(i25 %A) nounwind {
+  %B = uitofp i25 %A to float
+  %C = fptoui float %B to i32
+  ret i32 %C
+}
+
+; But this one can.
+; CHECK-LABEL: test14
+; CHECK: zext i24
+; CHECK-NEXT: ret i32
+define i32 @test14(i24 %A) nounwind {
+  %B = uitofp i24 %A to float
+  %C = fptoui float %B to i32
+  ret i32 %C
+}
+
+; And this one can too.
+; CHECK-LABEL: test15
+; CHECK: trunc i32
+; CHECK-NEXT: ret i24
+define i24 @test15(i32 %A) nounwind {
+  %B = uitofp i32 %A to float
+  %C = fptoui float %B to i24
+  ret i24 %C
+}
+
+; This can fold because the 25-bit input is signed and we discard the sign bit.
+; CHECK-LABEL: test16
+; CHECK: zext
+define i32 @test16(i25 %A) nounwind {
+ %B = sitofp i25 %A to float
+ %C = fptoui float %B to i32
+ ret i32 %C
+}
+
+; This can't fold because the 26-bit input won't fit the mantissa
+; even after discarding the sign bit.
+; CHECK-LABEL: test17
+; CHECK: sitofp
+; CHECK-NEXT: fptoui
+define i32 @test17(i26 %A) nounwind {
+ %B = sitofp i26 %A to float
+ %C = fptoui float %B to i32
+ ret i32 %C
+}
+
+; This can't fold: -2^53-1 rounds to -2^53, which a signed i54 can still hold.
+; CHECK-LABEL: test18
+; CHECK: fptosi double
+define i54 @test18(i64 %A) nounwind {
+ %B = sitofp i64 %A to double
+ %C = fptosi double %B to i54
+ ret i54 %C
+}
+
+; This can't fold because the 55-bit output won't fit the mantissa
+; even after discarding the sign bit.
+; CHECK-LABEL: test19
+; CHECK: sitofp
+; CHECK-NEXT: fptosi
+define i55 @test19(i64 %A) nounwind {
+ %B = sitofp i64 %A to double
+ %C = fptosi double %B to i55
+ ret i55 %C
+}
+