R600: Fix miscompiles when BFE has multiple uses
author: Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 15 Oct 2014 17:58:34 +0000 (17:58 +0000)
committer: Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 15 Oct 2014 17:58:34 +0000 (17:58 +0000)
SimplifyDemandedBits would break the other uses of the operand.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219819 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/R600/AMDGPUISelLowering.cpp
test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll

index bac4ca03b33e2fbe4352a0dec576cb92171c3f1c..fa384db230b96315963e1eaa1555a3641735c8ea 100644 (file)
@@ -2170,13 +2170,16 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
                          BitsFrom, ShiftVal);
     }
 
-    APInt KnownZero, KnownOne;
-    TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
-                                          !DCI.isBeforeLegalizeOps());
-    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-    if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
-        TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) {
-      DCI.CommitTargetLoweringOpt(TLO);
+    if (BitsFrom.hasOneUse()) {
+      APInt KnownZero, KnownOne;
+      TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+                                            !DCI.isBeforeLegalizeOps());
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
+          TLI.SimplifyDemandedBits(BitsFrom, Demanded,
+                                   KnownZero, KnownOne, TLO)) {
+        DCI.CommitTargetLoweringOpt(TLO);
+      }
     }
 
     break;
index d065c2384f15968d12fa41699de6b6a1728b179c..1d239af7aa4517ecde2e7d740c75fcda51ca6541 100644 (file)
@@ -552,3 +552,25 @@ define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
   store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
   ret void
 }
+
+; Make sure that SimplifyDemandedBits doesn't cause the and to be
+; reduced to the bits demanded by the bfe.
+
+; XXX: The operand to v_bfe_u32 could also just directly be the load register.
+; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
+; SI: BUFFER_LOAD_DWORD [[ARG:v[0-9]+]]
+; SI: V_AND_B32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
+; SI: V_BFE_U32 [[BFE:v[0-9]+]], [[AND]], 2, 2
+; SI-DAG: BUFFER_STORE_DWORD [[AND]]
+; SI-DAG: BUFFER_STORE_DWORD [[BFE]]
+; SI: S_ENDPGM
+define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
+                                            i32 addrspace(1)* %out1,
+                                            i32 addrspace(1)* %in) nounwind {
+  %src = load i32 addrspace(1)* %in, align 4
+  %and = and i32 %src, 63
+  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
+  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
+  store i32 %and, i32 addrspace(1)* %out1, align 4
+  ret void
+}