Optimize vector fabs of bitcasted constant integer values.

author Sanjay Patel <spatel@rotateright.com>

Tue, 5 Aug 2014 17:35:22 +0000 (17:35 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Tue, 5 Aug 2014 17:35:22 +0000 (17:35 +0000)
author Sanjay Patel <spatel@rotateright.com>
Tue, 5 Aug 2014 17:35:22 +0000 (17:35 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Tue, 5 Aug 2014 17:35:22 +0000 (17:35 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 035ce57bf0f678e6037af7075f18c6158faa5b3b..d76b1eb39ebdff0ad2bf7f2e9ea39b499fd344ac 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7409,22 +7409,28 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
    if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
      return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
  
-  // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
    // constant pool values.
-  // TODO: We can also optimize for vectors here, but we need to make sure
-  // that the sign mask is created properly for each vector element.
    if (!TLI.isFAbsFree(VT) &&
-      N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
-      N0.getOperand(0).getValueType().isInteger() &&
-      !VT.isVector()) {
+      N0.getOpcode() == ISD::BITCAST &&
+      N0.getNode()->hasOneUse()) {
      SDValue Int = N0.getOperand(0);
      EVT IntVT = Int.getValueType();
      if (IntVT.isInteger() && !IntVT.isVector()) {
+      APInt SignMask;
+      if (N0.getValueType().isVector()) {
+        // For a vector, get a mask such as 0x7f... per scalar element
+        // and splat it.
+        SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+      } else {
+        // For a scalar, just generate 0x7f...
+        SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
+      }
        Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
-             DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+                        DAG.getConstant(SignMask, IntVT));
        AddToWorklist(Int.getNode());
-      return DAG.getNode(ISD::BITCAST, SDLoc(N),
-                         N->getValueType(0), Int);
+      return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
      }
    }
  
diff --git a/test/CodeGen/ARM/fabs-neon.ll b/test/CodeGen/ARM/fabs-neon.ll

index e3094aaf57d0a6a5aa7be071cfd310079063479e..5a176b2e82c3d9547a4ad22142e36a0a6f40f456 100644 (file)
--- a/test/CodeGen/ARM/fabs-neon.ll
+++ b/test/CodeGen/ARM/fabs-neon.ll
@@ -15,3 +15,40 @@ define <2 x float> @test2(<2 x float> %a) {
      ret <2 x float> %foo
  }
  declare <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+
+; No constant pool loads or vector ops are needed for the fabs of a
+; bitcasted integer constant; we should just return integer constants
+; that have the sign bits turned off.
+;
+; So instead of something like this:
+;      mvn     r0, #0
+;      mov     r1, #0
+;      vmov    d16, r1, r0
+;      vabs.f32        d16, d16
+;      vmov    r0, r1, d16
+;      bx      lr
+;
+; We should generate:
+;      mov     r0, #0
+;      mvn     r1, #-2147483648
+;      mov     pc, lr
+
+; CHECK-LABEL: fabs_v2f32_1
+define i64 @fabs_v2f32_1() {
+ %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
+ %ret = bitcast <2 x float> %fabs to i64
+ ret i64 %ret
+; CHECK: mvn r1, #-2147483648
+; CHECK-NOT: vabs
+}
+
+; CHECK-LABEL: fabs_v2f32_2
+define i64 @fabs_v2f32_2() {
+ %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
+ %ret = bitcast <2 x float> %fabs to i64
+ ret i64 %ret
+; CHECK: mvn r0, #-2147483648
+; CHECK-NOT: vabs
+}
diff --git a/test/CodeGen/X86/vec_fabs.ll b/test/CodeGen/X86/vec_fabs.ll

index 4c14a9602d41c15784c1630e79a5e8891e0c310b..2271946abefde45c7ec7382e81767b3dadc87413 100644 (file)
--- a/test/CodeGen/X86/vec_fabs.ll
+++ b/test/CodeGen/X86/vec_fabs.ll
@@ -38,21 +38,38 @@ define <8 x float> @fabs_v8f32(<8 x float> %p)
  declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
  
  ; PR20354: when generating code for a vector fabs op,
-; make sure the correct mask is used for all vector elements.
-; CHECK-LABEL: .LCPI4_0:
-; CHECK-NEXT:    .long 2147483647
-; CHECK-NEXT:    .long 2147483647
-define i64 @fabs_v2f32(<2 x float> %v) {
-; CHECK-LABEL: fabs_v2f32:
-; CHECK:         movabsq $-9223372034707292160, %[[R:r[^ ]+]]
-; CHECK-NEXT:    vmovq %[[R]], %[[X:xmm[0-9]+]]
-; CHECK-NEXT:    vandps   {{.*}}.LCPI4_0{{.*}}, %[[X]], %[[X]]
-; CHECK-NEXT:    vmovq   %[[X]], %rax
-; CHECK-NEXT:    retq
-  %highbits = bitcast i64 9223372039002259456 to <2 x float> ; 0x8000_0000_8000_0000
-  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %highbits)
-  %ret = bitcast <2 x float> %fabs to i64
-  ret i64 %ret
+; make sure that we're only turning off the sign bit of each float value.
+; No constant pool loads or vector ops are needed for the fabs of a
+; bitcasted integer constant; we should just return an integer constant
+; that has the sign bits turned off.
+;
+; So instead of something like this:
+;    movabsq (materialize the 64-bit integer constant in a register)
+;    vmovq   (move from integer register to vector/fp register)
+;    vandps  (mask off sign bits using a mask loaded from the constant pool)
+;    vmovq   (move vector/fp register back to integer return register)
+;
+; We should generate:
+;    mov     (put constant value in return register)
+
+; CHECK-LABEL: fabs_v2f32_1
+define i64 @fabs_v2f32_1() {
+ %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
+ %ret = bitcast <2 x float> %fabs to i64
+ ret i64 %ret
+; CHECK: movabsq $9223372032559808512, %rax
+;  # imm = 0x7FFF_FFFF_0000_0000
+}
+
+; CHECK-LABEL: fabs_v2f32_2
+define i64 @fabs_v2f32_2() {
+ %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
+ %ret = bitcast <2 x float> %fabs to i64
+ ret i64 %ret
+; CHECK: movl $2147483647, %eax
+;  # imm = 0x0000_0000_7FFF_FFFF
  }
  
  declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)
author	Sanjay Patel <spatel@rotateright.com>
	Tue, 5 Aug 2014 17:35:22 +0000 (17:35 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Tue, 5 Aug 2014 17:35:22 +0000 (17:35 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/ARM/fabs-neon.ll		patch \| blob \| history
test/CodeGen/X86/vec_fabs.ll		patch \| blob \| history