R600: Handle fcopysign

author Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 10 Jun 2014 19:00:20 +0000 (19:00 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 10 Jun 2014 19:00:20 +0000 (19:00 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 10 Jun 2014 19:00:20 +0000 (19:00 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 10 Jun 2014 19:00:20 +0000 (19:00 +0000)
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp

index 849f16919c092ab5392c57b89b9532b46f2f4e6b..cb4f8cde1bfe6142455da2ed4b89c58d6fe7741a 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -214,6 +214,12 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
    setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
  
+  if (!Subtarget->hasBFI()) {
+    // fcopysign can be done in a single instruction with BFI.
+    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+  }
+
    static const MVT::SimpleValueType IntTypes[] = {
      MVT::v2i32, MVT::v4i32
    };
@@ -260,6 +266,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::SELECT, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
+    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    }
  
    setTargetDAGCombine(ISD::MUL);
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td

index 7d19c3db85b6d1b6bbc6a6780972fee469f70747..1345f79646d1a272a4f9ac4c72a6bf65d889f622 100644 (file)
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -365,7 +365,7 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
  
  // BFI_INT patterns
  
-multiclass BFIPatterns <Instruction BFI_INT> {
+multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> {
  
    // Definition from ISA doc:
    // (y & x) | (z & ~x)
@@ -381,6 +381,19 @@ multiclass BFIPatterns <Instruction BFI_INT> {
      (BFI_INT $x, $y, $z)
    >;
  
+  def : Pat <
+    (fcopysign f32:$src0, f32:$src1),
+    (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
+  >;
+
+  def : Pat <
+    (f64 (fcopysign f64:$src0, f64:$src1)),
+      (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0),
+      (BFI_INT (LoadImm32 0x7fffffff),
+               (i32 (EXTRACT_SUBREG $src0, sub1)),
+               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
+  >;
  }
  
  // SHA-256 Ma patterns
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h

index 1b041d6bd2bd59dc65a2b1bcb46342e7cd1b1762..b3b3a1a0edc895c745d87eec5ff0298cb449e8d2 100644 (file)
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -72,6 +72,10 @@ public:
      return (getGeneration() >= EVERGREEN);
    }
  
+  bool hasBFI() const {
+    return (getGeneration() >= EVERGREEN);
+  }
+
    bool hasBFM() const {
      return hasBFE();
    }
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td

index 20654419a8f4c2c45dac5658c635457d81a41c0f..e09a0b2f786b57261fc95713101f812e487dfc6c 100644 (file)
--- a/lib/Target/R600/EvergreenInstructions.td
+++ b/lib/Target/R600/EvergreenInstructions.td
@@ -295,7 +295,7 @@ def : Pat<(i32 (sext_inreg i32:$src, i8)),
  def : Pat<(i32 (sext_inreg i32:$src, i16)),
    (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
  
-defm : BFIPatterns <BFI_INT_eg>;
+defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32>;
  
  def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
    [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td

index 07aba66c45dfd09d9a0a9554d7dfc20c0fef2b7e..43ac47dad292f8284975385cb5e9f95ede735a99 100644 (file)
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -2112,7 +2112,7 @@ def : Pat <
    (V_MUL_HI_I32 $src0, $src1, (i32 0))
  >;
  
-defm : BFIPatterns <V_BFI_B32>;
+defm : BFIPatterns <V_BFI_B32, S_MOV_B32>;
  def : ROTRPattern <V_ALIGNBIT_B32>;
  
  /********** ======================= **********/
diff --git a/test/CodeGen/R600/fcopysign.f32.ll b/test/CodeGen/R600/fcopysign.f32.ll

new file mode 100644 (file)

index 0000000..7b4425b
--- /dev/null
+++ b/test/CodeGen/R600/fcopysign.f32.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+declare float @llvm.copysign.f32(float, float) nounwind readnone
+declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+; Try to identify arg based on higher address.
+; FUNC-LABEL: @test_copysign_f32:
+; SI: S_LOAD_DWORD [[SSIGN:s[0-9]+]], {{.*}} 0xc
+; SI: V_MOV_B32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
+; SI-DAG: S_LOAD_DWORD [[SMAG:s[0-9]+]], {{.*}} 0xb
+; SI-DAG: V_MOV_B32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
+; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
+; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+
+; EG: BFI_INT
+define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind {
+  %result = call float @llvm.copysign.f32(float %mag, float %sign)
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_copysign_v2f32:
+; SI: S_ENDPGM
+
+; EG: BFI_INT
+; EG: BFI_INT
+define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %mag, <2 x float> %sign) nounwind {
+  %result = call <2 x float> @llvm.copysign.v2f32(<2 x float> %mag, <2 x float> %sign)
+  store <2 x float> %result, <2 x float> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_copysign_v4f32:
+; SI: S_ENDPGM
+
+; EG: BFI_INT
+; EG: BFI_INT
+; EG: BFI_INT
+; EG: BFI_INT
+define void @test_copysign_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %mag, <4 x float> %sign) nounwind {
+  %result = call <4 x float> @llvm.copysign.v4f32(<4 x float> %mag, <4 x float> %sign)
+  store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
diff --git a/test/CodeGen/R600/fcopysign.f64.ll b/test/CodeGen/R600/fcopysign.f64.ll

new file mode 100644 (file)

index 0000000..ea7a6db
--- /dev/null
+++ b/test/CodeGen/R600/fcopysign.f64.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.copysign.f64(double, double) nounwind readnone
+declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone
+
+; FUNC-LABEL: @test_copysign_f64:
+; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: V_MOV_B32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
+; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: V_MOV_B32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
+; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
+; SI: V_BFI_B32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
+; SI: V_MOV_B32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
+; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
+; SI: S_ENDPGM
+define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
+  %result = call double @llvm.copysign.f64(double %mag, double %sign)
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_copysign_v2f64:
+; SI: S_ENDPGM
+define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
+  %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
+  store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_copysign_v4f64:
+; SI: S_ENDPGM
+define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
+  %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
+  store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8
+  ret void
+}
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 10 Jun 2014 19:00:20 +0000 (19:00 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 10 Jun 2014 19:00:20 +0000 (19:00 +0000)
lib/Target/R600/AMDGPUISelLowering.cpp		patch \| blob \| history
lib/Target/R600/AMDGPUInstructions.td		patch \| blob \| history
lib/Target/R600/AMDGPUSubtarget.h		patch \| blob \| history
lib/Target/R600/EvergreenInstructions.td		patch \| blob \| history
lib/Target/R600/SIInstructions.td		patch \| blob \| history
test/CodeGen/R600/fcopysign.f32.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/R600/fcopysign.f64.ll	[new file with mode: 0644]	patch \| blob