Add a llvm.copysign intrinsic

author Hal Finkel <hfinkel@anl.gov>

Mon, 19 Aug 2013 23:35:46 +0000 (23:35 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Mon, 19 Aug 2013 23:35:46 +0000 (23:35 +0000)
author Hal Finkel <hfinkel@anl.gov>
Mon, 19 Aug 2013 23:35:46 +0000 (23:35 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Mon, 19 Aug 2013 23:35:46 +0000 (23:35 +0000)
diff --git a/docs/LangRef.rst b/docs/LangRef.rst

index 47c89e8d24ad00fb5acc2584da2a7a99b4ee7597..68ac96f9b5a8579c47885ff9a7ffa2e6abdbd5ac 100644 (file)
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -7353,6 +7353,42 @@ Semantics:
  This function returns the same values as the libm ``fabs`` functions
  would, and handles error conditions in the same way.
  
+'``llvm.copysign.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.copysign`` on any
+floating point or vector of floating point type. Not all targets support
+all types, however.
+
+::
+
+      declare float     @llvm.copysign.f32(float  %Mag, float  %Sgn)
+      declare double    @llvm.copysign.f64(double %Mag, double %Sgn)
+      declare x86_fp80  @llvm.copysign.f80(x86_fp80  %Mag, x86_fp80  %Sgn)
+      declare fp128     @llvm.copysign.f128(fp128 %Mag, fp128 %Sgn)
+      declare ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128  %Mag, ppc_fp128  %Sgn)
+
+Overview:
+"""""""""
+
+The '``llvm.copysign.*``' intrinsics return a value with the magnitude of the
+first operand and the sign of the second operand.
+
+Arguments:
+""""""""""
+
+The arguments and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``copysign``
+functions would, and handles error conditions in the same way.
+
  '``llvm.floor.*``' Intrinsic
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td

index 856e0a5824dbf78c07575aa73abb148e2bf00784..c7414e063fef4fd1bb2a7f9bbfa936366575976a 100644 (file)
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -296,6 +296,8 @@ let Properties = [IntrReadMem] in {
    def int_exp  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
    def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
    def int_fabs : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_copysign : Intrinsic<[llvm_anyfloat_ty],
+                               [LLVMMatchType<0>, LLVMMatchType<0>]>;
    def int_floor : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
    def int_ceil  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
    def int_trunc : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp

index ef532357c5c6500e702f5b6053a80043d78b2201..0883ab0ce7f1e54179cd1a5f9f250ade891d50f1 100644 (file)
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -443,6 +443,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
    case Intrinsic::log10:   ISD = ISD::FLOG10; break;
    case Intrinsic::log2:    ISD = ISD::FLOG2;  break;
    case Intrinsic::fabs:    ISD = ISD::FABS;   break;
+  case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break;
    case Intrinsic::floor:   ISD = ISD::FFLOOR; break;
    case Intrinsic::ceil:    ISD = ISD::FCEIL;  break;
    case Intrinsic::trunc:   ISD = ISD::FTRUNC; break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

index 237a33a0cfff155a3cc740fcc52d0b85b995de63..b768f39e570debc5622547cf103adf1d0c8e7ddc 100644 (file)
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -227,6 +227,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
    case ISD::FP_TO_UINT:
    case ISD::FNEG:
    case ISD::FABS:
+  case ISD::FCOPYSIGN:
    case ISD::FSQRT:
    case ISD::FSIN:
    case ISD::FCOS:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

index bf65319295b1a4d8fee9459e27644f1e47e5804b..fd5f97718f55a6278b6104ea92fe3dff4cfddfdd 100644 (file)
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -98,6 +98,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
    case ISD::ADD:
    case ISD::AND:
    case ISD::FADD:
+  case ISD::FCOPYSIGN:
    case ISD::FDIV:
    case ISD::FMUL:
    case ISD::FPOW:
@@ -557,6 +558,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
    case ISD::SUB:
    case ISD::MUL:
    case ISD::FADD:
+  case ISD::FCOPYSIGN:
    case ISD::FSUB:
    case ISD::FMUL:
    case ISD::SDIV:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index 3b9f3581a4cdaf6cc8e22b95c14c8e615021ba88..df9293da261e74559b93606198ffcdc8d3fba382 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4925,6 +4925,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                               getValue(I.getArgOperand(0))));
      return 0;
    }
+  case Intrinsic::copysign:
+    setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1))));
+    return 0;
    case Intrinsic::fma:
      setValue(&I, DAG.getNode(ISD::FMA, sdl,
                               getValue(I.getArgOperand(0)).getValueType(),
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp

index 55125bd23f9fd60cc6189517233353cf88d16d56..a2ea35e64a193b958eaff7341ec30ecc21abfac6 100644 (file)
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -697,6 +697,11 @@ void TargetLoweringBase::initActions() {
  
      // These library functions default to expand.
      setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand);
+
+    // These operations default to expand for vector types.
+    if (VT >= MVT::FIRST_VECTOR_VALUETYPE &&
+        VT <= MVT::LAST_VECTOR_VALUETYPE)
+      setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
    }
  
    // Most targets ignore the @llvm.prefetch intrinsic.
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp

index d9f132363b15e359ac76419bcbf7b0096de0d51e..4224ae2d273c991a775f7d574fec16b49c0bf77e 100644 (file)
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -253,6 +253,12 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
            case Intrinsic::sin:
            case Intrinsic::cos:
              return true;
+          case Intrinsic::copysign:
+            if (CI->getArgOperand(0)->getType()->getScalarType()->
+                isPPC_FP128Ty())
+              return true;
+            else
+              continue; // ISD::FCOPYSIGN is never a library call.
            case Intrinsic::sqrt:      Opcode = ISD::FSQRT;      break;
            case Intrinsic::floor:     Opcode = ISD::FFLOOR;     break;
            case Intrinsic::ceil:      Opcode = ISD::FCEIL;      break;
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index e452acdafac2dab36dc62c51bb0ca62bcf89413f..a7026f68a2c259e750869705156a59b74186f019 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1767,6 +1767,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
      case Intrinsic::log10:
      case Intrinsic::log2:
      case Intrinsic::fabs:
+    case Intrinsic::copysign:
      case Intrinsic::floor:
      case Intrinsic::ceil:
      case Intrinsic::trunc:
@@ -1831,6 +1832,10 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
    case LibFunc::fabsf:
    case LibFunc::fabsl:
      return Intrinsic::fabs;
+  case LibFunc::copysign:
+  case LibFunc::copysignf:
+  case LibFunc::copysignl:
+    return Intrinsic::copysign;
    case LibFunc::floor:
    case LibFunc::floorf:
    case LibFunc::floorl:
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll

index 566dcc75cd8175e156b08cab9dcc72b28fbb9e73..216b937ad6d0b2045ac688634d85f1ad114e2c1e 100644 (file)
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -468,6 +468,59 @@ for.end:                                          ; preds = %for.body, %entry
  
  declare double @llvm.fabs(double) nounwind readnone
  
+;CHECK-LABEL: @copysign_f32(
+;CHECK: llvm.copysign.v4f32
+;CHECK: ret void
+define void @copysign_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable { ; x[i] = copysign(y[i], z[i]) for i in [0, %n)
+entry:
+  %cmp6 = icmp sgt i32 %n, 0 ; skip the loop entirely when %n <= 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4 ; magnitude operand, y[i]
+  %arrayidx1 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %1 = load float* %arrayidx1, align 4 ; sign operand, z[i]
+  %call = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4 ; result written to x[i]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the i64 counter so it can be compared with the i32 %n
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.copysign.f32(float, float) nounwind readnone
+
+define void @copysign_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable { ; x[i] = copysign(y[i], z[i]) for i in [0, %n)
+entry:
+  %cmp6 = icmp sgt i32 %n, 0 ; skip the loop entirely when %n <= 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8 ; magnitude operand, y[i]
+  %arrayidx1 = getelementptr inbounds double* %z, i64 %indvars.iv
+  %1 = load double* %arrayidx1, align 8 ; sign operand, z[i] (must read through %arrayidx1, not %arrayidx)
+  %call = tail call double @llvm.copysign(double %0, double %1) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8 ; result written to x[i]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the i64 counter so it can be compared with the i32 %n
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.copysign(double, double) nounwind readnone
+
  ;CHECK-LABEL: @floor_f32(
  ;CHECK: llvm.floor.v4f32
  ;CHECK: ret void
author	Hal Finkel <hfinkel@anl.gov>
	Mon, 19 Aug 2013 23:35:46 +0000 (23:35 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Mon, 19 Aug 2013 23:35:46 +0000 (23:35 +0000)
docs/LangRef.rst		patch \| blob \| history
include/llvm/IR/Intrinsics.td		patch \| blob \| history
lib/CodeGen/BasicTargetTransformInfo.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
lib/CodeGen/TargetLoweringBase.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCCTRLoops.cpp		patch \| blob \| history
lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
test/Transforms/LoopVectorize/intrinsic.ll		patch \| blob \| history