Implement flt_rounds for PowerPC.

author Dale Johannesen <dalej@apple.com>

Fri, 18 Jan 2008 19:55:37 +0000 (19:55 +0000)

committer Dale Johannesen <dalej@apple.com>

Fri, 18 Jan 2008 19:55:37 +0000 (19:55 +0000)
author Dale Johannesen <dalej@apple.com>
Fri, 18 Jan 2008 19:55:37 +0000 (19:55 +0000)
committer Dale Johannesen <dalej@apple.com>
Fri, 18 Jan 2008 19:55:37 +0000 (19:55 +0000)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index 20e2632fbe94abddc77190b6d02e8fbba76ea362..87f101ded876eb4857ffa62b28cdd6480b33df1b 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -109,6 +109,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);
    setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+  setOperationAction(ISD::FLT_ROUNDS, MVT::i32, Custom);
    
    // If we're enabling GP optimizations, use hardware square root
    if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
@@ -2207,6 +2209,67 @@ static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
    return FP;
  }
  
+static SDOperand LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) {
+  /*
+   The rounding mode is in bits 30:31 of FPSR, and has the following
+   settings:
+     00 Round to nearest
+     01 Round to 0
+     10 Round to +inf
+     11 Round to -inf
+
+  FLT_ROUNDS, on the other hand, expects the following:
+    -1 Undefined
+     0 Round to 0
+     1 Round to nearest
+     2 Round to +inf
+     3 Round to -inf
+
+  To perform the conversion, we do:
+    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
+  */
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MVT::ValueType VT = Op.getValueType();
+  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  std::vector<MVT::ValueType> NodeTys;
+  SDOperand MFFSreg, InFlag;
+
+  // Save FP Control Word to register
+  NodeTys.push_back(MVT::f64);    // return register
+  NodeTys.push_back(MVT::Flag);   // unused in this context
+  SDOperand Chain = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0);
+
+  // Save FP register to stack slot
+  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+  SDOperand StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+  SDOperand Store = DAG.getStore(DAG.getEntryNode(), Chain,
+                                 StackSlot, NULL, 0);
+
+  // Load FP Control Word from low 32 bits of stack slot.
+  SDOperand Four = DAG.getConstant(4, PtrVT);
+  SDOperand Addr = DAG.getNode(ISD::ADD, PtrVT, StackSlot, Four);
+  SDOperand CWD = DAG.getLoad(MVT::i32, Store, Addr, NULL, 0);
+
+  // Transform as necessary
+  SDOperand CWD1 =
+    DAG.getNode(ISD::AND, MVT::i32,
+                CWD, DAG.getConstant(3, MVT::i32));
+  SDOperand CWD2 =
+    DAG.getNode(ISD::SRL, MVT::i32,
+                DAG.getNode(ISD::AND, MVT::i32,
+                            DAG.getNode(ISD::XOR, MVT::i32,
+                                        CWD, DAG.getConstant(3, MVT::i32)),
+                            DAG.getConstant(3, MVT::i32)),
+                DAG.getConstant(1, MVT::i8));
+
+  SDOperand RetVal =
+    DAG.getNode(ISD::XOR, MVT::i32, CWD1, CWD2);
+
+  return DAG.getNode((MVT::getSizeInBits(VT) < 16 ?
+                      ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
+}
+
  static SDOperand LowerSHL_PARTS(SDOperand Op, SelectionDAG &DAG) {
    assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
           Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
@@ -3027,6 +3090,7 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
    case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
    case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
    case ISD::FP_ROUND_INREG:     return LowerFP_ROUND_INREG(Op, DAG);
+  case ISD::FLT_ROUNDS:         return LowerFLT_ROUNDS(Op, DAG);
  
    // Lower 64-bit shifts.
    case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
diff --git a/test/CodeGen/PowerPC/frounds.ll b/test/CodeGen/PowerPC/frounds.ll

new file mode 100644 (file)

index 0000000..159639e
--- /dev/null
+++ b/test/CodeGen/PowerPC/frounds.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as | llc -march=ppc32
+
+define i32 @foo() {
+entry:
+       %retval = alloca i32            ; <i32*> [#uses=2]
+       %tmp = alloca i32               ; <i32*> [#uses=2]
+       %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+       %tmp1 = call i32 @llvm.flt.rounds( )            ; <i32> [#uses=1]
+       store i32 %tmp1, i32* %tmp, align 4
+       %tmp2 = load i32* %tmp, align 4         ; <i32> [#uses=1]
+       store i32 %tmp2, i32* %retval, align 4
+       br label %return
+
+return:                ; preds = %entry
+       %retval3 = load i32* %retval            ; <i32> [#uses=1]
+       ret i32 %retval3
+}
+
+declare i32 @llvm.flt.rounds() nounwind
author	Dale Johannesen <dalej@apple.com>
	Fri, 18 Jan 2008 19:55:37 +0000 (19:55 +0000)
committer	Dale Johannesen <dalej@apple.com>
	Fri, 18 Jan 2008 19:55:37 +0000 (19:55 +0000)
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
test/CodeGen/PowerPC/frounds.ll	[new file with mode: 0644]	patch \| blob