//
// The LLVM Compiler Infrastructure
//
-// This file was developed by Chris Lattner and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;
X86TargetLowering::X86TargetLowering(TargetMachine &TM)
: TargetLowering(TM) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
- X86ScalarSSE = Subtarget->hasSSE2();
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+
RegInfo = TM.getRegisterInfo();
setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ // We don't accept any truncstore of integer registers.
+ setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
} else {
- if (X86ScalarSSE)
+ if (X86ScalarSSEf64)
// If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
else
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
// SSE has no i16 to fp conversion, only i32
- if (X86ScalarSSE)
+ if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
- else {
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ } else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
}
- if (!Subtarget->is64Bit()) {
- // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
- setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
- setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
- }
+ // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
+ // are Legal, f80 is custom lowered.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
// this operation.
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
- if (X86ScalarSSE) {
+ if (X86ScalarSSEf32) {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
} else {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
} else {
- if (X86ScalarSSE && !Subtarget->hasSSE3())
+ if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
- if (!X86ScalarSSE) {
+ if (!X86ScalarSSEf64) {
setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
}
+ // Scalar integer multiply, multiply-high, divide, and remainder are
+ // lowered to use operations that produce two results, to match the
+ // available instructions. This exposes the two-result form to trivial
+ // CSE, which is able to combine x/y and x%y into a single instruction,
+ // for example. The single-result multiply instructions are introduced
+ // in X86ISelDAGToDAG.cpp, after CSE, for uses where the the high part
+ // is not needed.
+ setOperationAction(ISD::MUL , MVT::i8 , Expand);
+ setOperationAction(ISD::MULHS , MVT::i8 , Expand);
+ setOperationAction(ISD::MULHU , MVT::i8 , Expand);
+ setOperationAction(ISD::SDIV , MVT::i8 , Expand);
+ setOperationAction(ISD::UDIV , MVT::i8 , Expand);
+ setOperationAction(ISD::SREM , MVT::i8 , Expand);
+ setOperationAction(ISD::UREM , MVT::i8 , Expand);
+ setOperationAction(ISD::MUL , MVT::i16 , Expand);
+ setOperationAction(ISD::MULHS , MVT::i16 , Expand);
+ setOperationAction(ISD::MULHU , MVT::i16 , Expand);
+ setOperationAction(ISD::SDIV , MVT::i16 , Expand);
+ setOperationAction(ISD::UDIV , MVT::i16 , Expand);
+ setOperationAction(ISD::SREM , MVT::i16 , Expand);
+ setOperationAction(ISD::UREM , MVT::i16 , Expand);
+ setOperationAction(ISD::MUL , MVT::i32 , Expand);
+ setOperationAction(ISD::MULHS , MVT::i32 , Expand);
+ setOperationAction(ISD::MULHU , MVT::i32 , Expand);
+ setOperationAction(ISD::SDIV , MVT::i32 , Expand);
+ setOperationAction(ISD::UDIV , MVT::i32 , Expand);
+ setOperationAction(ISD::SREM , MVT::i32 , Expand);
+ setOperationAction(ISD::UREM , MVT::i32 , Expand);
+ setOperationAction(ISD::MUL , MVT::i64 , Expand);
+ setOperationAction(ISD::MULHS , MVT::i64 , Expand);
+ setOperationAction(ISD::MULHU , MVT::i64 , Expand);
+ setOperationAction(ISD::SDIV , MVT::i64 , Expand);
+ setOperationAction(ISD::UDIV , MVT::i64 , Expand);
+ setOperationAction(ISD::SREM , MVT::i64 , Expand);
+ setOperationAction(ISD::UREM , MVT::i64 , Expand);
+
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
setOperationAction(ISD::BR_CC , MVT::Other, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
-
+ setOperationAction(ISD::FLT_ROUNDS , MVT::i32 , Custom);
+
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
setOperationAction(ISD::SELECT , MVT::i32 , Custom);
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f80 , Custom);
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
setOperationAction(ISD::SETCC , MVT::i16 , Custom);
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f80 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::SELECT , MVT::i64 , Custom);
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
setOperationAction(ISD::MEMSET , MVT::Other, Custom);
setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
- // We don't have line number support yet.
+ // Use the default ISD::LOCATION expansion.
setOperationAction(ISD::LOCATION, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
// FIXME - use subtarget debug flags
if (!Subtarget->isTargetDarwin() &&
!Subtarget->isTargetELF() &&
setExceptionPointerRegister(X86::EAX);
setExceptionSelectorRegister(X86::EDX);
}
+ setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
- setOperationAction(ISD::ADJUST_TRAMP, MVT::i32, Expand);
- setOperationAction(ISD::ADJUST_TRAMP, MVT::i64, Expand);
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- if (X86ScalarSSE) {
+ if (X86ScalarSSEf64) {
+ // f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
addRegisterClass(MVT::f64, X86::FR64RegisterClass);
// cases we handle.
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
- addLegalFPImmediate(APFloat(+0.0)); // xorps / xorpd
+ addLegalFPImmediate(APFloat(+0.0)); // xorpd
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
// Conversions to long double (in X87) go through memory.
setConvertAction(MVT::f32, MVT::f80, Expand);
// Conversions from long double (in X87) go through memory.
setConvertAction(MVT::f80, MVT::f32, Expand);
setConvertAction(MVT::f80, MVT::f64, Expand);
+ } else if (X86ScalarSSEf32) {
+ // Use SSE for f32, x87 for f64.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+
+ // Use ANDPS to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+
+ // Use ANDPS and ORPS to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+
+ // Expand FP immediates into loads from the stack, except for the special
+ // cases we handle.
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+
+ // SSE->x87 conversions go through memory.
+ setConvertAction(MVT::f32, MVT::f64, Expand);
+ setConvertAction(MVT::f32, MVT::f80, Expand);
+
+ // x87->SSE truncations need to go through memory.
+ setConvertAction(MVT::f80, MVT::f32, Expand);
+ setConvertAction(MVT::f64, MVT::f32, Expand);
+ // And x87->x87 truncations also.
+ setConvertAction(MVT::f80, MVT::f64, Expand);
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ }
} else {
+ // f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+ addLegalFPImmediate(APFloat(+0.0f)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0f)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
}
// Long double always uses X87.
addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
+ setOperationAction(ISD::UNDEF, MVT::f80, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f80 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f80 , Expand);
+ }
+
+ // Always use a library call for pow.
+ setOperationAction(ISD::FPOW , MVT::f32 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f64 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f80 , Expand);
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SHL, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SRA, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SRL, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::ROTL, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::ROTR, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::BSWAP, (MVT::ValueType)VT, Expand);
}
if (Subtarget->hasMMX()) {
setOperationAction(ISD::SUB, MVT::v8i8, Legal);
setOperationAction(ISD::SUB, MVT::v4i16, Legal);
setOperationAction(ISD::SUB, MVT::v2i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v1i64, Legal);
setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
setOperationAction(ISD::MUL, MVT::v4i16, Legal);
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
+ // Do not attempt to custom lower non-power-of-2 vectors
+ if (!isPowerOf2_32(MVT::getVectorNumElements(VT)))
+ continue;
setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
}
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
}
+/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
+/// jumptable.
+SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
+ SelectionDAG &DAG) const {
+ if (usesGlobalOffsetTable())
+ return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
+ if (!Subtarget->isPICStyleRIPRel())
+ return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy());
+ return Table;
+}
+
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "X86GenCallingConv.inc"
-
+
+/// GetPossiblePreceedingTailCall - Get preceeding X86ISD::TAILCALL node if it
+/// exists skip possible ISD:TokenFactor.
+static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
+ if (Chain.getOpcode() == X86ISD::TAILCALL) {
+ return Chain;
+ } else if (Chain.getOpcode() == ISD::TokenFactor) {
+ if (Chain.getNumOperands() &&
+ Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL)
+ return Chain.getOperand(0);
+ }
+ return Chain;
+}
+
/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);
-
-
+
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
- if (DAG.getMachineFunction().liveout_empty()) {
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
for (unsigned i = 0; i != RVLocs.size(); ++i)
if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
-
SDOperand Chain = Op.getOperand(0);
- SDOperand Flag;
+ // Handle tail call return.
+ Chain = GetPossiblePreceedingTailCall(Chain);
+ if (Chain.getOpcode() == X86ISD::TAILCALL) {
+ SDOperand TailCall = Chain;
+ SDOperand TargetAddress = TailCall.getOperand(1);
+ SDOperand StackAdjustment = TailCall.getOperand(2);
+ assert(((TargetAddress.getOpcode() == ISD::Register &&
+ (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
+ cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
+ TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
+ TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
+ "Expecting an global address, external symbol, or register");
+ assert(StackAdjustment.getOpcode() == ISD::Constant &&
+ "Expecting a const value");
+
+ SmallVector<SDOperand,8> Operands;
+ Operands.push_back(Chain.getOperand(0));
+ Operands.push_back(TargetAddress);
+ Operands.push_back(StackAdjustment);
+ // Copy registers used by the call. Last operand is a flag so it is not
+ // copied.
+ for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
+ Operands.push_back(Chain.getOperand(i));
+ }
+ return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
+ Operands.size());
+ }
+
+ // Regular return.
+ SDOperand Flag;
+
// Copy the result values into the output registers.
if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
RVLocs[0].getLocReg() != X86::ST0) {
// If this is an FP return with ScalarSSE, we need to move the value from
// an XMM register onto the fp-stack.
- if (X86ScalarSSE) {
+ if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
SDOperand MemLoc;
-
+
// If this is a load into a scalarsse value, don't store the loaded value
// back to the stack, only to reload it: just replace the scalar-sse load.
if (ISD::isNON_EXTLoad(Value.Val) &&
- (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
+ Chain.reachesChainWithoutSideEffects(Value.getOperand(0))) {
Chain = Value.getOperand(0);
MemLoc = Value.getOperand(1);
} else {
CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
-
SmallVector<SDOperand, 8> ResultVals;
// Copy all of the result registers out of their specified physreg.
// If we are using ScalarSSE, store ST(0) to the stack and reload it into
// an XMM register.
- if (X86ScalarSSE) {
+ if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
+ SDOperand StoreLoc;
+ const Value *SrcVal = 0;
+ int SrcValOffset = 0;
+ MVT::ValueType RetStoreVT = RVLocs[0].getValVT();
+
+ // Determine where to store the value. If the call result is directly
+ // used by a store, see if we can store directly into the location. In
+ // this case, we'll end up producing a fst + movss[load] + movss[store] to
+ // the same location, and the two movss's will be nuked as dead. This
+ // optimizes common things like "*D = atof(..)" to not need an
+ // intermediate stack slot.
+ if (SDOperand(TheCall, 0).hasOneUse() &&
+ SDOperand(TheCall, 1).hasOneUse()) {
+ // In addition to direct uses, we also support a FP_ROUND that uses the
+ // value, if it is directly stored somewhere.
+ SDNode *User = *TheCall->use_begin();
+ if (User->getOpcode() == ISD::FP_ROUND && User->hasOneUse())
+ User = *User->use_begin();
+
+ // Ok, we have one use of the value and one use of the chain. See if
+ // they are the same node: a store.
+ if (StoreSDNode *N = dyn_cast<StoreSDNode>(User)) {
+ // Verify that the value being stored is either the call or a
+ // truncation of the call.
+ SDNode *StoreVal = N->getValue().Val;
+ if (StoreVal == TheCall)
+ ; // ok.
+ else if (StoreVal->getOpcode() == ISD::FP_ROUND &&
+ StoreVal->hasOneUse() &&
+ StoreVal->getOperand(0).Val == TheCall)
+ ; // ok.
+ else
+ N = 0; // not ok.
+
+ if (N && N->getChain().Val == TheCall &&
+ !N->isVolatile() && !N->isTruncatingStore() &&
+ N->getAddressingMode() == ISD::UNINDEXED) {
+ StoreLoc = N->getBasePtr();
+ SrcVal = N->getSrcValue();
+ SrcValOffset = N->getSrcValueOffset();
+ RetStoreVT = N->getValue().getValueType();
+ }
+ }
+ }
+
+ // If we weren't able to optimize the result, just create a temporary
+ // stack slot.
+ if (StoreLoc.Val == 0) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ StoreLoc = DAG.getFrameIndex(SSFI, getPointerTy());
+ }
+
// FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
// shouldn't be necessary except that RFP cannot be live across
- // multiple blocks. When stackifier is fixed, they can be uncoupled.
- MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
- SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ // multiple blocks (which could happen if a select gets lowered into
+ // multiple blocks and scheduled in between them). When stackifier is
+ // fixed, they can be uncoupled.
SDOperand Ops[] = {
- Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
+ Chain, RetVal, StoreLoc, DAG.getValueType(RetStoreVT), InFlag
};
Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
- RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
+ RetVal = DAG.getLoad(RetStoreVT, Chain,
+ StoreLoc, SrcVal, SrcValOffset);
Chain = RetVal.getValue(1);
+
+ // If we optimized a truncate, then extend the result back to its desired
+ // type.
+ if (RVLocs[0].getValVT() != RetStoreVT)
+ RetVal = DAG.getNode(ISD::FP_EXTEND, RVLocs[0].getValVT(), RetVal);
}
ResultVals.push_back(RetVal);
}
}
-//===----------------------------------------------------------------------===//
-// C & StdCall Calling Convention implementation
-//===----------------------------------------------------------------------===//
-// StdCall calling convention seems to be standard for many Windows' API
-// routines and around. It differs from C calling convention just a little:
-// callee should clean up the stack, not caller. Symbols should be also
-// decorated in some fancy way :) It doesn't support any vector arguments.
-
-/// AddLiveIn - This helper function adds the specified physical register to the
-/// MachineFunction as a live in value. It also creates a corresponding virtual
-/// register for it.
-static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
- const TargetRegisterClass *RC) {
- assert(RC->contains(PReg) && "Not the correct regclass!");
- unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
- MF.addLiveIn(PReg, VReg);
- return VReg;
-}
-
-SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
- bool isStdCall) {
- unsigned NumArgs = Op.Val->getNumValues() - 1;
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- SDOperand Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
-
- // Assign locations to all of the incoming arguments.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
- getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);
-
- SmallVector<SDOperand, 8> ArgValues;
- unsigned LastVal = ~0U;
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
- // places.
- assert(VA.getValNo() != LastVal &&
- "Don't support value assigned to multiple locs yet");
- LastVal = VA.getValNo();
-
- if (VA.isRegLoc()) {
- MVT::ValueType RegVT = VA.getLocVT();
- TargetRegisterClass *RC;
- if (RegVT == MVT::i32)
- RC = X86::GR32RegisterClass;
- else {
- assert(MVT::isVector(RegVT));
- RC = X86::VR128RegisterClass;
- }
-
- unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
- SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
-
- // If this is an 8 or 16-bit value, it is really passed promoted to 32
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
-
- ArgValues.push_back(ArgValue);
- } else {
- assert(VA.isMemLoc());
-
- // Create the nodes corresponding to a load from this parameter slot.
- int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
- VA.getLocMemOffset());
- SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
- ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
- }
- }
-
- unsigned StackSize = CCInfo.getNextStackOffset();
-
- ArgValues.push_back(Root);
-
- // If the function takes variable number of arguments, make a frame index for
- // the start of the first vararg value... for expansion of llvm.va_start.
- if (isVarArg)
- VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
-
- if (isStdCall && !isVarArg) {
- BytesToPopOnReturn = StackSize; // Callee pops everything..
- BytesCallerReserves = 0;
- } else {
- BytesToPopOnReturn = 0; // Callee pops nothing.
-
- // If this is an sret function, the return should pop the hidden pointer.
- if (NumArgs &&
- (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
- ISD::ParamFlags::StructReturn))
- BytesToPopOnReturn = 4;
-
- BytesCallerReserves = StackSize;
- }
-
- RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
-
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
-
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
-}
-
-SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
- unsigned CC) {
- SDOperand Chain = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
- SDOperand Callee = Op.getOperand(4);
- unsigned NumOps = (Op.getNumOperands() - 5) / 2;
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
-
- // Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
-
- Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
-
- SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
- SmallVector<SDOperand, 8> MemOpChains;
-
- SDOperand StackPtr;
-
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
- break;
- }
-
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc());
- if (StackPtr.Val == 0)
- StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
- SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
- PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
- }
- }
-
- // If the first argument is an sret pointer, remember it.
- bool isSRet = NumOps &&
- (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
- ISD::ParamFlags::StructReturn);
-
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into registers.
- SDOperand InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
- InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT()) {
- Chain = DAG.getCopyToReg(Chain, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
- InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // We should use extra load for direct calls to dllimported functions in
- // non-JIT mode.
- if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
- getTargetMachine(), true))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
-
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- SmallVector<SDOperand, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- // Add an implicit use GOT pointer in EBX.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT())
- Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
-
- if (InFlag.Val)
- Ops.push_back(InFlag);
-
- Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
- NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Create the CALLSEQ_END node.
- unsigned NumBytesForCalleeToPush = 0;
-
- if (CC == CallingConv::X86_StdCall) {
- if (isVarArg)
- NumBytesForCalleeToPush = isSRet ? 4 : 0;
- else
- NumBytesForCalleeToPush = NumBytes;
- } else {
- // If this is is a call to a struct-return function, the callee
- // pops the hidden struct pointer, so we have to push it back.
- // This is common for Darwin/X86, Linux & Mingw32 targets.
- NumBytesForCalleeToPush = isSRet ? 4 : 0;
- }
-
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
- Ops.push_back(InFlag);
- Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Handle result values, copying them out of physregs into vregs that we
- // return.
- return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
-}
-
-
-//===----------------------------------------------------------------------===//
-// FastCall Calling Convention implementation
-//===----------------------------------------------------------------------===//
-//
-// The X86 'fastcall' calling convention passes up to two integer arguments in
-// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
-// and requires that the callee pop its arguments off the stack (allowing proper
-// tail calls), and has the same return value conventions as C calling convs.
-//
-// This calling convention always arranges for the callee pop value to be 8n+4
-// bytes, which is needed for tail recursion elimination and stack alignment
-// reasons.
-SDOperand
-X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- SDOperand Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
-
- // Assign locations to all of the incoming arguments.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
- getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);
-
- SmallVector<SDOperand, 8> ArgValues;
- unsigned LastVal = ~0U;
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
- // places.
- assert(VA.getValNo() != LastVal &&
- "Don't support value assigned to multiple locs yet");
- LastVal = VA.getValNo();
-
- if (VA.isRegLoc()) {
- MVT::ValueType RegVT = VA.getLocVT();
- TargetRegisterClass *RC;
- if (RegVT == MVT::i32)
- RC = X86::GR32RegisterClass;
- else {
- assert(MVT::isVector(RegVT));
- RC = X86::VR128RegisterClass;
- }
-
- unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
- SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
-
- // If this is an 8 or 16-bit value, it is really passed promoted to 32
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
-
- ArgValues.push_back(ArgValue);
- } else {
- assert(VA.isMemLoc());
-
- // Create the nodes corresponding to a load from this parameter slot.
- int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
- VA.getLocMemOffset());
- SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
- ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
- }
- }
-
- ArgValues.push_back(Root);
-
- unsigned StackSize = CCInfo.getNextStackOffset();
-
- if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
- // Make sure the instruction takes 8n+4 bytes to make sure the start of the
- // arguments and the arguments after the retaddr has been pushed are aligned.
- if ((StackSize & 7) == 0)
- StackSize += 4;
- }
-
- VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
- RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
- BytesToPopOnReturn = StackSize; // Callee pops all stack arguments.
- BytesCallerReserves = 0;
-
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
-
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
-}
-
-SDOperand
-X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
- const SDOperand &StackPtr,
- const CCValAssign &VA,
- SDOperand Chain,
- SDOperand Arg) {
- SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
- PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
- SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
- unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
- if (Flags & ISD::ParamFlags::ByVal) {
- unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
- ISD::ParamFlags::ByValAlignOffs);
-
- assert (Align >= 8);
- unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
- ISD::ParamFlags::ByValSizeOffs;
-
- SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
- SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
-
- return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode,
- AlignNode);
- } else {
- return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
- }
-}
-
-SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
- unsigned CC) {
- SDOperand Chain = Op.getOperand(0);
- bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- SDOperand Callee = Op.getOperand(4);
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);
-
- // Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
-
- if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
- // Make sure the instruction takes 8n+4 bytes to make sure the start of the
- // arguments and the arguments after the retaddr has been pushed are aligned.
- if ((NumBytes & 7) == 0)
- NumBytes += 4;
- }
-
- Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
-
- SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
- SmallVector<SDOperand, 8> MemOpChains;
-
- SDOperand StackPtr;
-
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
- break;
- }
-
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc());
- if (StackPtr.Val == 0)
- StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
- SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
- PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
- }
- }
-
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into registers.
- SDOperand InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
- InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // We should use extra load for direct calls to dllimported functions in
- // non-JIT mode.
- if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
- getTargetMachine(), true))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+//===----------------------------------------------------------------------===//
+// C & StdCall & Fast Calling Convention implementation
+//===----------------------------------------------------------------------===//
+// StdCall calling convention seems to be standard for many Windows' API
+// routines and around. It differs from C calling convention just a little:
+// callee should clean up the stack, not caller. Symbols should be also
+// decorated in some fancy way :) It doesn't support any vector arguments.
+// For info on fast calling convention see Fast Calling Convention (tail call)
+// implementation LowerX86_32FastCCCallTo.
- // ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT()) {
- Chain = DAG.getCopyToReg(Chain, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
- InFlag);
- InFlag = Chain.getValue(1);
- }
+/// AddLiveIn - This helper function adds the specified physical register to the
+/// MachineFunction as a live in value. It also creates a corresponding virtual
+/// register for it.
+static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
+ const TargetRegisterClass *RC) {
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- SmallVector<SDOperand, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
+// Determines whether a CALL node uses struct return semantics.
+static bool CallIsStructReturn(SDOperand Op) {
+ unsigned NumOps = (Op.getNumOperands() - 5) / 2;
+ if (!NumOps)
+ return false;
+
+ ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(6));
+ return Flags->getValue() & ISD::ParamFlags::StructReturn;
+}
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
+// Determines whether a FORMAL_ARGUMENTS node uses struct return semantics.
+static bool ArgsAreStructReturn(SDOperand Op) {
+ unsigned NumArgs = Op.Val->getNumValues() - 1;
+ if (!NumArgs)
+ return false;
+
+ ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(3));
+ return Flags->getValue() & ISD::ParamFlags::StructReturn;
+}
- // Add an implicit use GOT pointer in EBX.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT())
- Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+// Determines whether a CALL or FORMAL_ARGUMENTS node requires the callee to pop
+// its own arguments. Callee pop is necessary to support tail calls.
+bool X86TargetLowering::IsCalleePop(SDOperand Op) {
+ bool IsVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ if (IsVarArg)
+ return false;
- if (InFlag.Val)
- Ops.push_back(InFlag);
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
+ default:
+ return false;
+ case CallingConv::X86_StdCall:
+ return !Subtarget->is64Bit();
+ case CallingConv::X86_FastCall:
+ return !Subtarget->is64Bit();
+ case CallingConv::Fast:
+ return PerformTailCallOpt;
+ }
+}
- // FIXME: Do not generate X86ISD::TAILCALL for now.
- Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
- NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
+// Selects the correct CCAssignFn for a CALL or FORMAL_ARGUMENTS node.
+CCAssignFn *X86TargetLowering::CCAssignFnForNode(SDOperand Op) const {
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+
+ if (Subtarget->is64Bit())
+ if (CC == CallingConv::Fast && PerformTailCallOpt)
+ return CC_X86_64_TailCall;
+ else
+ return CC_X86_64_C;
+
+ if (CC == CallingConv::X86_FastCall)
+ return CC_X86_32_FastCall;
+ else if (CC == CallingConv::Fast && PerformTailCallOpt)
+ return CC_X86_32_TailCall;
+ else
+ return CC_X86_32_C;
+}
- // Returns a flag for retval copy to use.
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(InFlag);
- Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
+// Selects the appropriate decoration to apply to a MachineFunction containing a
+// given FORMAL_ARGUMENTS node.
+NameDecorationStyle
+X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ if (CC == CallingConv::X86_FastCall)
+ return FastCall;
+ else if (CC == CallingConv::X86_StdCall)
+ return StdCall;
+ return None;
+}
- // Handle result values, copying them out of physregs into vregs that we
- // return.
- return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
+
+// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could possibly
+// be overwritten when lowering the outgoing arguments in a tail call. Currently
+// the implementation of this call is very conservative and assumes all
+// arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with virtual
+// registers would be overwritten by direct lowering.
+// Possible improvement:
+// Check FORMAL_ARGUMENTS corresponding MERGE_VALUES for CopyFromReg nodes
+// indicating inreg passed arguments which also need not be lowered to a safe
+// stack slot.
+static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op) {
+ RegisterSDNode * OpReg = NULL;
+ if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
+ (Op.getOpcode()== ISD::CopyFromReg &&
+ (OpReg = cast<RegisterSDNode>(Op.getOperand(1))) &&
+ OpReg->getReg() >= MRegisterInfo::FirstVirtualRegister))
+ return true;
+ return false;
}
+// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+// by "Src" to address "Dst" with size and alignment information specified by
+// the specific parameter attribute. The copy will be passed as a byval function
+// parameter.
+static SDOperand
+CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
+ unsigned Flags, SelectionDAG &DAG) {
+ unsigned Align = 1 <<
+ ((Flags & ISD::ParamFlags::ByValAlign) >> ISD::ParamFlags::ByValAlignOffs);
+ unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
+ ISD::ParamFlags::ByValSizeOffs;
+ SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
+ SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
+ SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
+ return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);
+}
-//===----------------------------------------------------------------------===//
-// X86-64 C Calling Convention implementation
-//===----------------------------------------------------------------------===//
+SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo *MFI,
+ SDOperand Root, unsigned i) {
+ // Create the nodes corresponding to a load from this parameter slot.
+ unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
+ bool isByVal = Flags & ISD::ParamFlags::ByVal;
+
+ // FIXME: For now, all byval parameter objects are marked mutable. This
+ // can be changed with more analysis.
+ int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
+ VA.getLocMemOffset(), !isByVal);
+ SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
+ if (isByVal)
+ return FIN;
+ return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
+}
SDOperand
-X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ const Function* Fn = MF.getFunction();
+ if (Fn->hasExternalLinkage() &&
+ Subtarget->isTargetCygMing() &&
+ Fn->getName() == "main")
+ FuncInfo->setForceFramePointer(true);
+
+ // Decorate the function name.
+ FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));
+
MachineFrameInfo *MFI = MF.getFrameInfo();
SDOperand Root = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ unsigned CC = MF.getFunction()->getCallingConv();
+ bool Is64Bit = Subtarget->is64Bit();
- static const unsigned GPR64ArgRegs[] = {
- X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
- };
- static const unsigned XMMArgRegs[] = {
- X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
- X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
- };
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
-
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
- getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeFormalArguments(Op.Val, CCAssignFnForNode(Op));
SmallVector<SDOperand, 8> ArgValues;
unsigned LastVal = ~0U;
TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = X86::GR32RegisterClass;
- else if (RegVT == MVT::i64)
+ else if (Is64Bit && RegVT == MVT::i64)
RC = X86::GR64RegisterClass;
- else if (RegVT == MVT::f32)
+ else if (Is64Bit && RegVT == MVT::f32)
RC = X86::FR32RegisterClass;
- else if (RegVT == MVT::f64)
+ else if (Is64Bit && RegVT == MVT::f64)
RC = X86::FR64RegisterClass;
else {
assert(MVT::isVector(RegVT));
- if (MVT::getSizeInBits(RegVT) == 64) {
+ if (Is64Bit && MVT::getSizeInBits(RegVT) == 64) {
RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
RegVT = MVT::i64;
} else
ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
// Handle MMX values passed in GPRs.
- if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
+ if (Is64Bit && RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
MVT::getSizeInBits(RegVT) == 64)
ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
ArgValues.push_back(ArgValue);
} else {
assert(VA.isMemLoc());
-
- // Create the nodes corresponding to a load from this parameter slot.
- int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
- VA.getLocMemOffset());
- SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
-
- unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
- if (Flags & ISD::ParamFlags::ByVal)
- ArgValues.push_back(FIN);
- else
- ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
+ ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
}
}
-
+
unsigned StackSize = CCInfo.getNextStackOffset();
-
+ // align stack specially for tail calls
+ if (CC == CallingConv::Fast)
+ StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
+
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
- unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
-
- // For X86-64, if there are vararg parameters that are passed via
- // registers, then we must store them to their spots on the stack so they
- // may be loaded by deferencing the result of va_next.
- VarArgsGPOffset = NumIntRegs * 8;
- VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
- VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
- RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
-
- // Store the integer parameter registers.
- SmallVector<SDOperand, 8> MemOps;
- SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
- SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
- DAG.getConstant(VarArgsGPOffset, getPointerTy()));
- for (; NumIntRegs != 6; ++NumIntRegs) {
- unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
- X86::GR64RegisterClass);
- SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
- SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(8, getPointerTy()));
+ if (Is64Bit || CC != CallingConv::X86_FastCall) {
+ VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
}
-
- // Now store the XMM (fp + vector) parameter registers.
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
- DAG.getConstant(VarArgsFPOffset, getPointerTy()));
- for (; NumXMMRegs != 8; ++NumXMMRegs) {
- unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
- X86::VR128RegisterClass);
- SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
- SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(16, getPointerTy()));
+ if (Is64Bit) {
+ static const unsigned GPR64ArgRegs[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so they
+ // may be loaded by deferencing the result of va_next.
+ VarArgsGPOffset = NumIntRegs * 8;
+ VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
+ RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
+
+ // Store the integer parameter registers.
+ SmallVector<SDOperand, 8> MemOps;
+ SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+ SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(VarArgsGPOffset));
+ for (; NumIntRegs != 6; ++NumIntRegs) {
+ unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
+ X86::GR64RegisterClass);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
+ SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getIntPtrConstant(8));
+ }
+
+ // Now store the XMM (fp + vector) parameter registers.
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(VarArgsFPOffset));
+ for (; NumXMMRegs != 8; ++NumXMMRegs) {
+ unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
+ X86::VR128RegisterClass);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
+ SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getIntPtrConstant(16));
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOps[0], MemOps.size());
}
- if (!MemOps.empty())
- Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOps[0], MemOps.size());
}
+
+ // Make sure the instruction takes 8n+4 bytes to make sure the start of the
+ // arguments and the arguments after the retaddr has been pushed are
+ // aligned.
+ if (!Is64Bit && CC == CallingConv::X86_FastCall &&
+ !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
+ (StackSize & 7) == 0)
+ StackSize += 4;
ArgValues.push_back(Root);
- BytesToPopOnReturn = 0; // Callee pops nothing.
- BytesCallerReserves = StackSize;
+ // Some CCs need callee pop.
+ if (IsCalleePop(Op)) {
+ BytesToPopOnReturn = StackSize; // Callee pops everything.
+ BytesCallerReserves = 0;
+ } else {
+ BytesToPopOnReturn = 0; // Callee pops nothing.
+ // If this is an sret function, the return should pop the hidden pointer.
+ if (!Is64Bit && ArgsAreStructReturn(Op))
+ BytesToPopOnReturn = 4;
+ BytesCallerReserves = StackSize;
+ }
+
+ if (!Is64Bit) {
+ RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only.
+ if (CC == CallingConv::X86_FastCall)
+ VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
+ }
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
// Return the new list of results.
}
SDOperand
-X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
- unsigned CC) {
+X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
+ const SDOperand &StackPtr,
+ const CCValAssign &VA,
+ SDOperand Chain,
+ SDOperand Arg) {
+ SDOperand PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
+ SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
+ unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
+ if (Flags & ISD::ParamFlags::ByVal) {
+ return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
+ }
+ return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
+}
+
+SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
SDOperand Chain = Op.getOperand(0);
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
+ bool IsTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0
+ && CC == CallingConv::Fast && PerformTailCallOpt;
SDOperand Callee = Op.getOperand(4);
-
+ bool Is64Bit = Subtarget->is64Bit();
+
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
+
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
-
+ CCInfo.AnalyzeCallOperands(Op.Val, CCAssignFnForNode(Op));
+
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
- Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
+ if (CC == CallingConv::Fast)
+ NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
+
+ // Make sure the instruction takes 8n+4 bytes to make sure the start of the
+ // arguments and the arguments after the retaddr has been pushed are aligned.
+ if (!Is64Bit && CC == CallingConv::X86_FastCall &&
+ !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
+ (NumBytes & 7) == 0)
+ NumBytes += 4;
+
+ int FPDiff = 0;
+ if (IsTailCall) {
+ // Lower arguments at fp - stackoffset + fpdiff.
+ unsigned NumBytesCallerPushed =
+ MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
+ FPDiff = NumBytesCallerPushed - NumBytes;
+
+ // Set the delta of movement of the returnaddr stackslot.
+ // But only set if delta is greater than previous delta.
+ if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
+ MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes));
+
+ SDOperand RetAddrFrIdx, NewRetAddrFrIdx;
+ if (IsTailCall) {
+ // Adjust the Return address stack slot.
+ if (FPDiff) {
+ MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
+ RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
+ // Load the "old" Return address.
+ RetAddrFrIdx =
+ DAG.getLoad(VT, Chain,RetAddrFrIdx, NULL, 0);
+ // Calculate the new stack slot for the return address.
+ int SlotSize = Is64Bit ? 8 : 4;
+ int NewReturnAddrFI =
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
+ NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+ Chain = SDOperand(RetAddrFrIdx.Val, 1);
+ }
+ }
SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
SmallVector<SDOperand, 8> MemOpChains;
SDOperand StackPtr;
-
- // Walk the register/memloc assignments, inserting copies/loads.
+
+ // Walk the register/memloc assignments, inserting copies/loads. For tail
+ // calls, lower arguments which could otherwise be possibly overwritten to the
+ // stack slot where they would go on normal function calls.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
- assert(VA.isMemLoc());
- if (StackPtr.Val == 0)
- StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
-
- MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
- Arg));
+ if (!IsTailCall || IsPossiblyOverwrittenArgumentOfTailCall(Arg)) {
+ assert(VA.isMemLoc());
+ if (StackPtr.Val == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
+ Arg));
+ }
}
}
InFlag = Chain.getValue(1);
}
- if (isVarArg) {
+ if (IsTailCall)
+ InFlag = SDOperand(); // ??? Isn't this nuking the preceding loop's output?
+
+ // ELF / PIC requires GOT in the EBX register before function calls via PLT
+ // GOT pointer.
+ // Does not work with tail call since ebx is not restored correctly by
+ // tailcaller. TODO: at least for x86 - verify for x86-64
+ if (!IsTailCall && !Is64Bit &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT()) {
+ Chain = DAG.getCopyToReg(Chain, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (Is64Bit && isVarArg) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
InFlag = Chain.getValue(1);
}
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (IsTailCall) {
+ SmallVector<SDOperand, 8> MemOpChains2;
+ SDOperand FIN;
+ int FI = 0;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (!VA.isRegLoc()) {
+ assert(VA.isMemLoc());
+ SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
+ SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
+ unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
+ // Create frame index.
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
+ uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ FIN = DAG.getFrameIndex(FI, MVT::i32);
+ SDOperand Source = Arg;
+ if (IsPossiblyOverwrittenArgumentOfTailCall(Arg)) {
+ // Copy from stack slots to stack slot of a tail called function. This
+ // needs to be done because if we would lower the arguments directly
+ // to their real stack slot we might end up overwriting each other.
+ // Get source stack slot.
+ Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ if (StackPtr.Val == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
+ Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);
+ if ((Flags & ISD::ParamFlags::ByVal)==0)
+ Source = DAG.getLoad(VA.getValVT(), Chain, Source, NULL, 0);
+ }
+
+ if (Flags & ISD::ParamFlags::ByVal) {
+ // Copy relative to framepointer.
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
+ Flags, DAG));
+ } else {
+ // Store relative to framepointer.
+ MemOpChains2.push_back(DAG.getStore(Chain, Source, FIN, NULL, 0));
+ }
+ }
+ }
+
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Store the return address to the appropriate stack slot.
+ if (FPDiff)
+ Chain = DAG.getStore(Chain,RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);
+ }
+
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
- if (getTargetMachine().getCodeModel() != CodeModel::Large
+ if ((IsTailCall || !Is64Bit ||
+ getTargetMachine().getCodeModel() != CodeModel::Large)
&& !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
getTargetMachine(), true))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- if (getTargetMachine().getCodeModel() != CodeModel::Large)
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ if (IsTailCall || !Is64Bit ||
+ getTargetMachine().getCodeModel() != CodeModel::Large)
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
-
+ } else if (IsTailCall) {
+ assert(Callee.getOpcode() == ISD::LOAD &&
+ "Function destination must be loaded into virtual register");
+ unsigned Opc = Is64Bit ? X86::R9 : X86::ECX;
+
+ Chain = DAG.getCopyToReg(Chain,
+ DAG.getRegister(Opc, getPointerTy()) ,
+ Callee,InFlag);
+ Callee = DAG.getRegister(Opc, getPointerTy());
+ // Add register as live out.
+ DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
+ }
+
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDOperand, 8> Ops;
+
+ if (IsTailCall) {
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getIntPtrConstant(NumBytes));
+ Ops.push_back(DAG.getIntPtrConstant(0));
+ if (InFlag.Val)
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Returns a chain & a flag for retval copy to use.
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Ops.clear();
+ }
+
Ops.push_back(Chain);
Ops.push_back(Callee);
+ if (IsTailCall)
+ Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
+
+ // Add an implicit use GOT pointer in EBX.
+ if (!IsTailCall && !Is64Bit &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
-
+
if (InFlag.Val)
Ops.push_back(InFlag);
- // FIXME: Do not generate X86ISD::TAILCALL for now.
- Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
- NodeTys, &Ops[0], Ops.size());
+ if (IsTailCall) {
+ assert(InFlag.Val &&
+ "Flag must be set. Depend on flag being set in LowerRET");
+ Chain = DAG.getNode(X86ISD::TAILCALL,
+ Op.Val->getVTList(), &Ops[0], Ops.size());
+
+ return SDOperand(Chain.Val, Op.ResNo);
+ }
+
+ Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
+ // Create the CALLSEQ_END node.
+ unsigned NumBytesForCalleeToPush;
+ if (IsCalleePop(Op))
+ NumBytesForCalleeToPush = NumBytes; // Callee pops everything
+ else if (!Is64Bit && CallIsStructReturn(Op))
+ // If this is is a call to a struct-return function, the callee
+ // pops the hidden struct pointer, so we have to push it back.
+ // This is common for Darwin/X86, Linux & Mingw32 targets.
+ NumBytesForCalleeToPush = 4;
+ else
+ NumBytesForCalleeToPush = 0; // Callee pops nothing.
+
// Returns a flag for retval copy to use.
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(DAG.getConstant(0, getPointerTy()));
- Ops.push_back(InFlag);
- Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NumBytes),
+ DAG.getIntPtrConstant(NumBytesForCalleeToPush),
+ InFlag);
InFlag = Chain.getValue(1);
-
+
// Handle result values, copying them out of physregs into vregs that we
// return.
return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}
+//===----------------------------------------------------------------------===//
+// Fast Calling Convention (tail call) implementation
+//===----------------------------------------------------------------------===//
+
+// Like std call, callee cleans arguments, convention except that ECX is
+// reserved for storing the tail called function address. Only 2 registers are
+// free for argument passing (inreg). Tail call optimization is performed
+// provided:
+// * tailcallopt is enabled
+// * caller/callee are fastcc
+// * elf/pic is disabled OR
+// * elf/pic enabled + callee is in module + callee has
+// visibility protected or hidden
+// To keep the stack aligned according to platform abi the function
+// GetAlignedArgumentStackSize ensures that argument delta is always multiples
+// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
+// If a tail called function callee has more arguments than the caller the
+// caller needs to make sure that there is room to move the RETADDR to. This is
+// achieved by reserving an area the size of the argument delta right after the
+// original REtADDR, but before the saved framepointer or the spilled registers
+// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
+// stack layout:
+// arg1
+// arg2
+// RETADDR
+// [ new RETADDR
+// move area ]
+// (possible EBP)
+// ESI
+// EDI
+// local1 ..
+
+/// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned
+/// for a 16 byte align requirement.
+unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG& DAG) {
+ if (PerformTailCallOpt) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ uint64_t AlignMask = StackAlignment - 1;
+ int64_t Offset = StackSize;
+ unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
+ if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
+ // Number smaller than 12 so just add the difference.
+ Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
+ } else {
+ // Mask out lower bits, add stackalignment once plus the 12 bytes.
+ Offset = ((~AlignMask) & Offset) + StackAlignment +
+ (StackAlignment-SlotSize);
+ }
+ StackSize = Offset;
+ }
+ return StackSize;
+}
+
+/// IsEligibleForTailCallElimination - Check to see whether the next instruction
+/// following the call is a return. A function is eligible if caller/callee
+/// calling conventions match, currently only fastcc supports tail calls, and
+/// the function CALL is immediatly followed by a RET.
+bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
+ SDOperand Ret,
+ SelectionDAG& DAG) const {
+ if (!PerformTailCallOpt)
+ return false;
+
+ // Check whether CALL node immediatly preceeds the RET node and whether the
+ // return uses the result of the node or is a void return.
+ unsigned NumOps = Ret.getNumOperands();
+ if ((NumOps == 1 &&
+ (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
+ Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
+ (NumOps > 1 &&
+ Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
+ Ret.getOperand(1) == SDOperand(Call.Val,0))) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned CallerCC = MF.getFunction()->getCallingConv();
+ unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ SDOperand Callee = Call.getOperand(4);
+ // On elf/pic %ebx needs to be livein.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
+ !Subtarget->isPICStyleGOT())
+ return true;
+
+ // Can only do local tail calls with PIC.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+ }
+
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
// X < 0 -> X == 0, jump on sign.
X86CC = X86::COND_S;
return true;
+ } else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
+ // X < 1 -> X <= 0
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ X86CC = X86::COND_LE;
+ return true;
}
}
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
- if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ if (NumElts != 2 && NumElts != 4)
return false;
if (!isUndefOrEqual(Elts[0], NumElts))
if (Arg.getOpcode() == ISD::UNDEF) continue;
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
- if (Val > 4)
+ if (Val >= 4)
return false;
}
return true;
}
-/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as
+/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
/// values in ther permute mask.
static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
SDOperand &V2, SDOperand &Mask,
}
std::swap(V1, V2);
- Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
+/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
+/// the two vector operands have swapped position.
+static
+SDOperand CommuteVectorShuffleMask(SDOperand Mask, SelectionDAG &DAG) {
+ MVT::ValueType MaskVT = Mask.getValueType();
+ MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
+ unsigned NumElems = Mask.getNumOperands();
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDOperand Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+ continue;
+ }
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val < NumElems)
+ MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
+ else
+ MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
+}
+
+
/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
unsigned NumElems = Mask.getNumOperands();
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Arg = Mask.getOperand(i);
- if (Arg.getOpcode() != ISD::UNDEF) {
- unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
- if (Idx < NumElems) {
- unsigned Opc = V1.Val->getOpcode();
- if (Opc == ISD::UNDEF)
- continue;
- if (Opc != ISD::BUILD_VECTOR ||
- !isZeroNode(V1.Val->getOperand(Idx)))
- return false;
- } else if (Idx >= NumElems) {
- unsigned Opc = V2.Val->getOpcode();
- if (Opc == ISD::UNDEF)
- continue;
- if (Opc != ISD::BUILD_VECTOR ||
- !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
- return false;
- }
+ if (Arg.getOpcode() == ISD::UNDEF)
+ continue;
+
+ unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
+ if (Idx < NumElems) {
+ unsigned Opc = V1.Val->getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.Val))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !isZeroNode(V1.Val->getOperand(Idx)))
+ return false;
+ } else if (Idx >= NumElems) {
+ unsigned Opc = V2.Val->getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.Val))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
+ return false;
}
}
return true;
///
static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
assert(MVT::isVector(VT) && "Expected a vector type");
- unsigned NumElems = MVT::getVectorNumElements(VT);
- MVT::ValueType EVT = MVT::getVectorElementType(VT);
- bool isFP = MVT::isFloatingPoint(EVT);
- SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
- SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
- return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
+
+ // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+ // type. This ensures they get CSE'd.
+ SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDOperand Vec;
+ if (MVT::getSizeInBits(VT) == 64) // MMX
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
+ else // SSE
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
+}
+
+/// getOnesVector - Returns a vector of specified type with all bits set.
+///
+static SDOperand getOnesVector(MVT::ValueType VT, SelectionDAG &DAG) {
+ assert(MVT::isVector(VT) && "Expected a vector type");
+
+ // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+ // type. This ensures they get CSE'd.
+ SDOperand Cst = DAG.getTargetConstant(~0U, MVT::i32);
+ SDOperand Vec;
+ if (MVT::getSizeInBits(VT) == 64) // MMX
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
+ else // SSE
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}
+
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 points to its first element.
static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
}
V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
- Mask = getZeroVector(MaskVT, DAG);
+ Mask = getZeroVector(MVT::v4i32, DAG);
SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
-/// vector of zero or undef vector.
+/// vector of zero or undef vector. This produces a shuffle where the low
+/// element of V2 is swizzled into the zero/undef vector, landing at element
+/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
unsigned NumElems, unsigned Idx,
bool isZero, SelectionDAG &DAG) {
SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
- SDOperand Zero = DAG.getConstant(0, EVT);
- SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
- MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
+ SmallVector<SDOperand, 16> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i)
+ if (i == Idx) // If this is the insertion idx, put the low elt of V2 here.
+ MaskVec.push_back(DAG.getConstant(NumElems, EVT));
+ else
+ MaskVec.push_back(DAG.getConstant(i, EVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
if (ThisElt.Val)
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
- DAG.getConstant(i/2, TLI.getPointerTy()));
+ DAG.getIntPtrConstant(i/2));
}
}
First = false;
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
- DAG.getConstant(i, TLI.getPointerTy()));
+ DAG.getIntPtrConstant(i));
}
}
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
- // All zero's are handled with pxor.
- if (ISD::isBuildVectorAllZeros(Op.Val))
- return Op;
+ // All zero's are handled with pxor, all one's are handled with pcmpeqd.
+ if (ISD::isBuildVectorAllZeros(Op.Val) || ISD::isBuildVectorAllOnes(Op.Val)) {
+ // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
+ // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
+ // eliminated on x86-32 hosts.
+ if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
+ return Op;
- // All one's are handled with pcmpeqd.
- if (ISD::isBuildVectorAllOnes(Op.Val))
- return Op;
+ if (ISD::isBuildVectorAllOnes(Op.Val))
+ return getOnesVector(Op.getValueType(), DAG);
+ return getZeroVector(Op.getValueType(), DAG);
+ }
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = MVT::getVectorElementType(VT);
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
- unsigned NumNonZeroImms = 0;
- std::set<SDOperand> Values;
+ bool HasNonImms = false;
+ SmallSet<SDOperand, 8> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Elt = Op.getOperand(i);
- if (Elt.getOpcode() != ISD::UNDEF) {
- Values.insert(Elt);
- if (isZeroNode(Elt))
- NumZero++;
- else {
- NonZeros |= (1 << i);
- NumNonZero++;
- if (Elt.getOpcode() == ISD::Constant ||
- Elt.getOpcode() == ISD::ConstantFP)
- NumNonZeroImms++;
- }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ Values.insert(Elt);
+ if (Elt.getOpcode() != ISD::Constant &&
+ Elt.getOpcode() != ISD::ConstantFP)
+ HasNonImms = true;
+ if (isZeroNode(Elt))
+ NumZero++;
+ else {
+ NonZeros |= (1 << i);
+ NumNonZero++;
}
}
if (NumNonZero == 0) {
- if (NumZero == 0)
- // All undef vector. Return an UNDEF.
- return DAG.getNode(ISD::UNDEF, VT);
- else
- // A mix of zero and undef. Return a zero vector.
- return getZeroVector(VT, DAG);
+ // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ return DAG.getNode(ISD::UNDEF, VT);
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
return SDOperand();
// Special case for single non-zero element.
- if (NumNonZero == 1) {
+ if (NumNonZero == 1 && NumElems <= 4) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
NumZero > 0, DAG);
+ else if (!HasNonImms) // Otherwise, it's better to do a constpool load.
+ return SDOperand();
if (EVTBits == 32) {
// Turn it into a shuffle of zero and zero-extended scalar to vector.
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
- if (NumNonZero == NumNonZeroImms)
+ if (!HasNonImms)
return SDOperand();
// Let legalizer expand 2-wide build_vectors.
return SDOperand();
}
+static
+SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
+ SDOperand PermMask, SelectionDAG &DAG,
+ TargetLowering &TLI) {
+ SDOperand NewV;
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8);
+ MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
+ MVT::ValueType PtrVT = TLI.getPointerTy();
+ SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(),
+ PermMask.Val->op_end());
+
+ // First record which half of which vector the low elements come from.
+ SmallVector<unsigned, 4> LowQuad(4);
+ for (unsigned i = 0; i < 4; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ int QuadIdx = EltIdx / 4;
+ ++LowQuad[QuadIdx];
+ }
+ int BestLowQuad = -1;
+ unsigned MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (LowQuad[i] > MaxQuad) {
+ BestLowQuad = i;
+ MaxQuad = LowQuad[i];
+ }
+ }
+
+ // Record which half of which vector the high elements come from.
+ SmallVector<unsigned, 4> HighQuad(4);
+ for (unsigned i = 4; i < 8; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ int QuadIdx = EltIdx / 4;
+ ++HighQuad[QuadIdx];
+ }
+ int BestHighQuad = -1;
+ MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (HighQuad[i] > MaxQuad) {
+ BestHighQuad = i;
+ MaxQuad = HighQuad[i];
+ }
+ }
+
+ // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it.
+ if (BestLowQuad != -1 || BestHighQuad != -1) {
+ // First sort the 4 chunks in order using shufpd.
+ SmallVector<SDOperand, 8> MaskVec;
+ if (BestLowQuad != -1)
+ MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32));
+ else
+ MaskVec.push_back(DAG.getConstant(0, MVT::i32));
+ if (BestHighQuad != -1)
+ MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32));
+ else
+ MaskVec.push_back(DAG.getConstant(1, MVT::i32));
+ SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2);
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1),
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask);
+ NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV);
+
+ // Now sort high and low parts separately.
+ BitVector InOrder(8);
+ if (BestLowQuad != -1) {
+ // Sort lower half in order using PSHUFLW.
+ MaskVec.clear();
+ bool AnyOutOrder = false;
+ for (unsigned i = 0; i != 4; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(Elt);
+ InOrder.set(i);
+ } else {
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx != i)
+ AnyOutOrder = true;
+ MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT));
+ // If this element is in the right place after this shuffle, then
+ // remember it.
+ if ((int)(EltIdx / 4) == BestLowQuad)
+ InOrder.set(i);
+ }
+ }
+ if (AnyOutOrder) {
+ for (unsigned i = 4; i != 8; ++i)
+ MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
+ }
+ }
+
+ if (BestHighQuad != -1) {
+ // Sort high half in order using PSHUFHW if possible.
+ MaskVec.clear();
+ for (unsigned i = 0; i != 4; ++i)
+ MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+ bool AnyOutOrder = false;
+ for (unsigned i = 4; i != 8; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(Elt);
+ InOrder.set(i);
+ } else {
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx != i)
+ AnyOutOrder = true;
+ MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
+ // If this element is in the right place after this shuffle, then
+ // remember it.
+ if ((int)(EltIdx / 4) == BestHighQuad)
+ InOrder.set(i);
+ }
+ }
+ if (AnyOutOrder) {
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
+ }
+ }
+
+ // The other elements are put in the right place using pextrw and pinsrw.
+ for (unsigned i = 0; i != 8; ++i) {
+ if (InOrder[i])
+ continue;
+ SDOperand Elt = MaskElts[i];
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx == i)
+ continue;
+ SDOperand ExtOp = (EltIdx < 8)
+ ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
+ DAG.getConstant(EltIdx, PtrVT))
+ : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
+ DAG.getConstant(EltIdx - 8, PtrVT));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+ DAG.getConstant(i, PtrVT));
+ }
+ return NewV;
+ }
+
+ // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
+ ///as few as possible.
+ // First, let's find out how many elements are already in the right order.
+ unsigned V1InOrder = 0;
+ unsigned V1FromV1 = 0;
+ unsigned V2InOrder = 0;
+ unsigned V2FromV2 = 0;
+ SmallVector<SDOperand, 8> V1Elts;
+ SmallVector<SDOperand, 8> V2Elts;
+ for (unsigned i = 0; i < 8; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ V1Elts.push_back(Elt);
+ V2Elts.push_back(Elt);
+ ++V1InOrder;
+ ++V2InOrder;
+ continue;
+ }
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx == i) {
+ V1Elts.push_back(Elt);
+ V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
+ ++V1InOrder;
+ } else if (EltIdx == i+8) {
+ V1Elts.push_back(Elt);
+ V2Elts.push_back(DAG.getConstant(i, MaskEVT));
+ ++V2InOrder;
+ } else if (EltIdx < 8) {
+ V1Elts.push_back(Elt);
+ ++V1FromV1;
+ } else {
+ V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
+ ++V2FromV2;
+ }
+ }
+
+ if (V2InOrder > V1InOrder) {
+ PermMask = CommuteVectorShuffleMask(PermMask, DAG);
+ std::swap(V1, V2);
+ std::swap(V1Elts, V2Elts);
+ std::swap(V1FromV1, V2FromV2);
+ }
+
+ if ((V1FromV1 + V1InOrder) != 8) {
+ // Some elements are from V2.
+ if (V1FromV1) {
+ // If there are elements that are from V1 but out of place,
+ // then first sort them in place
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i < 8; ++i) {
+ SDOperand Elt = V1Elts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+ continue;
+ }
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx >= 8)
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+ else
+ MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
+ }
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
+ }
+
+ NewV = V1;
+ for (unsigned i = 0; i < 8; ++i) {
+ SDOperand Elt = V1Elts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx < 8)
+ continue;
+ SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
+ DAG.getConstant(EltIdx - 8, PtrVT));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+ DAG.getConstant(i, PtrVT));
+ }
+ return NewV;
+ } else {
+ // All elements are from V1.
+ NewV = V1;
+ for (unsigned i = 0; i < 8; ++i) {
+ SDOperand Elt = V1Elts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
+ DAG.getConstant(EltIdx, PtrVT));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+ DAG.getConstant(i, PtrVT));
+ }
+ return NewV;
+ }
+}
+
+/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be
+/// done when every pair / quad of shuffle mask elements point to elements in
+/// the right sequence. e.g.
+/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
+static
+SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2,
+ MVT::ValueType VT,
+ SDOperand PermMask, SelectionDAG &DAG,
+ TargetLowering &TLI) {
+ unsigned NumElems = PermMask.getNumOperands();
+ unsigned NewWidth = (NumElems == 4) ? 2 : 4;
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+ MVT::ValueType NewVT = MaskVT;
+ switch (VT) {
+ case MVT::v4f32: NewVT = MVT::v2f64; break;
+ case MVT::v4i32: NewVT = MVT::v2i64; break;
+ case MVT::v8i16: NewVT = MVT::v4i32; break;
+ case MVT::v16i8: NewVT = MVT::v4i32; break;
+ default: assert(false && "Unexpected!");
+ }
+
+ if (NewWidth == 2)
+ if (MVT::isInteger(VT))
+ NewVT = MVT::v2i64;
+ else
+ NewVT = MVT::v2f64;
+ unsigned Scale = NumElems / NewWidth;
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i += Scale) {
+ unsigned StartIdx = ~0U;
+ for (unsigned j = 0; j < Scale; ++j) {
+ SDOperand Elt = PermMask.getOperand(i+j);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (StartIdx == ~0U)
+ StartIdx = EltIdx - (EltIdx % Scale);
+ if (EltIdx != StartIdx + j)
+ return SDOperand();
+ }
+ if (StartIdx == ~0U)
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
+ else
+ MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32));
+ }
+
+ V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &MaskVec[0], MaskVec.size()));
+}
+
SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
return PromoteSplat(Op, DAG);
}
+ // If the shuffle can be profitably rewritten as a narrower shuffle, then
+ // do it!
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+ SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ if (NewOp.Val)
+ return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+ // FIXME: Figure out a cleaner way to do this.
+ // Try to make use of movq to zero out the top part.
+ if (ISD::isBuildVectorAllZeros(V2.Val)) {
+ SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ if (NewOp.Val) {
+ SDOperand NewV1 = NewOp.getOperand(0);
+ SDOperand NewV2 = NewOp.getOperand(1);
+ SDOperand NewMask = NewOp.getOperand(2);
+ if (isCommutedMOVL(NewMask.Val, true, false)) {
+ NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
+ NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
+ NewV1, NewV2, getMOVLMask(2, DAG));
+ return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ }
+ }
+ } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
+ SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
+ return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ }
+ }
+
if (X86::isMOVLMask(PermMask.Val))
return (V1IsUndef) ? V2 : Op;
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
bool Commuted = false;
+ // FIXME: This should also accept a bitcast of a splat? Be careful, not
+ // 1,1,1,1 -> v8i16 though.
V1IsSplat = isSplatVector(V1.Val);
V2IsSplat = isSplatVector(V2.Val);
+
+ // Canonicalize the splat or undef, if present, to be on the RHS.
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
Commuted = true;
}
+ // FIXME: Figure out a cleaner way to do this.
if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
if (V2IsUndef) return V1;
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (X86::isSHUFPMask(PermMask.Val) &&
MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
return Op;
-
- // Handle v8i16 shuffle high / low shuffle node pair.
- if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
- MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
- SmallVector<SDOperand, 8> MaskVec;
- for (unsigned i = 0; i != 4; ++i)
- MaskVec.push_back(PermMask.getOperand(i));
- for (unsigned i = 4; i != 8; ++i)
- MaskVec.push_back(DAG.getConstant(i, BaseVT));
- SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
- &MaskVec[0], MaskVec.size());
- V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
- MaskVec.clear();
- for (unsigned i = 0; i != 4; ++i)
- MaskVec.push_back(DAG.getConstant(i, BaseVT));
- for (unsigned i = 4; i != 8; ++i)
- MaskVec.push_back(PermMask.getOperand(i));
- Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
- return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
- }
} else {
// Floating point cases in the other order.
if (X86::isSHUFPMask(PermMask.Val))
}
}
- if (NumElems == 4 &&
- // Don't do this for MMX.
- MVT::getSizeInBits(VT) != 64) {
+ // Handle v8i16 specifically since SSE can do byte extraction and insertion.
+ if (VT == MVT::v8i16) {
+ SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
+ if (NewOp.Val)
+ return NewOp;
+ }
+
+ // Handle all 4 wide cases with a number of shuffles.
+ if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
+ // Don't do this for MMX.
MVT::ValueType MaskVT = PermMask.getValueType();
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<std::pair<int, int>, 8> Locs;
Locs.reserve(NumElems);
- SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
- SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
+ SmallVector<SDOperand, 8> Mask1(NumElems,
+ DAG.getNode(ISD::UNDEF, MaskEVT));
+ SmallVector<SDOperand, 8> Mask2(NumElems,
+ DAG.getNode(ISD::UNDEF, MaskEVT));
unsigned NumHi = 0;
unsigned NumLo = 0;
// If no more than two elements come from either vector. This can be
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
if (MVT::getSizeInBits(VT) == 16) {
+ SDOperand Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
+ Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
MVT::ValueType EVT = (MVT::ValueType)(VT+1);
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (MVT::getSizeInBits(VT) == 32) {
- SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
SmallVector<SDOperand, 8> IdxVec;
- IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
- IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
- IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
- IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
+ IdxVec.
+ push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
+ IdxVec.
+ push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
+ IdxVec.
+ push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
+ IdxVec.
+ push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
+ SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
- DAG.getConstant(0, getPointerTy()));
+ DAG.getIntPtrConstant(0));
} else if (MVT::getSizeInBits(VT) == 64) {
- SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
SmallVector<SDOperand, 8> IdxVec;
IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
- IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
+ IdxVec.
+ push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
+ SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
- DAG.getConstant(0, getPointerTy()));
+ DAG.getIntPtrConstant(0));
}
return SDOperand();
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
- // Transform it so it match pinsrw which expects a 16-bit value in a GR32
- // as its second argument.
MVT::ValueType VT = Op.getValueType();
- MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
+ MVT::ValueType EVT = MVT::getVectorElementType(VT);
+ if (EVT == MVT::i8)
+ return SDOperand();
+
SDOperand N0 = Op.getOperand(0);
SDOperand N1 = Op.getOperand(1);
SDOperand N2 = Op.getOperand(2);
- if (MVT::getSizeInBits(BaseVT) == 16) {
+
+ if (MVT::getSizeInBits(EVT) == 16) {
+ // Transform it so it match pinsrw which expects a 16-bit value in a GR32
+ // as its second argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
- N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue());
return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
- } else if (MVT::getSizeInBits(BaseVT) == 32) {
- unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
- if (Idx == 0) {
- // Use a movss.
- N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
- MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
- SmallVector<SDOperand, 8> MaskVec;
- MaskVec.push_back(DAG.getConstant(4, BaseVT));
- for (unsigned i = 1; i <= 3; ++i)
- MaskVec.push_back(DAG.getConstant(i, BaseVT));
- return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
- DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
- &MaskVec[0], MaskVec.size()));
- } else {
- // Use two pinsrw instructions to insert a 32 bit value.
- Idx <<= 1;
- if (MVT::isFloatingPoint(N1.getValueType())) {
- N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
- N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
- N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
- DAG.getConstant(0, getPointerTy()));
- }
- N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
- N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
- DAG.getConstant(Idx, getPointerTy()));
- N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
- N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
- DAG.getConstant(Idx+1, getPointerTy()));
- return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
- }
}
-
return SDOperand();
}
return Result;
}
+/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
+/// take a 2 x i32 value to shift plus a shift amount.
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
- assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
- "Not an i64 shift!");
- bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
- SDOperand ShOpLo = Op.getOperand(0);
- SDOperand ShOpHi = Op.getOperand(1);
- SDOperand ShAmt = Op.getOperand(2);
- SDOperand Tmp1 = isSRA ?
- DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
- DAG.getConstant(0, MVT::i32);
-
- SDOperand Tmp2, Tmp3;
- if (Op.getOpcode() == ISD::SHL_PARTS) {
- Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
- Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
- } else {
- Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
- Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
- }
-
- const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
- SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
- DAG.getConstant(32, MVT::i8));
- SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
- SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
-
- SDOperand Hi, Lo;
- SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
-
- VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
- SmallVector<SDOperand, 4> Ops;
- if (Op.getOpcode() == ISD::SHL_PARTS) {
- Ops.push_back(Tmp2);
- Ops.push_back(Tmp3);
- Ops.push_back(CC);
- Ops.push_back(InFlag);
- Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
- InFlag = Hi.getValue(1);
+ assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
+ "Not an i64 shift!");
+ bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
+ SDOperand ShOpLo = Op.getOperand(0);
+ SDOperand ShOpHi = Op.getOperand(1);
+ SDOperand ShAmt = Op.getOperand(2);
+ SDOperand Tmp1 = isSRA ?
+ DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
+ DAG.getConstant(0, MVT::i32);
+
+ SDOperand Tmp2, Tmp3;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
+ } else {
+ Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
+ }
- Ops.clear();
- Ops.push_back(Tmp3);
- Ops.push_back(Tmp1);
- Ops.push_back(CC);
- Ops.push_back(InFlag);
- Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
- } else {
- Ops.push_back(Tmp2);
- Ops.push_back(Tmp3);
- Ops.push_back(CC);
- Ops.push_back(InFlag);
- Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
- InFlag = Lo.getValue(1);
+ const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
+ SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
+ DAG.getConstant(32, MVT::i8));
+ SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32,
+ AndNode, DAG.getConstant(0, MVT::i8));
+
+ SDOperand Hi, Lo;
+ SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
+ SmallVector<SDOperand, 4> Ops;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Ops.push_back(Tmp2);
+ Ops.push_back(Tmp3);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
- Ops.clear();
- Ops.push_back(Tmp3);
- Ops.push_back(Tmp1);
- Ops.push_back(CC);
- Ops.push_back(InFlag);
- Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
- }
+ Ops.clear();
+ Ops.push_back(Tmp3);
+ Ops.push_back(Tmp1);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+ } else {
+ Ops.push_back(Tmp2);
+ Ops.push_back(Tmp3);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
- VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
Ops.clear();
- Ops.push_back(Lo);
- Ops.push_back(Hi);
- return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
+ Ops.push_back(Tmp3);
+ Ops.push_back(Tmp1);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+ }
+
+ VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
+ Ops.clear();
+ Ops.push_back(Lo);
+ Ops.push_back(Hi);
+ return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
}
SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
StackSlot, NULL, 0);
+ // These are really Legal; caller falls through into that case.
+ if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
+ return Result;
+ if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 &&
+ Subtarget->is64Bit())
+ return Result;
+
// Build the FILD
SDVTList Tys;
- if (X86ScalarSSE)
+ bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
+ if (useSSE)
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
Ops.push_back(Chain);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(SrcVT));
- Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
+ Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
Tys, &Ops[0], Ops.size());
- if (X86ScalarSSE) {
+ if (useSSE) {
Chain = Result.getValue(1);
SDOperand InFlag = Result.getValue(2);
return Result;
}
-SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
+std::pair<SDOperand,SDOperand> X86TargetLowering::
+FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
"Unknown FP_TO_SINT to lower!");
+
+ // These are really Legal.
+ if (Op.getValueType() == MVT::i32 &&
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDOperand(), SDOperand());
+ if (Subtarget->is64Bit() &&
+ Op.getValueType() == MVT::i64 &&
+ Op.getOperand(0).getValueType() != MVT::f80)
+ return std::make_pair(SDOperand(), SDOperand());
+
// We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
-
unsigned Opc;
switch (Op.getValueType()) {
- default: assert(0 && "Invalid FP_TO_SINT to lower!");
- case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
- case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
- case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ default: assert(0 && "Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}
SDOperand Chain = DAG.getEntryNode();
SDOperand Value = Op.getOperand(0);
- if (X86ScalarSSE) {
+ if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDOperand Ops[] = { Chain, Value, StackSlot };
SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
+ return std::make_pair(FIST, StackSlot);
+}
+
+SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
+ std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(Op, DAG);
+ SDOperand FIST = Vals.first, StackSlot = Vals.second;
+ if (FIST.Val == 0) return SDOperand();
+
// Load the result.
return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
}
+SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) {
+ std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(SDOperand(N, 0), DAG);
+ SDOperand FIST = Vals.first, StackSlot = Vals.second;
+ if (FIST.Val == 0) return 0;
+
+ // Return an i64 load from the stack slot.
+ SDOperand Res = DAG.getLoad(MVT::i64, FIST, StackSlot, NULL, 0);
+
+ // Use a MERGE_VALUES node to drop the chain result value.
+ return DAG.getNode(ISD::MERGE_VALUES, MVT::i64, Res).Val;
+}
+
SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EltVT = VT;
const Type *OpNTy = MVT::getTypeForValueType(EltVT);
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
- Constant *C = ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)));
+ Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63))));
CV.push_back(C);
CV.push_back(C);
} else {
- Constant *C = ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)));
+ Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31))));
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
const Type *OpNTy = MVT::getTypeForValueType(EltVT);
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
- Constant *C = ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63));
+ Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63)));
CV.push_back(C);
CV.push_back(C);
} else {
- Constant *C = ConstantFP::get(OpNTy, BitsToFloat(1U << 31));
+ Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31)));
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
SrcVT = VT;
+ SrcTy = MVT::getTypeForValueType(SrcVT);
}
+ // And if it is bigger, shrink it first.
+ if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
+ Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1, DAG.getIntPtrConstant(1));
+ SrcVT = VT;
+ SrcTy = MVT::getTypeForValueType(SrcVT);
+ }
+
+ // At this point the operands and the result should have the same
+ // type, and that won't be f80 since that is not custom lowered.
// First get the sign bit of second operand.
std::vector<Constant*> CV;
if (SrcVT == MVT::f64) {
- CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63)));
- CV.push_back(ConstantFP::get(SrcTy, 0.0));
+ CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31)));
- CV.push_back(ConstantFP::get(SrcTy, 0.0));
- CV.push_back(ConstantFP::get(SrcTy, 0.0));
- CV.push_back(ConstantFP::get(SrcTy, 0.0));
+ CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
}
Constant *C = ConstantVector::get(CV);
SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
DAG.getConstant(32, MVT::i32));
SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
- DAG.getConstant(0, getPointerTy()));
+ DAG.getIntPtrConstant(0));
}
// Clear first operand sign bit.
CV.clear();
if (VT == MVT::f64) {
- CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(~(1ULL << 63))));
- CV.push_back(ConstantFP::get(SrcTy, 0.0));
+ CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(~(1U << 31))));
- CV.push_back(ConstantFP::get(SrcTy, 0.0));
- CV.push_back(ConstantFP::get(SrcTy, 0.0));
- CV.push_back(ConstantFP::get(SrcTy, 0.0));
+ CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
}
-SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
- SDOperand Chain) {
+SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
SDOperand Cond;
SDOperand Op0 = Op.getOperand(0);
SDOperand Op1 = Op.getOperand(1);
SDOperand CC = Op.getOperand(2);
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
- const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
unsigned X86CC;
if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
Op0, Op1, DAG)) {
- SDOperand Ops1[] = { Chain, Op0, Op1 };
- Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
- SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
- return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
+ Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
+ return DAG.getNode(X86ISD::SETCC, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond);
}
assert(isFP && "Illegal integer SetCC!");
- SDOperand COps[] = { Chain, Op0, Op1 };
- Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);
-
+ Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
switch (SetCCOpcode) {
default: assert(false && "Illegal floating point SetCC!");
case ISD::SETOEQ: { // !PF & ZF
- SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
- SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
- SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
- Tmp1.getValue(1) };
- SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
+ SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
+ DAG.getConstant(X86::COND_NP, MVT::i8), Cond);
+ SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
+ DAG.getConstant(X86::COND_E, MVT::i8), Cond);
return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
}
case ISD::SETUNE: { // PF | !ZF
- SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
- SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
- SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
- Tmp1.getValue(1) };
- SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
+ SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
+ DAG.getConstant(X86::COND_P, MVT::i8), Cond);
+ SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
+ DAG.getConstant(X86::COND_NE, MVT::i8), Cond);
return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
}
}
}
+
SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
bool addTest = true;
- SDOperand Chain = DAG.getEntryNode();
SDOperand Cond = Op.getOperand(0);
SDOperand CC;
- const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
if (Cond.getOpcode() == ISD::SETCC)
- Cond = LowerSETCC(Cond, DAG, Chain);
+ Cond = LowerSETCC(Cond, DAG);
+ // If condition flag is set by a X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
if (Cond.getOpcode() == X86ISD::SETCC) {
CC = Cond.getOperand(0);
- // If condition flag is set by a X86ISD::CMP, then make a copy of it
- // (since flag operand cannot be shared). Use it as the condition setting
- // operand in place of the X86ISD::SETCC.
- // If the X86ISD::SETCC has more than one use, then perhaps it's better
- // to use a test instead of duplicating the X86ISD::CMP (for register
- // pressure reason)?
SDOperand Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- bool IllegalFPCMov = !X86ScalarSSE &&
- MVT::isFloatingPoint(Op.getValueType()) &&
- !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
- if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
- !IllegalFPCMov) {
- SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
- Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
+ MVT::ValueType VT = Op.getValueType();
+
+ bool IllegalFPCMov = false;
+ if (MVT::isFloatingPoint(VT) && !MVT::isVector(VT) &&
+ !isScalarFPTypeInSSEReg(VT)) // FPStack?
+ IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
+
+ if ((Opc == X86ISD::CMP ||
+ Opc == X86ISD::COMI ||
+ Opc == X86ISD::UCOMI) && !IllegalFPCMov) {
+ Cond = Cmp;
addTest = false;
}
}
if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
- SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
- Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
+ Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
}
- VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
+ const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(),
+ MVT::Flag);
SmallVector<SDOperand, 4> Ops;
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
Ops.push_back(Op.getOperand(2));
Ops.push_back(Op.getOperand(1));
Ops.push_back(CC);
- Ops.push_back(Cond.getValue(1));
+ Ops.push_back(Cond);
return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
}
SDOperand Cond = Op.getOperand(1);
SDOperand Dest = Op.getOperand(2);
SDOperand CC;
- const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
if (Cond.getOpcode() == ISD::SETCC)
- Cond = LowerSETCC(Cond, DAG, Chain);
+ Cond = LowerSETCC(Cond, DAG);
+ // If condition flag is set by a X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
if (Cond.getOpcode() == X86ISD::SETCC) {
CC = Cond.getOperand(0);
- // If condition flag is set by a X86ISD::CMP, then make a copy of it
- // (since flag operand cannot be shared). Use it as the condition setting
- // operand in place of the X86ISD::SETCC.
- // If the X86ISD::SETCC has more than one use, then perhaps it's better
- // to use a test instead of duplicating the X86ISD::CMP (for register
- // pressure reason)?
SDOperand Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) {
- SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
- Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
+ if (Opc == X86ISD::CMP ||
+ Opc == X86ISD::COMI ||
+ Opc == X86ISD::UCOMI) {
+ Cond = Cmp;
addTest = false;
}
}
if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
- SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
- Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
+ Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
}
return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
- Cond, Op.getOperand(2), CC, Cond.getValue(1));
-}
-
-SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
- unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
-
- if (Subtarget->is64Bit())
- return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
- else
- switch (CallingConv) {
- default:
- assert(0 && "Unsupported calling convention");
- case CallingConv::Fast:
- // TODO: Implement fastcc
- // Falls through
- case CallingConv::C:
- case CallingConv::X86_StdCall:
- return LowerCCCCallTo(Op, DAG, CallingConv);
- case CallingConv::X86_FastCall:
- return LowerFastCCCallTo(Op, DAG, CallingConv);
- }
+ Chain, Op.getOperand(2), CC, Cond);
}
SDOperand Flag;
MVT::ValueType IntPtr = getPointerTy();
- MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ MVT::ValueType SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
Flag = Chain.getValue(1);
return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
}
-SDOperand
-X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
- MachineFunction &MF = DAG.getMachineFunction();
- const Function* Fn = MF.getFunction();
- if (Fn->hasExternalLinkage() &&
- Subtarget->isTargetCygMing() &&
- Fn->getName() == "main")
- MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);
-
- unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
- if (Subtarget->is64Bit())
- return LowerX86_64CCCArguments(Op, DAG);
- else
- switch(CC) {
- default:
- assert(0 && "Unsupported calling convention");
- case CallingConv::Fast:
- // TODO: implement fastcc.
-
- // Falls through
- case CallingConv::C:
- return LowerCCCArguments(Op, DAG);
- case CallingConv::X86_StdCall:
- MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
- return LowerCCCArguments(Op, DAG, true);
- case CallingConv::X86_FastCall:
- MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
- return LowerFastCCArguments(Op, DAG);
- }
-}
-
SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
SDOperand InFlag(0, 0);
SDOperand Chain = Op.getOperand(0);
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
if ((Align & 3) != 0 ||
- (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
+ (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
MVT::ValueType IntPtr = getPointerTy();
const Type *IntPtrTy = getTargetData()->getIntPtrType();
TargetLowering::ArgListTy Args;
if (AVT > MVT::i8) {
if (I) {
unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
- Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
+ Count = DAG.getIntPtrConstant(I->getValue() / UBytes);
BytesLeft = I->getValue() % UBytes;
} else {
assert(AVT >= MVT::i32 &&
return Chain;
}
-SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
- SDOperand Chain = Op.getOperand(0);
- unsigned Align =
- (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
- if (Align == 0) Align = 1;
-
- ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- // If not DWORD aligned or size is more than the threshold, call memcpy.
- // The libc version is likely to be faster for these cases. It can use the
- // address value and run time information about the CPU.
- // With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster
- if ((Align & 3) != 0 ||
- (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
- MVT::ValueType IntPtr = getPointerTy();
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Ty = getTargetData()->getIntPtrType();
- Entry.Node = Op.getOperand(1); Args.push_back(Entry);
- Entry.Node = Op.getOperand(2); Args.push_back(Entry);
- Entry.Node = Op.getOperand(3); Args.push_back(Entry);
- std::pair<SDOperand,SDOperand> CallResult =
- LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
- DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
- return CallResult.second;
- }
-
+SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
+ SDOperand Dest,
+ SDOperand Source,
+ unsigned Size,
+ unsigned Align,
+ SelectionDAG &DAG) {
MVT::ValueType AVT;
- SDOperand Count;
unsigned BytesLeft = 0;
- bool TwoRepMovs = false;
switch (Align & 3) {
case 2: // WORD aligned
AVT = MVT::i16;
break;
default: // Byte aligned
AVT = MVT::i8;
- Count = Op.getOperand(3);
break;
}
- if (AVT > MVT::i8) {
- if (I) {
- unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
- Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
- BytesLeft = I->getValue() % UBytes;
- } else {
- assert(AVT >= MVT::i32 &&
- "Do not use rep;movs if not at least DWORD aligned");
- Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
- Op.getOperand(3), DAG.getConstant(2, MVT::i8));
- TwoRepMovs = true;
- }
- }
+ unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
+ SDOperand Count = DAG.getIntPtrConstant(Size / UBytes);
+ BytesLeft = Size % UBytes;
SDOperand InFlag(0, 0);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
Count, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
- Op.getOperand(1), InFlag);
+ Dest, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
- Op.getOperand(2), InFlag);
+ Source, InFlag);
InFlag = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
Ops.push_back(InFlag);
Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
- if (TwoRepMovs) {
- InFlag = Chain.getValue(1);
- Count = Op.getOperand(3);
- MVT::ValueType CVT = Count.getValueType();
- SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
- DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
- Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
- Left, InFlag);
- InFlag = Chain.getValue(1);
- Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(DAG.getValueType(MVT::i8));
- Ops.push_back(InFlag);
- Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
- } else if (BytesLeft) {
+ if (BytesLeft) {
// Issue loads and stores for the last 1 - 7 bytes.
- unsigned Offset = I->getValue() - BytesLeft;
- SDOperand DstAddr = Op.getOperand(1);
+ unsigned Offset = Size - BytesLeft;
+ SDOperand DstAddr = Dest;
MVT::ValueType DstVT = DstAddr.getValueType();
- SDOperand SrcAddr = Op.getOperand(2);
+ SDOperand SrcAddr = Source;
MVT::ValueType SrcVT = SrcAddr.getValueType();
SDOperand Value;
if (BytesLeft >= 4) {
return Chain;
}
-SDOperand
-X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
+/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain
+SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG){
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDOperand TheOp = Op.getOperand(0);
- SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
+ SDOperand TheChain = N->getOperand(0);
+ SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1);
if (Subtarget->is64Bit()) {
- SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
- SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
- MVT::i64, Copy1.getValue(2));
- SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
+ SDOperand rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
+ SDOperand rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX,
+ MVT::i64, rax.getValue(2));
+ SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx,
DAG.getConstant(32, MVT::i8));
SDOperand Ops[] = {
- DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
+ DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1)
};
Tys = DAG.getVTList(MVT::i64, MVT::Other);
- return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
+ return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val;
}
- SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
- SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
- MVT::i32, Copy1.getValue(2));
- SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
- Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
- return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
+ SDOperand eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
+ SDOperand edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX,
+ MVT::i32, eax.getValue(2));
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ SDOperand Ops[] = { eax, edx };
+ Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2);
+
+ // Use a MERGE_VALUES to return the value and chain.
+ Ops[1] = edx.getValue(1);
+ Tys = DAG.getVTList(MVT::i64, MVT::Other);
+ return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val;
}
SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
MemOps.push_back(Store);
// Store fp_offset
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(4, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4));
Store = DAG.getStore(Op.getOperand(0),
DAG.getConstant(VarArgsFPOffset, MVT::i32),
FIN, SV->getValue(), SV->getOffset());
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(4, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4));
SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
SV->getOffset());
MemOps.push_back(Store);
// Store ptr to reg_save_area.
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(8, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8));
SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
SV->getOffset());
if (i == 2)
break;
SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
- DAG.getConstant(8, getPointerTy()));
+ DAG.getIntPtrConstant(8));
DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr,
- DAG.getConstant(8, getPointerTy()));
+ DAG.getIntPtrConstant(8));
}
return Chain;
}
SDOperand RHS = Op.getOperand(2);
translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
- const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
- SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
- SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
- VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
- SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
- SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
+ SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS);
+ SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
}
}
SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
- DAG.getConstant(4, getPointerTy()));
+ DAG.getIntPtrConstant(4));
}
SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
if (Subtarget->is64Bit())
return SDOperand();
- return DAG.getConstant(8, getPointerTy());
+ return DAG.getIntPtrConstant(8);
}
SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG)
getPointerTy());
SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame,
- DAG.getConstant(-4UL, getPointerTy()));
+ DAG.getIntPtrConstant(-4UL));
StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);
Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);
Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);
- MF.addLiveOut(X86::ECX);
+ MF.getRegInfo().addLiveOut(X86::ECX);
return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
Chain, DAG.getRegister(X86::ECX, getPointerTy()));
SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4));
+ const X86InstrInfo *TII =
+ ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
+
if (Subtarget->is64Bit()) {
- return SDOperand(); // not yet supported
+ SDOperand OutChains[6];
+
+ // Large code-model.
+
+ const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r);
+ const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri);
+
+ const unsigned char N86R10 =
+ ((X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R10);
+ const unsigned char N86R11 =
+ ((X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R11);
+
+ const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
+
+ // Load the pointer to the nested function into R11.
+ unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
+ SDOperand Addr = Trmp;
+ OutChains[0] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr,
+ TrmpSV->getValue(), TrmpSV->getOffset());
+
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64));
+ OutChains[1] = DAG.getStore(Root, FPtr, Addr, TrmpSV->getValue(),
+ TrmpSV->getOffset() + 2, false, 2);
+
+ // Load the 'nest' parameter value into R10.
+ // R10 is specified in X86CallingConv.td
+ OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64));
+ OutChains[2] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr,
+ TrmpSV->getValue(), TrmpSV->getOffset() + 10);
+
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64));
+ OutChains[3] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
+ TrmpSV->getOffset() + 12, false, 2);
+
+ // Jump to the nested function.
+ OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64));
+ OutChains[4] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr,
+ TrmpSV->getValue(), TrmpSV->getOffset() + 20);
+
+ unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64));
+ OutChains[5] = DAG.getStore(Root, DAG.getConstant(ModRM, MVT::i8), Addr,
+ TrmpSV->getValue(), TrmpSV->getOffset() + 22);
+
+ SDOperand Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 6) };
+ return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
} else {
Function *Func = (Function *)
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
default:
assert(0 && "Unsupported calling convention");
case CallingConv::C:
- case CallingConv::Fast:
case CallingConv::X86_StdCall: {
// Pass 'nest' parameter in ECX.
// Must be kept in sync with X86CallingConv.td
// Check that ECX wasn't needed by an 'inreg' parameter.
const FunctionType *FTy = Func->getFunctionType();
- const ParamAttrsList *Attrs = FTy->getParamAttrs();
+ const ParamAttrsList *Attrs = Func->getParamAttrs();
if (Attrs && !Func->isVarArg()) {
unsigned InRegCount = 0;
break;
}
- const X86InstrInfo *TII =
- ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
-
SDOperand OutChains[4];
SDOperand Addr, Disp;
Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
- unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
- unsigned char N86Reg = ((X86RegisterInfo&)RegInfo).getX86RegNum(NestReg);
+ const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
+ const unsigned char N86Reg =
+ ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
Trmp, TrmpSV->getValue(), TrmpSV->getOffset());
OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
TrmpSV->getOffset() + 1, false, 1);
- unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
+ const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
TrmpSV->getValue() + 5, TrmpSV->getOffset());
OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
TrmpSV->getOffset() + 6, false, 1);
- return DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4);
+ SDOperand Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
+ return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
+ }
+}
+
+SDOperand X86TargetLowering::LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) {
+ /*
+ The rounding mode is in bits 11:10 of FPSR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to -inf
+ 10 Round to +inf
+ 11 Round to 0
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
+ */
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ MVT::ValueType VT = Op.getValueType();
+
+ // Save FP Control Word to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+ SDOperand Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other,
+ DAG.getEntryNode(), StackSlot);
+
+ // Load FP Control Word from stack slot
+ SDOperand CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0);
+
+ // Transform as necessary
+ SDOperand CWD1 =
+ DAG.getNode(ISD::SRL, MVT::i16,
+ DAG.getNode(ISD::AND, MVT::i16,
+ CWD, DAG.getConstant(0x800, MVT::i16)),
+ DAG.getConstant(11, MVT::i8));
+ SDOperand CWD2 =
+ DAG.getNode(ISD::SRL, MVT::i16,
+ DAG.getNode(ISD::AND, MVT::i16,
+ CWD, DAG.getConstant(0x400, MVT::i16)),
+ DAG.getConstant(9, MVT::i8));
+
+ SDOperand RetVal =
+ DAG.getNode(ISD::AND, MVT::i16,
+ DAG.getNode(ISD::ADD, MVT::i16,
+ DAG.getNode(ISD::OR, MVT::i16, CWD1, CWD2),
+ DAG.getConstant(1, MVT::i16)),
+ DAG.getConstant(3, MVT::i16));
+
+
+ return DAG.getNode((MVT::getSizeInBits(VT) < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
+}
+
+SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType OpVT = VT;
+ unsigned NumBits = MVT::getSizeInBits(VT);
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+ // Zero extend to i32 since there is not an i8 bsr.
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
+ }
+
+ // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSR, VTs, Op);
+
+ // If src is zero (i.e. bsr sets ZF), returns NumBits.
+ SmallVector<SDOperand, 4> Ops;
+ Ops.push_back(Op);
+ Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT));
+ Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
+ Ops.push_back(Op.getValue(1));
+ Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);
+
+ // Finally xor with NumBits-1.
+ Op = DAG.getNode(ISD::XOR, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
+ return Op;
+}
+
+SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType OpVT = VT;
+ unsigned NumBits = MVT::getSizeInBits(VT);
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
}
+
+ // Issue a bsf (scan bits forward) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSF, VTs, Op);
+
+ // If src is zero (i.e. bsf sets ZF), returns NumBits.
+ SmallVector<SDOperand, 4> Ops;
+ Ops.push_back(Op);
+ Ops.push_back(DAG.getConstant(NumBits, OpVT));
+ Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
+ Ops.push_back(Op.getValue(1));
+ Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
+ return Op;
}
/// LowerOperation - Provide custom lowering hooks for some operations.
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
- case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode());
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
case ISD::MEMSET: return LowerMEMSET(Op, DAG);
case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
- case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::FLT_ROUNDS: return LowerFLT_ROUNDS(Op, DAG);
+ case ISD::CTLZ: return LowerCTLZ(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op, DAG);
+
+ // FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands.
+ case ISD::READCYCLECOUNTER:
+ return SDOperand(ExpandREADCYCLECOUNTER(Op.Val, DAG), 0);
+ }
+}
+
+/// ExpandOperation - Provide custom lowering hooks for expanding operations.
+SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) {
+ switch (N->getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG);
+ case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG);
}
- return SDOperand();
}
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return NULL;
+ case X86ISD::BSF: return "X86ISD::BSF";
+ case X86ISD::BSR: return "X86ISD::BSR";
case X86ISD::SHLD: return "X86ISD::SHLD";
case X86ISD::SHRD: return "X86ISD::SHRD";
case X86ISD::FAND: return "X86ISD::FAND";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
+ case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
+ case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
}
}
}
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+ if (!Ty1->isInteger() || !Ty2->isInteger())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return Subtarget->is64Bit() || NumBits1 < 64;
+}
+
+bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1,
+ MVT::ValueType VT2) const {
+ if (!MVT::isInteger(VT1) || !MVT::isInteger(VT2))
+ return false;
+ unsigned NumBits1 = MVT::getSizeInBits(VT1);
+ unsigned NumBits2 = MVT::getSizeInBits(VT2);
+ if (NumBits1 <= NumBits2)
+ return false;
+ return Subtarget->is64Bit() || NumBits1 < 64;
+}
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
// Load the old value of the high byte of the control word...
unsigned OldCW =
- F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
+ F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
// Set the high part to be round to zero...
AM.Base.Reg = Op.getReg();
} else {
AM.BaseType = X86AddressMode::FrameIndexBase;
- AM.Base.FrameIndex = Op.getFrameIndex();
+ AM.Base.FrameIndex = Op.getIndex();
}
Op = MI->getOperand(1);
if (Op.isImmediate())
i %= NumElems;
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
return (i == 0)
- ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
+ ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
} else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
SDOperand Idx = PermMask.getOperand(i);
if (Idx.getOpcode() == ISD::UNDEF)
return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
else if (VT == MVT::i8)
return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
- break;
+ else if (VT == MVT::i64)
+ return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
+ break;
}
}
// GCC calls "st(0)" just plain "st".
if (StringsEqualNoCase("{st}", Constraint)) {
Res.first = X86::ST0;
- Res.second = X86::RSTRegisterClass;
+ Res.second = X86::RFP80RegisterClass;
}
return Res;