From 242b38bae5d8eeadca0e0cf4777b3b285cb5eb01 Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Mon, 23 Feb 2009 09:03:22 +0000
Subject: [PATCH] Only v1i64 (i.e. __m64) is returned via RAX / RDX.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@65313 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86CallingConv.td   |  8 +++++---
 lib/Target/X86/X86ISelLowering.cpp | 29 +++++++++++++++++----------
 lib/Target/X86/X86InstrMMX.td      | 32 ++++++++++++++++++++++++------
 test/CodeGen/X86/ret-mmx.ll        | 11 +++++++++-
 4 files changed, 60 insertions(+), 20 deletions(-)
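
For reference, a minimal IR sketch of the return convention this patch
implements (function names are illustrative, not from the tree). With
something like llc -march=x86-64 -mattr=+mmx,+sse2, the return values
should be placed as commented:

; v1i64 (i.e. __m64) is still bitconverted to i64 and returned in RAX.
define <1 x i64> @ret_v1i64() nounwind {
  ret <1 x i64> <i64 42>
}

; The other 64-bit MMX types (v8i8, v4i16, v2i32, v2f32) are instead
; widened to v2i64 and returned in XMM0 / XMM1.
define <2 x i32> @ret_v2i32() nounwind {
  ret <2 x i32> <i32 3, i32 4>
}

A caller-side sketch follows the patch.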

diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 80bdbd0b1cf..d2fc86398e4 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -72,9 +72,11 @@ def RetCC_X86_64_C : CallingConv<[
   CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
   CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
 
-  // MMX vector types are always returned in RAX. This seems to disagree with
-  // ABI documentation but is bug compatible with gcc.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToReg<[RAX]>>,
+  // MMX vector types are always returned in XMM0 except for v1i64 which is
+  // returned in RAX. This disagrees with ABI documentation but is bug
+  // compatible with gcc.
+  CCIfType<[v1i64], CCAssignToReg<[RAX]>>,
+  CCIfType<[v8i8, v4i16, v2i32, v2f32], CCAssignToReg<[XMM0, XMM1]>>,
 
   CCDelegateTo<RetCC_X86Common>
 ]>;

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cdf8d0d40f8..c73d8982ffe 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -589,8 +589,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     addRegisterClass(MVT::v2f32, X86::VR64RegisterClass);
     addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);
 
-    // FIXME: add MMX packed arithmetics
-
     setOperationAction(ISD::ADD, MVT::v8i8,  Legal);
     setOperationAction(ISD::ADD, MVT::v4i16, Legal);
     setOperationAction(ISD::ADD, MVT::v2i32, Legal);
@@ -997,12 +995,15 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
       continue;
     }
 
-    // 64-bit vector (MMX) values are returned in RAX.
+    // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
+    // which is returned in RAX / RDX.
     if (Subtarget->is64Bit()) {
       MVT ValVT = ValToCopy.getValueType();
-      if (VA.getLocReg() == X86::RAX &&
-          ValVT.isVector() && ValVT.getSizeInBits() == 64)
+      if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
         ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
+        if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
+          ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy);
+      }
     }
 
     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
@@ -1081,10 +1082,18 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
 
     SDValue Val;
     if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
-      // For x86-64, MMX values are returned in RAX.
-      Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
-                                 MVT::i64, InFlag).getValue(1);
-      Val = Chain.getValue(0);
+      // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64.
+      if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+        Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+                                   MVT::v2i64, InFlag).getValue(1);
+        Val = Chain.getValue(0);
+        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
+                          Val, DAG.getConstant(0, MVT::i64));
+      } else {
+        Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+                                   MVT::i64, InFlag).getValue(1);
+        Val = Chain.getValue(0);
+      }
       Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val);
     } else {
       Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
@@ -6706,7 +6715,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
   DebugLoc dl = Node->getDebugLoc();
   MVT T = Node->getValueType(0);
   SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
-                              DAG.getConstant(0, T), Node->getOperand(2));
+                             DAG.getConstant(0, T), Node->getOperand(2));
   return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
                        cast<AtomicSDNode>(Node)->getMemoryVT(),
                        Node->getOperand(0),

diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 62055c05d60..71f2cb164d5 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -156,10 +156,12 @@ def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
 
 let neverHasSideEffects = 1 in
 def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
-                             "movd\t{$src, $dst|$dst, $src}", []>;
+                             "movd\t{$src, $dst|$dst, $src}",
+                             []>;
 
 let neverHasSideEffects = 1 in
-def MMX_MOVD64from64rr : MMXRI<0x7E, MRMSrcReg, (outs GR64:$dst), (ins VR64:$src),
+def MMX_MOVD64from64rr : MMXRI<0x7E, MRMSrcReg,
+                               (outs GR64:$dst), (ins VR64:$src),
                                "movd\t{$src, $dst|$dst, $src}", []>;
 
 let neverHasSideEffects = 1 in
@@ -187,6 +189,10 @@ def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src),
                              (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src)))),
                              MOVL_shuffle_mask)))]>;
 
+let neverHasSideEffects = 1 in
+def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src),
+                            "movq2dq\t{$src, $dst|$dst, $src}", []>;
+
 def MMX_MOVNTQmr  : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
                          "movntq\t{$src, $dst|$dst, $src}",
                          [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>;
@@ -575,9 +581,17 @@ def : Pat<(i64 (bitconvert (v4i16 VR64:$src))),
           (MMX_MOVD64from64rr VR64:$src)>;
 def : Pat<(i64 (bitconvert (v8i8 VR64:$src))),
           (MMX_MOVD64from64rr VR64:$src)>;
-
-// Move scalar to XMM zero-extended
-// movd to XMM register zero-extends
+def : Pat<(f64 (bitconvert (v1i64 VR64:$src))),
+          (MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (v2i32 VR64:$src))),
+          (MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
+          (MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
+          (MMX_MOVQ2FR64rr VR64:$src)>;
+
+// Move scalar to MMX zero-extended
+// movd to MMX register zero-extends
 let AddedComplexity = 15 in {
   def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
             (MMX_MOVZDI2PDIrr GR32:$src)>;
@@ -667,7 +681,13 @@ def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
           (MMX_PANDNrm VR64:$src1, addr:$src2)>;
 
 // Move MMX to lower 64-bit of XMM
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src)))),
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))),
+          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v4i16 VR64:$src))))),
+          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v2i32 VR64:$src))))),
+          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v1i64 VR64:$src))))),
           (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
 
 // Move lower 64-bit of XMM to MMX.
diff --git a/test/CodeGen/X86/ret-mmx.ll b/test/CodeGen/X86/ret-mmx.ll
index 6587eabb766..48f611484ef 100644
--- a/test/CodeGen/X86/ret-mmx.ll
+++ b/test/CodeGen/X86/ret-mmx.ll
@@ -10,8 +10,17 @@ entry:
 	ret void
 }
 
+declare <1 x i64> @return_v1di()
+
 define <1 x i64> @t2() nounwind {
 	ret <1 x i64> <i64 1>
 }
 
-declare <1 x i64> @return_v1di()
+define <2 x i32> @t3() nounwind {
+	ret <2 x i32> <i32 1, i32 2>
+}
+
+define double @t4() nounwind {
+	ret double bitcast (<2 x i32> <i32 1, i32 2> to double)
+}
+
-- 
2.34.1
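
A caller-side sketch of the same convention (illustrative IR, not part of the
patch): for any 64-bit MMX result other than v1i64, LowerCallResult now copies
the value out of XMM0 / XMM1 as v2i64, extracts element 0 as an i64, and
bitconverts it back to the expected MMX type; v1i64 results keep coming out of
RAX / RDX.

declare <2 x i32> @ret_v2i32()

define i64 @use_v2i32() nounwind {
  ; With this patch the <2 x i32> result is read from XMM0 rather than RAX.
  %v = call <2 x i32> @ret_v2i32()
  %b = bitcast <2 x i32> %v to i64
  ret i64 %b
}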