// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
-// be naturally aligned on some targets but not on others.
-// FIXME: Actually implement support for targets that don't require the
-// alignment. This probably wants a subtarget predicate.
+// be naturally aligned on some targets but not on others. If the subtarget
+// allows unaligned accesses, match any load, though this may require
+// setting a feature bit in the processor (on startup, for example).
+// Opteron 10h and later implement such a feature.
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() >= 16;
+ return Subtarget->hasVectorUAMem()
+ || cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
// Alias instructions that map fld0 to pxor for sse.
let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
canFoldAsLoad = 1 in
-def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins),
- "pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasSSE1]>, TB, OpSize;
+ // FIXME: Set encoding to pseudo!
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasSSE1]>, TB, OpSize;
// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
// disregarded.
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
- Requires<[HasSSE1, NoSSEBreakDep]>;
+ Requires<[HasSSE1, OptForSize]>;
// Vector operation, reg.
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// Alias instructions that map zero vector to pxor / xorp* for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
+// FIXME: Change encoding to pseudo!
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isCodeGenOnly = 1 in
-def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins),
- "xorps\t$dst, $dst",
+def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
let Predicates = [HasSSE1] in {
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
- Requires<[HasSSE2, NoSSEBreakDep]>;
+ Requires<[HasSSE2, OptForSize]>;
def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
"cvtsi2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (sint_to_fp GR32:$src))]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
- Requires<[HasSSE2, NoSSEBreakDep]>;
+ Requires<[HasSSE2, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
- (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[SSEBreakDep]>;
+ (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
// Match intrinsics which expect XMM operand(s).
def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
// Alias instructions that map fld0 to pxor for sse.
let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
canFoldAsLoad = 1 in
-def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins),
- "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>,
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
Requires<[HasSSE2]>, TB, OpSize;
// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isCodeGenOnly = 1 in
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
- "pcmpeqd\t$dst, $dst",
+ // FIXME: Change encoding to pseudo.
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
// FR64 to 128-bit vector conversion.
[(set VR128:$dst,
(V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
TA, OpSize,
- Requires<[HasSSE41, NoSSEBreakDep]>;
+ Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,