return (int64_t)N->getValue() == (int8_t)N->getValue();
}]>;
-def sextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (sextloadi1 node:$ptr))>;
+def i64immFFFFFFFF : PatLeaf<(i64 imm), [{
+ // i64immFFFFFFFF - True if this is a specific constant we can't write in
+ // tblgen files.
+ return N->getValue() == 0x00000000FFFFFFFFULL;
+}]>;
+
+
def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
let isCall = 1 in
// All calls clobber the non-callee saved registers...
let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS] in {
def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
"bswap{q}\t$dst",
[(set GR64:$dst, (bswap GR64:$src))]>, TB;
-// Exchange
-let neverHasSideEffects = 1 in {
-def XCHG64rr : RI<0x87, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "xchg{q}\t{$src2|$src1}, {$src1|$src2}", []>;
-let mayLoad = 1, mayStore = 1 in {
-def XCHG64mr : RI<0x87, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "xchg{q}\t{$src2|$src1}, {$src1|$src2}", []>;
-def XCHG64rm : RI<0x87, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
- "xchg{q}\t{$src2|$src1}, {$src1|$src2}", []>;
-}
-}
// Bit scan instructions.
let Defs = [EFLAGS] in {
def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
"shl{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
-let neverHasSideEffects = 1 in
-def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
- "shl{q}\t$dst", []>;
+// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is
+// cheaper.
} // isTwoAddress
let Uses = [CL] in
def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
+
let isTwoAddress = 1 in {
def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src),
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
-let isTwoAddress = 1, neverHasSideEffects = 1 in {
-def Int_CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
- "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
- []>; // TODO: add intrinsic
-let mayLoad = 1 in
-def Int_CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
- "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
- []>; // TODO: add intrinsic
-} // isTwoAddress
+
+let isTwoAddress = 1 in {
+ def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
+ "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse_cvtsi642ss VR128:$src1,
+ GR64:$src2))]>;
+ def Int_CVTSI2SS64rm : RSSI<0x2A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
+ "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse_cvtsi642ss VR128:$src1,
+ (loadi64 addr:$src2)))]>;
+}
// f32 -> signed i64
def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
[(set GR64:$dst,
(int_x86_sse_cvttss2si64 (load addr:$src)))]>;
-let isTwoAddress = 1 in {
- def Int_CVTSI642SSrr : RSSI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
- "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse_cvtsi642ss VR128:$src1,
- GR64:$src2))]>;
- def Int_CVTSI642SSrm : RSSI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
- "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse_cvtsi642ss VR128:$src1,
- (loadi64 addr:$src2)))]>;
-}
-
//===----------------------------------------------------------------------===//
// Alias Instructions
//===----------------------------------------------------------------------===//
"mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
[(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
+/// PsAND64rrFFFFFFFF - r = r & (2^32-1)
+def PsAND64rrFFFFFFFF
+ : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{l}\t{${src:subreg32}, ${dst:subreg32}|${dst:subreg32}, ${src:subreg32}}",
+ [(set GR64:$dst, (and GR64:$src, i64immFFFFFFFF))]>;
+
// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's
// equivalent due to implicit zero-extending, and it sometimes has a smaller
"mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
[(set GR64:$dst, i64immZExt32:$src)]>;
+
+//===----------------------------------------------------------------------===//
+// Atomic Instructions
+//===----------------------------------------------------------------------===//
+
+//FIXME: Please check the format Pseudo is certainly wrong, but the opcode and
+// prefixes should be correct
+
+let Defs = [RAX, EFLAGS], Uses = [RAX] in {
+def CMPXCHG64 : RI<0xB1, Pseudo, (outs), (ins i64mem:$ptr, GR64:$swap),
+ "cmpxchgq $swap,$ptr", []>, TB;
+def LCMPXCHG64 : RI<0xB1, Pseudo, (outs), (ins i64mem:$ptr, GR64:$swap),
+ "lock cmpxchgq $swap,$ptr",
+ [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+}
+
+let Constraints = "$val = $dst", Defs = [EFLAGS] in {
+def LXADD64 : RI<0xC1, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
+ "lock xadd $val, $ptr",
+ [(set GR64:$dst, (atomic_las_64 addr:$ptr, GR64:$val))]>,
+ TB, LOCK;
+def XADD64 : RI<0xC1, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
+ "xadd $val, $ptr", []>, TB;
+def LXCHG64 : RI<0x87, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
+ "lock xchg $val, $ptr",
+ [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>, LOCK;
+def XCHG64 : RI<0x87, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
+ "xchg $val, $ptr", []>;
+}
+
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tconstpool:$src)>,
- Requires<[SmallCode, HasLow4G, IsStatic]>;
+ Requires<[SmallCode, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tjumptable:$src)>,
- Requires<[SmallCode, HasLow4G, IsStatic]>;
+ Requires<[SmallCode, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tglobaladdr:$src)>,
- Requires<[SmallCode, HasLow4G, IsStatic]>;
+ Requires<[SmallCode, IsStatic]>;
def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
(MOV64mi32 addr:$dst, texternalsym:$src)>,
- Requires<[SmallCode, HasLow4G, IsStatic]>;
+ Requires<[SmallCode, IsStatic]>;
// Calls
// Direct PC relative function call for small code model. 32-bit displacement
def : Pat<(parallel (X86cmp GR64:$src1, 0), (implicit EFLAGS)),
(TEST64rr GR64:$src1, GR64:$src1)>;
-// {s|z}extload bool -> {s|z}extload byte
-def : Pat<(sextloadi64i1 addr:$src), (MOVSX64rm8 addr:$src)>;
+// zextload bool -> zextload byte
def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
// extload
def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 SSE4.1 Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
+multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AI<opc, MRMSrcReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR64:$dst,
+ (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
+ def mr : SS4AI<opc, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize, REX_W;
+}
+
+defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
+
+let isTwoAddress = 1 in {
+ multiclass SS41I_insert64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
+ OpSize, REX_W;
+ def rm : SS4AI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
+ imm:$src3)))]>, OpSize, REX_W;
+ }
+}
+
+defm PINSRQ : SS41I_insert64<0x22, "pinsrq">;