-//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
+//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
#include "llvm/LLVMContext.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/MC/MCAsmInfo.h"
#include <limits>
#define GET_INSTRINFO_CTOR
enum {
// Select which memory operand is being unfolded.
- // (stored in bits 0 - 7)
+ // (stored in bits 0 - 3)
TB_INDEX_0 = 0,
TB_INDEX_1 = 1,
TB_INDEX_2 = 2,
- TB_INDEX_MASK = 0xff,
+ TB_INDEX_3 = 3,
+ TB_INDEX_MASK = 0xf,
+
+ // Do not insert the reverse map (MemOp -> RegOp) into the table.
+ // This may be needed because there is a many -> one mapping.
+ TB_NO_REVERSE = 1 << 4,
+
+ // Do not insert the forward map (RegOp -> MemOp) into the table.
+ // This is needed for Native Client, which prohibits branch
+ // instructions from using a memory operand.
+ TB_NO_FORWARD = 1 << 5,
+
+ TB_FOLDED_LOAD = 1 << 6,
+ TB_FOLDED_STORE = 1 << 7,
// Minimum alignment required for load/store.
// Used for RegOp->MemOp conversion.
TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
- TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT,
-
- // Do not insert the reverse map (MemOp -> RegOp) into the table.
- // This may be needed because there is a many -> one mapping.
- TB_NO_REVERSE = 1 << 16,
-
- // Do not insert the forward map (RegOp -> MemOp) into the table.
- // This is needed for Native Client, which prohibits branch
- // instructions from using a memory operand.
- TB_NO_FORWARD = 1 << 17,
+ TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT
+};
- TB_FOLDED_LOAD = 1 << 18,
- TB_FOLDED_STORE = 1 << 19
+struct X86OpTblEntry {
+ uint16_t RegOp;
+ uint16_t MemOp;
+ uint16_t Flags;
};
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86::ADJCALLSTACKUP32)),
TM(tm), RI(tm, *this) {
- static const unsigned OpTbl2Addr[][3] = {
+ static const X86OpTblEntry OpTbl2Addr[] = {
{ X86::ADC32ri, X86::ADC32mi, 0 },
{ X86::ADC32ri8, X86::ADC32mi8, 0 },
{ X86::ADC32rr, X86::ADC32mr, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
- unsigned RegOp = OpTbl2Addr[i][0];
- unsigned MemOp = OpTbl2Addr[i][1];
- unsigned Flags = OpTbl2Addr[i][2];
+ unsigned RegOp = OpTbl2Addr[i].RegOp;
+ unsigned MemOp = OpTbl2Addr[i].MemOp;
+ unsigned Flags = OpTbl2Addr[i].Flags;
AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
RegOp, MemOp,
// Index 0, folded load and store, no alignment requirement.
Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
}
- static const unsigned OpTbl0[][3] = {
+ static const X86OpTblEntry OpTbl0[] = {
{ X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
{ X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
{ X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
{ X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
{ X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
- { X86::WINCALL64r, X86::WINCALL64m, TB_FOLDED_LOAD },
{ X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
{ X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
{ X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
{ X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
{ X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
// AVX 256-bit foldable instructions
+ { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
- unsigned RegOp = OpTbl0[i][0];
- unsigned MemOp = OpTbl0[i][1];
- unsigned Flags = OpTbl0[i][2];
+ unsigned RegOp = OpTbl0[i].RegOp;
+ unsigned MemOp = OpTbl0[i].MemOp;
+ unsigned Flags = OpTbl0[i].Flags;
AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
RegOp, MemOp, TB_INDEX_0 | Flags);
}
- static const unsigned OpTbl1[][3] = {
+ static const X86OpTblEntry OpTbl1[] = {
{ X86::CMP16rr, X86::CMP16rm, 0 },
{ X86::CMP32rr, X86::CMP32rm, 0 },
{ X86::CMP64rr, X86::CMP64rm, 0 },
{ X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
{ X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
{ X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
- { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, TB_ALIGN_16 },
- { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, TB_ALIGN_16 },
- { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, TB_ALIGN_16 },
- { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, TB_ALIGN_16 },
- { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, TB_ALIGN_16 },
- { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
{ X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
{ X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
+ { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
+ { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
{ X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
{ X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
{ X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
// AVX 128-bit versions of foldable instructions
{ X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
{ X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
- { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm, TB_ALIGN_16 },
- { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm, TB_ALIGN_16 },
- { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm, TB_ALIGN_16 },
- { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm, TB_ALIGN_16 },
- { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm, TB_ALIGN_16 },
- { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm, 0 },
{ X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 },
{ X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 },
+ { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
+ { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 },
+ { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
+ { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, 0 },
+ { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
+ { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 },
+ { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
+ { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, 0 },
+ { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
+ { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
+ { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
+ { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
{ X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE },
{ X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE },
{ X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
{ X86::VPABSBrr128, X86::VPABSBrm128, TB_ALIGN_16 },
{ X86::VPABSDrr128, X86::VPABSDrm128, TB_ALIGN_16 },
{ X86::VPABSWrr128, X86::VPABSWrm128, TB_ALIGN_16 },
+ { X86::VPERMILPDri, X86::VPERMILPDmi, TB_ALIGN_16 },
+ { X86::VPERMILPSri, X86::VPERMILPSmi, TB_ALIGN_16 },
{ X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 },
{ X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 },
{ X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 },
// AVX 256-bit foldable instructions
{ X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
- { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_16 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
{ X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
{ X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
+ { X86::VPERMILPDYri, X86::VPERMILPDYmi, TB_ALIGN_32 },
+ { X86::VPERMILPSYri, X86::VPERMILPSYmi, TB_ALIGN_32 },
// AVX2 foldable instructions
- { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_16 },
- { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_16 },
- { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_16 },
- { X86::VPSHUFDYri, X86::VPSHUFDYmi, TB_ALIGN_16 },
- { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_16 },
- { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_16 }
+ { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_32 },
+ { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_32 },
+ { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_32 },
+ { X86::VPSHUFDYri, X86::VPSHUFDYmi, TB_ALIGN_32 },
+ { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_32 },
+ { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_32 },
+ { X86::VRCPPSYr, X86::VRCPPSYm, TB_ALIGN_32 },
+ { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, TB_ALIGN_32 },
+ { X86::VRSQRTPSYr, X86::VRSQRTPSYm, TB_ALIGN_32 },
+ { X86::VRSQRTPSYr_Int, X86::VRSQRTPSYm_Int, TB_ALIGN_32 },
+ { X86::VSQRTPDYr, X86::VSQRTPDYm, TB_ALIGN_32 },
+ { X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, TB_ALIGN_32 },
+ { X86::VSQRTPSYr, X86::VSQRTPSYm, TB_ALIGN_32 },
+ { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
- unsigned RegOp = OpTbl1[i][0];
- unsigned MemOp = OpTbl1[i][1];
- unsigned Flags = OpTbl1[i][2];
+ unsigned RegOp = OpTbl1[i].RegOp;
+ unsigned MemOp = OpTbl1[i].MemOp;
+ unsigned Flags = OpTbl1[i].Flags;
AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
RegOp, MemOp,
// Index 1, folded load
Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
}
- static const unsigned OpTbl2[][3] = {
+ static const X86OpTblEntry OpTbl2[] = {
{ X86::ADC32rr, X86::ADC32rm, 0 },
{ X86::ADC64rr, X86::ADC64rm, 0 },
{ X86::ADD16rr, X86::ADD16rm, 0 },
{ X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 },
{ X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 },
{ X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 },
+ { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 },
+ { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 },
+ { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 },
+ { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 },
{ X86::CMOVA16rr, X86::CMOVA16rm, 0 },
{ X86::CMOVA32rr, X86::CMOVA32rm, 0 },
{ X86::CMOVA64rr, X86::CMOVA64rm, 0 },
{ X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
{ X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
{ X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
+ { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 },
{ X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
{ X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
{ X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 },
{ X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
{ X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 },
{ X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
- { X86::PHADDDrr128, X86::PHADDDrm128, TB_ALIGN_16 },
- { X86::PHADDWrr128, X86::PHADDWrm128, TB_ALIGN_16 },
+ { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 },
+ { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 },
{ X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 },
- { X86::PHSUBDrr128, X86::PHSUBDrm128, TB_ALIGN_16 },
+ { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 },
{ X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 },
- { X86::PHSUBWrr128, X86::PHSUBWrm128, TB_ALIGN_16 },
+ { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 },
{ X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 },
{ X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 },
{ X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
{ X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
{ X86::PORrr, X86::PORrm, TB_ALIGN_16 },
{ X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
- { X86::PSHUFBrr128, X86::PSHUFBrm128, TB_ALIGN_16 },
- { X86::PSIGNBrr128, X86::PSIGNBrm128, TB_ALIGN_16 },
- { X86::PSIGNWrr128, X86::PSIGNWrm128, TB_ALIGN_16 },
- { X86::PSIGNDrr128, X86::PSIGNDrm128, TB_ALIGN_16 },
+ { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 },
+ { X86::PSIGNBrr, X86::PSIGNBrm, TB_ALIGN_16 },
+ { X86::PSIGNWrr, X86::PSIGNWrm, TB_ALIGN_16 },
+ { X86::PSIGNDrr, X86::PSIGNDrm, TB_ALIGN_16 },
{ X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
{ X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
{ X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
{ X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
- { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
- { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 },
- { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
- { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm, 0 },
- { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
- { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 },
- { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
- { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm, 0 },
- { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
- { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, TB_ALIGN_16 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, TB_ALIGN_16 },
{ X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
{ X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 },
{ X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 },
{ X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 },
+ { X86::VBLENDPDrri, X86::VBLENDPDrmi, TB_ALIGN_16 },
+ { X86::VBLENDPSrri, X86::VBLENDPSrmi, TB_ALIGN_16 },
+ { X86::VBLENDVPDrr, X86::VBLENDVPDrm, TB_ALIGN_16 },
+ { X86::VBLENDVPSrr, X86::VBLENDVPSrm, TB_ALIGN_16 },
{ X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 },
{ X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 },
{ X86::VCMPSDrr, X86::VCMPSDrm, 0 },
{ X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 },
{ X86::VPAVGBrr, X86::VPAVGBrm, TB_ALIGN_16 },
{ X86::VPAVGWrr, X86::VPAVGWrm, TB_ALIGN_16 },
+ { X86::VPBLENDWrri, X86::VPBLENDWrmi, TB_ALIGN_16 },
{ X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 },
{ X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 },
{ X86::VPCMPEQQrr, X86::VPCMPEQQrm, TB_ALIGN_16 },
{ X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 },
{ X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 },
{ X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 },
- { X86::VPHADDDrr128, X86::VPHADDDrm128, TB_ALIGN_16 },
+ { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 },
{ X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 },
- { X86::VPHADDWrr128, X86::VPHADDWrm128, TB_ALIGN_16 },
- { X86::VPHSUBDrr128, X86::VPHSUBDrm128, TB_ALIGN_16 },
+ { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 },
+ { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 },
{ X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 },
- { X86::VPHSUBWrr128, X86::VPHSUBWrm128, TB_ALIGN_16 },
+ { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 },
+ { X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 },
+ { X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 },
{ X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 },
{ X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, TB_ALIGN_16 },
{ X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 },
{ X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 },
{ X86::VPORrr, X86::VPORrm, TB_ALIGN_16 },
{ X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 },
- { X86::VPSHUFBrr128, X86::VPSHUFBrm128, TB_ALIGN_16 },
- { X86::VPSIGNBrr128, X86::VPSIGNBrm128, TB_ALIGN_16 },
- { X86::VPSIGNWrr128, X86::VPSIGNWrm128, TB_ALIGN_16 },
- { X86::VPSIGNDrr128, X86::VPSIGNDrm128, TB_ALIGN_16 },
+ { X86::VPSHUFBrr, X86::VPSHUFBrm, TB_ALIGN_16 },
+ { X86::VPSIGNBrr, X86::VPSIGNBrm, TB_ALIGN_16 },
+ { X86::VPSIGNWrr, X86::VPSIGNWrm, TB_ALIGN_16 },
+ { X86::VPSIGNDrr, X86::VPSIGNDrm, TB_ALIGN_16 },
{ X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 },
{ X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 },
{ X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 },
{ X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 },
{ X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 },
{ X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 },
+ // AVX 256-bit foldable instructions
+ { X86::VADDPDYrr, X86::VADDPDYrm, TB_ALIGN_32 },
+ { X86::VADDPSYrr, X86::VADDPSYrm, TB_ALIGN_32 },
+ { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, TB_ALIGN_32 },
+ { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, TB_ALIGN_32 },
+ { X86::VANDNPDYrr, X86::VANDNPDYrm, TB_ALIGN_32 },
+ { X86::VANDNPSYrr, X86::VANDNPSYrm, TB_ALIGN_32 },
+ { X86::VANDPDYrr, X86::VANDPDYrm, TB_ALIGN_32 },
+ { X86::VANDPSYrr, X86::VANDPSYrm, TB_ALIGN_32 },
+ { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, TB_ALIGN_32 },
+ { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, TB_ALIGN_32 },
+ { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, TB_ALIGN_32 },
+ { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, TB_ALIGN_32 },
+ { X86::VCMPPDYrri, X86::VCMPPDYrmi, TB_ALIGN_32 },
+ { X86::VCMPPSYrri, X86::VCMPPSYrmi, TB_ALIGN_32 },
+ { X86::VDIVPDYrr, X86::VDIVPDYrm, TB_ALIGN_32 },
+ { X86::VDIVPSYrr, X86::VDIVPSYrm, TB_ALIGN_32 },
+ { X86::VHADDPDYrr, X86::VHADDPDYrm, TB_ALIGN_32 },
+ { X86::VHADDPSYrr, X86::VHADDPSYrm, TB_ALIGN_32 },
+ { X86::VHSUBPDYrr, X86::VHSUBPDYrm, TB_ALIGN_32 },
+ { X86::VHSUBPSYrr, X86::VHSUBPSYrm, TB_ALIGN_32 },
+ { X86::VINSERTF128rr, X86::VINSERTF128rm, TB_ALIGN_32 },
+ { X86::VMAXPDYrr, X86::VMAXPDYrm, TB_ALIGN_32 },
+ { X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, TB_ALIGN_32 },
+ { X86::VMAXPSYrr, X86::VMAXPSYrm, TB_ALIGN_32 },
+ { X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, TB_ALIGN_32 },
+ { X86::VMINPDYrr, X86::VMINPDYrm, TB_ALIGN_32 },
+ { X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, TB_ALIGN_32 },
+ { X86::VMINPSYrr, X86::VMINPSYrm, TB_ALIGN_32 },
+ { X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, TB_ALIGN_32 },
+ { X86::VMULPDYrr, X86::VMULPDYrm, TB_ALIGN_32 },
+ { X86::VMULPSYrr, X86::VMULPSYrm, TB_ALIGN_32 },
+ { X86::VORPDYrr, X86::VORPDYrm, TB_ALIGN_32 },
+ { X86::VORPSYrr, X86::VORPSYrm, TB_ALIGN_32 },
+ { X86::VPERM2F128rr, X86::VPERM2F128rm, TB_ALIGN_32 },
+ { X86::VPERMILPDYrr, X86::VPERMILPDYrm, TB_ALIGN_32 },
+ { X86::VPERMILPSYrr, X86::VPERMILPSYrm, TB_ALIGN_32 },
+ { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, TB_ALIGN_32 },
+ { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, TB_ALIGN_32 },
+ { X86::VSUBPDYrr, X86::VSUBPDYrm, TB_ALIGN_32 },
+ { X86::VSUBPSYrr, X86::VSUBPSYrm, TB_ALIGN_32 },
+ { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, TB_ALIGN_32 },
+ { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, TB_ALIGN_32 },
+ { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, TB_ALIGN_32 },
+ { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, TB_ALIGN_32 },
+ { X86::VXORPDYrr, X86::VXORPDYrm, TB_ALIGN_32 },
+ { X86::VXORPSYrr, X86::VXORPSYrm, TB_ALIGN_32 },
// AVX2 foldable instructions
- { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_16 },
- { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_16 },
- { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_16 },
- { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_16 },
- { X86::VPADDBYrr, X86::VPADDBYrm, TB_ALIGN_16 },
- { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_16 },
- { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_16 },
- { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_16 },
- { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_16 },
- { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_16 },
- { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_16 },
- { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_16 },
- { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_16 },
- { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_16 },
- { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_16 },
- { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_16 },
- { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_16 },
- { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, TB_ALIGN_16 },
- { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_16 },
- { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_16 },
- { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_16 },
- { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_16 },
- { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_16 },
- { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_16 },
- { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_16 },
- { X86::VPHADDDrr256, X86::VPHADDDrm256, TB_ALIGN_16 },
- { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_16 },
- { X86::VPHADDWrr256, X86::VPHADDWrm256, TB_ALIGN_16 },
- { X86::VPHSUBDrr256, X86::VPHSUBDrm256, TB_ALIGN_16 },
- { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_16 },
- { X86::VPHSUBWrr256, X86::VPHSUBWrm256, TB_ALIGN_16 },
- { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_16 },
- { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_16 },
- { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_16 },
- { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_16 },
- { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_16 },
- { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_16 },
- { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_16 },
- { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_16 },
- { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_16 },
- { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_16 },
- { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_16 },
- { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_16 },
- { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_16 },
- { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_16 },
- { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_16 },
- { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_16 },
- { X86::VPSHUFBrr256, X86::VPSHUFBrm256, TB_ALIGN_16 },
- { X86::VPSIGNBrr256, X86::VPSIGNBrm256, TB_ALIGN_16 },
- { X86::VPSIGNWrr256, X86::VPSIGNWrm256, TB_ALIGN_16 },
- { X86::VPSIGNDrr256, X86::VPSIGNDrm256, TB_ALIGN_16 },
+ { X86::VINSERTI128rr, X86::VINSERTI128rm, TB_ALIGN_16 },
+ { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_32 },
+ { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_32 },
+ { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_32 },
+ { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_32 },
+ { X86::VPADDBYrr, X86::VPADDBYrm, TB_ALIGN_32 },
+ { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_32 },
+ { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_32 },
+ { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_32 },
+ { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_32 },
+ { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_32 },
+ { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_32 },
+ { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_32 },
+ { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_32 },
+ { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_32 },
+ { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_32 },
+ { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_32 },
+ { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_32 },
+ { X86::VPBLENDDrri, X86::VPBLENDDrmi, TB_ALIGN_32 },
+ { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, TB_ALIGN_32 },
+ { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, TB_ALIGN_32 },
+ { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, TB_ALIGN_32 },
+ { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_32 },
+ { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_32 },
+ { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_32 },
+ { X86::VPERM2I128rr, X86::VPERM2I128rm, TB_ALIGN_32 },
+ { X86::VPERMDYrr, X86::VPERMDYrm, TB_ALIGN_32 },
+ { X86::VPERMPDYri, X86::VPERMPDYmi, TB_ALIGN_32 },
+ { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 },
+ { X86::VPERMQYri, X86::VPERMQYmi, TB_ALIGN_32 },
+ { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 },
+ { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 },
+ { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 },
+ { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 },
+ { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 },
+ { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 },
+ { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 },
+ { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 },
+ { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 },
+ { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 },
+ { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 },
+ { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 },
+ { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 },
+ { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 },
+ { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 },
+ { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_32 },
+ { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_32 },
+ { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_32 },
+ { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_32 },
+ { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_32 },
+ { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_32 },
+ { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_32 },
+ { X86::VPSHUFBYrr, X86::VPSHUFBYrm, TB_ALIGN_32 },
+ { X86::VPSIGNBYrr, X86::VPSIGNBYrm, TB_ALIGN_32 },
+ { X86::VPSIGNWYrr, X86::VPSIGNWYrm, TB_ALIGN_32 },
+ { X86::VPSIGNDYrr, X86::VPSIGNDYrm, TB_ALIGN_32 },
{ X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 },
{ X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 },
{ X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 },
{ X86::VPSLLVDrr, X86::VPSLLVDrm, TB_ALIGN_16 },
- { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_16 },
+ { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_32 },
{ X86::VPSLLVQrr, X86::VPSLLVQrm, TB_ALIGN_16 },
- { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_16 },
+ { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_32 },
{ X86::VPSRADYrr, X86::VPSRADYrm, TB_ALIGN_16 },
{ X86::VPSRAWYrr, X86::VPSRAWYrm, TB_ALIGN_16 },
{ X86::VPSRAVDrr, X86::VPSRAVDrm, TB_ALIGN_16 },
- { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_16 },
+ { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_32 },
{ X86::VPSRLDYrr, X86::VPSRLDYrm, TB_ALIGN_16 },
{ X86::VPSRLQYrr, X86::VPSRLQYrm, TB_ALIGN_16 },
{ X86::VPSRLWYrr, X86::VPSRLWYrm, TB_ALIGN_16 },
{ X86::VPSRLVDrr, X86::VPSRLVDrm, TB_ALIGN_16 },
- { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_16 },
+ { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_32 },
{ X86::VPSRLVQrr, X86::VPSRLVQrm, TB_ALIGN_16 },
- { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_16 },
- { X86::VPSUBBYrr, X86::VPSUBBYrm, TB_ALIGN_16 },
- { X86::VPSUBDYrr, X86::VPSUBDYrm, TB_ALIGN_16 },
- { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_16 },
- { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_16 },
- { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_16 },
- { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_16 },
- { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_16 },
+ { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_32 },
+ { X86::VPSUBBYrr, X86::VPSUBBYrm, TB_ALIGN_32 },
+ { X86::VPSUBDYrr, X86::VPSUBDYrm, TB_ALIGN_32 },
+ { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_32 },
+ { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_32 },
+ { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_32 },
{ X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, TB_ALIGN_16 },
- { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_16 },
- { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_16 },
- { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_16 },
- { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_16 },
- { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_16 },
- { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 },
+ { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 },
// FIXME: add AVX 256-bit foldable instructions
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
- unsigned RegOp = OpTbl2[i][0];
- unsigned MemOp = OpTbl2[i][1];
- unsigned Flags = OpTbl2[i][2];
+ unsigned RegOp = OpTbl2[i].RegOp;
+ unsigned MemOp = OpTbl2[i].MemOp;
+ unsigned Flags = OpTbl2[i].Flags;
AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
RegOp, MemOp,
// Index 2, folded load
Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
}
+
+ static const X86OpTblEntry OpTbl3[] = {
+ // FMA foldable instructions
+ { X86::VFMADDSSr231r, X86::VFMADDSSr231m, 0 },
+ { X86::VFMADDSDr231r, X86::VFMADDSDr231m, 0 },
+ { X86::VFMADDSSr132r, X86::VFMADDSSr132m, 0 },
+ { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 },
+ { X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 },
+ { X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 },
+ { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, 0 },
+ { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, 0 },
+
+ { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 },
+ { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 },
+ { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_16 },
+ { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_16 },
+ { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_16 },
+ { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_16 },
+ { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_32 },
+ { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_32 },
+ { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_32 },
+ { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 },
+ { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 },
+ { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 },
+ { X86::VFMADDPSr132r_Int, X86::VFMADDPSr132m_Int, TB_ALIGN_16 },
+ { X86::VFMADDPDr132r_Int, X86::VFMADDPDr132m_Int, TB_ALIGN_16 },
+ { X86::VFMADDPSr132rY_Int, X86::VFMADDPSr132mY_Int, TB_ALIGN_32 },
+ { X86::VFMADDPDr132rY_Int, X86::VFMADDPDr132mY_Int, TB_ALIGN_32 },
+
+ { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 },
+ { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 },
+ { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, 0 },
+ { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 },
+ { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 },
+ { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 },
+ { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, 0 },
+ { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr132m_Int, 0 },
+
+ { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 },
+ { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 },
+ { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_16 },
+ { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_16 },
+ { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_16 },
+ { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_16 },
+ { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_32 },
+ { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_32 },
+ { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_32 },
+ { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 },
+ { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 },
+ { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 },
+ { X86::VFNMADDPSr132r_Int, X86::VFNMADDPSr132m_Int, TB_ALIGN_16 },
+ { X86::VFNMADDPDr132r_Int, X86::VFNMADDPDr132m_Int, TB_ALIGN_16 },
+ { X86::VFNMADDPSr132rY_Int, X86::VFNMADDPSr132mY_Int, TB_ALIGN_32 },
+ { X86::VFNMADDPDr132rY_Int, X86::VFNMADDPDr132mY_Int, TB_ALIGN_32 },
+
+ { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 },
+ { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 },
+ { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, 0 },
+ { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 },
+ { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 },
+ { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 },
+ { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, 0 },
+ { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, 0 },
+
+ { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 },
+ { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 },
+ { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_16 },
+ { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_16 },
+ { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_16 },
+ { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_16 },
+ { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBPSr132rY, X86::VFMSUBPSr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBPSr132r_Int, X86::VFMSUBPSr132m_Int, TB_ALIGN_16 },
+ { X86::VFMSUBPDr132r_Int, X86::VFMSUBPDr132m_Int, TB_ALIGN_16 },
+ { X86::VFMSUBPSr132rY_Int, X86::VFMSUBPSr132mY_Int, TB_ALIGN_32 },
+ { X86::VFMSUBPDr132rY_Int, X86::VFMSUBPDr132mY_Int, TB_ALIGN_32 },
+
+ { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 },
+ { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 },
+ { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, 0 },
+ { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 },
+ { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 },
+ { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 },
+ { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, 0 },
+ { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, 0 },
+
+ { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPSr132r_Int, X86::VFNMSUBPSr132m_Int, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr132r_Int, X86::VFNMSUBPDr132m_Int, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr132rY_Int, X86::VFNMSUBPSr132mY_Int, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr132rY_Int, X86::VFNMSUBPDr132mY_Int, TB_ALIGN_32 },
+
+ { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr132m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr132m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr213m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr213m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPSr132r_Int, X86::VFMADDSUBPSr132m_Int, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr132r_Int, X86::VFMADDSUBPDr132m_Int, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr132rY_Int, X86::VFMADDSUBPSr132mY_Int, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr132rY_Int, X86::VFMADDSUBPDr132mY_Int, TB_ALIGN_32 },
+
+ { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr132m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr132m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr213m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr213m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPSr132r_Int, X86::VFMSUBADDPSr132m_Int, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr132r_Int, X86::VFMSUBADDPDr132m_Int, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr132rY_Int, X86::VFMSUBADDPSr132mY_Int, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr132rY_Int, X86::VFMSUBADDPDr132mY_Int, TB_ALIGN_32 },
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
+ unsigned RegOp = OpTbl3[i].RegOp;
+ unsigned MemOp = OpTbl3[i].MemOp;
+ unsigned Flags = OpTbl3[i].Flags;
+ AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 3, folded load
+ Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
+ }
+
}
void
switch (MI.getOpcode()) {
default:
llvm_unreachable(0);
- break;
case X86::MOVSX16rr8:
case X86::MOVZX16rr8:
case X86::MOVSX32rr8:
static bool isFrameLoadOpcode(int Opcode) {
switch (Opcode) {
- default: break;
+ default:
+ return false;
case X86::MOV8rm:
case X86::MOV16rm:
case X86::MOV32rm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
return true;
- break;
}
- return false;
}
static bool isFrameStoreOpcode(int Opcode) {
bool SeenDef = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
+ if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
+ SeenDef = true;
if (!MO.isReg())
continue;
if (MO.getReg() == X86::EFLAGS) {
bool SawKill = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
+ // A register mask may clobber EFLAGS, but we should still look for a
+ // live EFLAGS def.
+ if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
+ SawKill = true;
if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
if (MO.isDef()) return MO.isDead();
if (MO.isKill()) SawKill = true;
switch (MIOpc) {
default:
llvm_unreachable(0);
- break;
case X86::SHL16ri: {
unsigned ShAmt = MI->getOperand(2).getImm();
MIB.addReg(0).addImm(1 << ShAmt)
leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// well be shifting and then extracting the lower 16-bits.
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF),leaInReg2);
InsMI2 =
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
.addReg(leaInReg2, RegState::Define, X86::sub_16bit)
.addReg(Src2, getKillRegState(isKill2));
addRegReg(MIB, leaInReg, true, leaInReg2, true);
.addReg(B, getKillRegState(isKill)).addImm(M);
break;
}
+ case X86::SHUFPDrri: {
+ assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+
+ unsigned B = MI->getOperand(1).getReg();
+ unsigned C = MI->getOperand(2).getReg();
+ if (B != C) return 0;
+ unsigned A = MI->getOperand(0).getReg();
+ unsigned M = MI->getOperand(3).getImm();
+
+ // Convert to PSHUFD mask.
+ M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44;
+
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
+ .addReg(A, RegState::Define | getDeadRegState(isDead))
+ .addReg(B, getKillRegState(isKill)).addImm(M);
+ break;
+ }
case X86::SHL64ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
// NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ const TargetRegisterClass *RC = MIOpc == X86::INC64r ?
+ (const TargetRegisterClass*)&X86::GR64_NOSPRegClass :
+ (const TargetRegisterClass*)&X86::GR32_NOSPRegClass;
// LEA can't handle RSP.
if (TargetRegisterInfo::isVirtualRegister(Src) &&
- !MF.getRegInfo().constrainRegClass(Src,
- MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass :
- X86::GR32_NOSPRegisterClass))
+ !MF.getRegInfo().constrainRegClass(Src, RC))
return 0;
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ const TargetRegisterClass *RC = MIOpc == X86::DEC64r ?
+ (const TargetRegisterClass*)&X86::GR64_NOSPRegClass :
+ (const TargetRegisterClass*)&X86::GR32_NOSPRegClass;
// LEA can't handle RSP.
if (TargetRegisterInfo::isVirtualRegister(Src) &&
- !MF.getRegInfo().constrainRegClass(Src,
- MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass :
- X86::GR32_NOSPRegisterClass))
+ !MF.getRegInfo().constrainRegClass(Src, RC))
return 0;
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
case X86::ADD32rr_DB: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc;
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
Opc = X86::LEA64r;
- RC = X86::GR64_NOSPRegisterClass;
+ RC = &X86::GR64_NOSPRegClass;
} else {
Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
- RC = X86::GR32_NOSPRegisterClass;
+ RC = &X86::GR32_NOSPRegClass;
}
}
}
-static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
+static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
switch (BrOpc) {
default: return X86::COND_INVALID;
case X86::JE_4: return X86::COND_E;
}
}
+/// getCondFromSETOpc - return condition code of a SET opcode.
+static X86::CondCode getCondFromSETOpc(unsigned Opc) {
+ switch (Opc) {
+ default: return X86::COND_INVALID;
+ case X86::SETAr: case X86::SETAm: return X86::COND_A;
+ case X86::SETAEr: case X86::SETAEm: return X86::COND_AE;
+ case X86::SETBr: case X86::SETBm: return X86::COND_B;
+ case X86::SETBEr: case X86::SETBEm: return X86::COND_BE;
+ case X86::SETEr: case X86::SETEm: return X86::COND_E;
+ case X86::SETGr: case X86::SETGm: return X86::COND_G;
+ case X86::SETGEr: case X86::SETGEm: return X86::COND_GE;
+ case X86::SETLr: case X86::SETLm: return X86::COND_L;
+ case X86::SETLEr: case X86::SETLEm: return X86::COND_LE;
+ case X86::SETNEr: case X86::SETNEm: return X86::COND_NE;
+ case X86::SETNOr: case X86::SETNOm: return X86::COND_NO;
+ case X86::SETNPr: case X86::SETNPm: return X86::COND_NP;
+ case X86::SETNSr: case X86::SETNSm: return X86::COND_NS;
+ case X86::SETOr: case X86::SETOm: return X86::COND_O;
+ case X86::SETPr: case X86::SETPm: return X86::COND_P;
+ case X86::SETSr: case X86::SETSm: return X86::COND_S;
+ }
+}
+
+/// getCondFromCmovOpc - return condition code of a CMov opcode.
+static X86::CondCode getCondFromCMovOpc(unsigned Opc) {
+ switch (Opc) {
+ default: return X86::COND_INVALID;
+ case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm:
+ case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr:
+ return X86::COND_A;
+ case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm:
+ case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr:
+ return X86::COND_AE;
+ case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm:
+ case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr:
+ return X86::COND_B;
+ case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm:
+ case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr:
+ return X86::COND_BE;
+ case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm:
+ case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr:
+ return X86::COND_E;
+ case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm:
+ case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr:
+ return X86::COND_G;
+ case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm:
+ case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr:
+ return X86::COND_GE;
+ case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm:
+ case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr:
+ return X86::COND_L;
+ case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm:
+ case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr:
+ return X86::COND_LE;
+ case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm:
+ case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr:
+ return X86::COND_NE;
+ case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm:
+ case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr:
+ return X86::COND_NO;
+ case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm:
+ case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr:
+ return X86::COND_NP;
+ case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm:
+ case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr:
+ return X86::COND_NS;
+ case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm:
+ case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr:
+ return X86::COND_O;
+ case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm:
+ case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr:
+ return X86::COND_P;
+ case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm:
+ case X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr:
+ return X86::COND_S;
+ }
+}
+
unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Illegal condition code!");
}
}
+/// getSwappedCondition - assume the flags are set by MI(a,b), return
+/// the condition code if we modify the instructions such that flags are
+/// set by MI(b,a).
+X86::CondCode getSwappedCondition(X86::CondCode CC) {
+ switch (CC) {
+ default: return X86::COND_INVALID;
+ case X86::COND_E: return X86::COND_E;
+ case X86::COND_NE: return X86::COND_NE;
+ case X86::COND_L: return X86::COND_G;
+ case X86::COND_LE: return X86::COND_GE;
+ case X86::COND_G: return X86::COND_L;
+ case X86::COND_GE: return X86::COND_LE;
+ case X86::COND_B: return X86::COND_A;
+ case X86::COND_BE: return X86::COND_AE;
+ case X86::COND_A: return X86::COND_B;
+ case X86::COND_AE: return X86::COND_BE;
+ }
+}
+
+/// getSETFromCond - Return a set opcode for the given condition and
+/// whether it has memory operand.
+static unsigned getSETFromCond(X86::CondCode CC,
+ bool HasMemoryOperand) {
+ static const unsigned Opc[16][2] = {
+ { X86::SETAr, X86::SETAm },
+ { X86::SETAEr, X86::SETAEm },
+ { X86::SETBr, X86::SETBm },
+ { X86::SETBEr, X86::SETBEm },
+ { X86::SETEr, X86::SETEm },
+ { X86::SETGr, X86::SETGm },
+ { X86::SETGEr, X86::SETGEm },
+ { X86::SETLr, X86::SETLm },
+ { X86::SETLEr, X86::SETLEm },
+ { X86::SETNEr, X86::SETNEm },
+ { X86::SETNOr, X86::SETNOm },
+ { X86::SETNPr, X86::SETNPm },
+ { X86::SETNSr, X86::SETNSm },
+ { X86::SETOr, X86::SETOm },
+ { X86::SETPr, X86::SETPm },
+ { X86::SETSr, X86::SETSm }
+ };
+
+ assert(CC < 16 && "Can only handle standard cond codes");
+ return Opc[CC][HasMemoryOperand ? 1 : 0];
+}
+
+/// getCMovFromCond - Return a cmov opcode for the given condition,
+/// register size in bytes, and operand type.
+static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
+ bool HasMemoryOperand) {
+ static const unsigned Opc[32][3] = {
+ { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
+ { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
+ { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
+ { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr },
+ { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr },
+ { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr },
+ { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr },
+ { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr },
+ { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr },
+ { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr },
+ { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr },
+ { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr },
+ { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr },
+ { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr },
+ { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr },
+ { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr },
+ { X86::CMOVA16rm, X86::CMOVA32rm, X86::CMOVA64rm },
+ { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm },
+ { X86::CMOVB16rm, X86::CMOVB32rm, X86::CMOVB64rm },
+ { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm },
+ { X86::CMOVE16rm, X86::CMOVE32rm, X86::CMOVE64rm },
+ { X86::CMOVG16rm, X86::CMOVG32rm, X86::CMOVG64rm },
+ { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm },
+ { X86::CMOVL16rm, X86::CMOVL32rm, X86::CMOVL64rm },
+ { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm },
+ { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm },
+ { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm },
+ { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm },
+ { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm },
+ { X86::CMOVO16rm, X86::CMOVO32rm, X86::CMOVO64rm },
+ { X86::CMOVP16rm, X86::CMOVP32rm, X86::CMOVP64rm },
+ { X86::CMOVS16rm, X86::CMOVS32rm, X86::CMOVS64rm }
+ };
+
+ assert(CC < 16 && "Can only handle standard cond codes");
+ unsigned Idx = HasMemoryOperand ? 16+CC : CC;
+ switch(RegBytes) {
+ default: llvm_unreachable("Illegal register size!");
+ case 2: return Opc[Idx][0];
+ case 4: return Opc[Idx][1];
+ case 8: return Opc[Idx][2];
+ }
+}
+
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
+ if (!MI->isTerminator()) return false;
// Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
+ if (MI->isBranch() && !MI->isBarrier())
return true;
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return true;
return !isPredicated(MI);
}
// A terminator that isn't a branch can't easily be handled by this
// analysis.
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
// Handle unconditional branches.
}
// Handle conditional branches.
- X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
+ X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode());
if (BranchCode == X86::COND_INVALID)
return true; // Can't handle indirect branch.
if (I->isDebugValue())
continue;
if (I->getOpcode() != X86::JMP_4 &&
- GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
+ getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
break;
// Remove the branch.
I->eraseFromParent();
return Count;
}
+bool X86InstrInfo::
+canInsertSelect(const MachineBasicBlock &MBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+ // Not all subtargets have cmov instructions.
+ if (!TM.getSubtarget<X86Subtarget>().hasCMov())
+ return false;
+ if (Cond.size() != 1)
+ return false;
+ // We cannot do the composite conditions, at least not in SSA form.
+ if ((X86::CondCode)Cond[0].getImm() > X86::COND_S)
+ return false;
+
+ // Check register classes.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ if (!RC)
+ return false;
+
+ // We have cmov instructions for 16, 32, and 64 bit general purpose registers.
+ if (X86::GR16RegClass.hasSubClassEq(RC) ||
+ X86::GR32RegClass.hasSubClassEq(RC) ||
+ X86::GR64RegClass.hasSubClassEq(RC)) {
+ // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
+ // Bridge. Probably Ivy Bridge as well.
+ CondCycles = 2;
+ TrueCycles = 2;
+ FalseCycles = 2;
+ return true;
+ }
+
+ // Can't do vectors.
+ return false;
+}
+
+void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ assert(Cond.size() == 1 && "Invalid Cond array");
+ unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(),
+ MRI.getRegClass(DstReg)->getSize(),
+ false/*HasMemoryOperand*/);
+ BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
+}
+
/// isHReg - Test if the given register is a physical h register.
static bool isHReg(unsigned Reg) {
return X86::GR8_ABCD_HRegClass.contains(Reg);
NewMIs.push_back(MIB);
}
+bool X86InstrInfo::
+analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
+ int &CmpMask, int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ case X86::CMP64rr:
+ case X86::CMP32rr:
+ case X86::CMP16rr:
+ case X86::CMP8rr:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = MI->getOperand(1).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ }
+ return false;
+}
+
+/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// purpose is to update flags, can be made redundant.
+/// CMPrr can be made redundant by SUBrr if the operands are the same.
+/// This function can be extended later on.
+/// SrcReg, SrcRegs: register operands for FlagI.
+/// ImmValue: immediate for FlagI if it takes an immediate.
+inline static bool isRedundantFlagInstr(MachineInstr *FlagI, unsigned SrcReg,
+ unsigned SrcReg2, int ImmValue,
+ MachineInstr *OI) {
+ if (((FlagI->getOpcode() == X86::CMP64rr &&
+ OI->getOpcode() == X86::SUB64rr) ||
+ (FlagI->getOpcode() == X86::CMP32rr &&
+ OI->getOpcode() == X86::SUB32rr)||
+ (FlagI->getOpcode() == X86::CMP16rr &&
+ OI->getOpcode() == X86::SUB16rr)||
+ (FlagI->getOpcode() == X86::CMP8rr &&
+ OI->getOpcode() == X86::SUB8rr)) &&
+ ((OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getReg() == SrcReg2) ||
+ (OI->getOperand(1).getReg() == SrcReg2 &&
+ OI->getOperand(2).getReg() == SrcReg)))
+ return true;
+
+ if (((FlagI->getOpcode() == X86::CMP64ri32 &&
+ OI->getOpcode() == X86::SUB64ri32) ||
+ (FlagI->getOpcode() == X86::CMP64ri8 &&
+ OI->getOpcode() == X86::SUB64ri8) ||
+ (FlagI->getOpcode() == X86::CMP32ri &&
+ OI->getOpcode() == X86::SUB32ri) ||
+ (FlagI->getOpcode() == X86::CMP32ri8 &&
+ OI->getOpcode() == X86::SUB32ri8) ||
+ (FlagI->getOpcode() == X86::CMP16ri &&
+ OI->getOpcode() == X86::SUB16ri) ||
+ (FlagI->getOpcode() == X86::CMP16ri8 &&
+ OI->getOpcode() == X86::SUB16ri8) ||
+ (FlagI->getOpcode() == X86::CMP8ri &&
+ OI->getOpcode() == X86::SUB8ri)) &&
+ OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getImm() == ImmValue)
+ return true;
+ return false;
+}
+
+/// optimizeCompareInstr - Check if there exists an earlier instruction that
+/// operates on the same source operands and sets flags in the same way as
+/// Compare; remove Compare if possible.
+bool X86InstrInfo::
+optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const {
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
+
+ // CmpInstr is the first instruction of the BB.
+ MachineBasicBlock::iterator I = CmpInstr, Def = MI;
+
+ // We are searching for an earlier instruction that can make CmpInstr
+ // redundant and that instruction will be saved in Sub.
+ MachineInstr *Sub = NULL;
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ // We iterate backward, starting from the instruction before CmpInstr and
+ // stop when reaching the definition of a source register or done with the BB.
+ // RI points to the instruction before CmpInstr.
+ // If the definition is in this basic block, RE points to the definition;
+ // otherwise, RE is the rend of the basic block.
+ MachineBasicBlock::reverse_iterator
+ RI = MachineBasicBlock::reverse_iterator(I),
+ RE = CmpInstr->getParent() == MI->getParent() ?
+ MachineBasicBlock::reverse_iterator(++Def) /* points to MI */ :
+ CmpInstr->getParent()->rend();
+ for (; RI != RE; ++RI) {
+ MachineInstr *Instr = &*RI;
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, Instr)) {
+ Sub = Instr;
+ break;
+ }
+
+ if (Instr->modifiesRegister(X86::EFLAGS, TRI) ||
+ Instr->readsRegister(X86::EFLAGS, TRI))
+ // This instruction modifies or uses EFLAGS.
+ // We can't remove CmpInstr.
+ return false;
+ }
+
+ // Return false if no candidates exist.
+ if (!Sub)
+ return false;
+
+ bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg);
+
+ // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
+ // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
+ // If we are done with the basic block, we need to check whether EFLAGS is
+ // live-out.
+ bool IsSafe = false;
+ SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate;
+ MachineBasicBlock::iterator E = CmpInstr->getParent()->end();
+ for (++I; I != E; ++I) {
+ const MachineInstr &Instr = *I;
+ if (Instr.modifiesRegister(X86::EFLAGS, TRI)) {
+ // It is safe to remove CmpInstr if EFLAGS is updated again.
+ IsSafe = true;
+ break;
+ }
+ if (!Instr.readsRegister(X86::EFLAGS, TRI))
+ continue;
+
+ // EFLAGS is used by this instruction.
+ if (IsSwapped) {
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
+ // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ // We decode the condition code from opcode, swap the condition code,
+ // and synthesize the new opcode.
+ bool OpcIsSET = false;
+ X86::CondCode OldCC;
+ if (Instr.isBranch())
+ OldCC = getCondFromBranchOpc(Instr.getOpcode());
+ else {
+ OldCC = getCondFromSETOpc(Instr.getOpcode());
+ if (OldCC != X86::COND_INVALID)
+ OpcIsSET = true;
+ else
+ OldCC = getCondFromCMovOpc(Instr.getOpcode());
+ }
+ if (OldCC == X86::COND_INVALID) return false;
+ X86::CondCode NewCC = getSwappedCondition(OldCC);
+ if (NewCC == X86::COND_INVALID) return false;
+
+ // Synthesize the new opcode.
+ bool HasMemoryOperand = Instr.hasOneMemOperand();
+ unsigned NewOpc;
+ if (Instr.isBranch())
+ NewOpc = GetCondBranchFromCond(NewCC);
+ else if(OpcIsSET)
+ NewOpc = getSETFromCond(NewCC, HasMemoryOperand);
+ else {
+ unsigned DstReg = Instr.getOperand(0).getReg();
+ NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(),
+ HasMemoryOperand);
+ }
+
+ // Push the MachineInstr to OpsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // instructions will be modified.
+ OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
+ }
+ if (Instr.killsRegister(X86::EFLAGS, TRI)) {
+ IsSafe = true;
+ break;
+ }
+ }
+
+ // If EFLAGS is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if (IsSwapped && !IsSafe) {
+ MachineBasicBlock *MBB = CmpInstr->getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(X86::EFLAGS))
+ return false;
+ }
+
+ // Make sure Sub instruction defines EFLAGS.
+ assert(Sub->getNumOperands() >= 4 && Sub->getOperand(3).isReg() &&
+ Sub->getOperand(3).getReg() == X86::EFLAGS &&
+ "EFLAGS should be the 4th operand of SUBrr or SUBri.");
+ Sub->getOperand(3).setIsDef(true);
+ CmpInstr->eraseFromParent();
+
+ // Modify the condition code of instructions in OpsToUpdate.
+ for (unsigned i = 0, e = OpsToUpdate.size(); i < e; i++)
+ OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second));
+ return true;
+}
+
/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
/// instruction with two undef reads of the register being defined. This is
/// used for mapping:
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
switch (MI->getOpcode()) {
case X86::V_SET0:
+ case X86::FsFLD0SS:
+ case X86::FsFLD0SD:
return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return NULL;
bool NarrowToMOV32rm = false;
if (Size) {
- unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize();
+ unsigned RCSize = getRegClass(MI->getDesc(), i, &RI, MF)->getSize();
if (Size < RCSize) {
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
case X86::RCPSSr:
case X86::RCPSSr_Int:
case X86::ROUNDSDr:
+ case X86::ROUNDSDr_Int:
case X86::ROUNDSSr:
+ case X86::ROUNDSSr_Int:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
case X86::Int_VCVTSS2SDrr:
case X86::VRCPSSr:
case X86::VROUNDSDr:
+ case X86::VROUNDSDr_Int:
case X86::VROUNDSSr:
+ case X86::VROUNDSSr_Int:
case X86::VRSQRTSSr:
case X86::VSQRTSSr:
return true;
switch (LoadMI->getOpcode()) {
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
+ case X86::AVX2_SETALLONES:
+ case X86::AVX2_SET0:
Alignment = 32;
break;
case X86::V_SET0:
Alignment = 16;
break;
case X86::FsFLD0SD:
- case X86::VFsFLD0SD:
Alignment = 8;
break;
case X86::FsFLD0SS:
- case X86::VFsFLD0SS:
Alignment = 4;
break;
default:
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
case X86::AVX_SETALLONES:
+ case X86::AVX2_SETALLONES:
+ case X86::AVX2_SET0:
case X86::FsFLD0SD:
- case X86::FsFLD0SS:
- case X86::VFsFLD0SD:
- case X86::VFsFLD0SS: {
+ case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
MachineConstantPool &MCP = *MF.getConstantPool();
Type *Ty;
unsigned Opc = LoadMI->getOpcode();
- if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
+ if (Opc == X86::FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
+ else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
+ else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0)
+ Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
- bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES);
+ bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES ||
+ Opc == X86::AVX2_SETALLONES);
const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
UnfoldStore &= FoldedStore;
const MCInstrDesc &MCID = get(Opc);
- const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
if (!MI->hasOneMemOperand() &&
RC == &X86::VR128RegClass &&
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
// Emit the store instruction.
if (UnfoldStore) {
- const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI);
+ const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
std::pair<MachineInstr::mmo_iterator,
MachineInstr::mmo_iterator> MMOs =
MF.extractStoreMemRefs(MI->memoperands_begin(),
bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
bool FoldedStore = I->second.second & TB_FOLDED_STORE;
const MCInstrDesc &MCID = get(Opc);
- const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
unsigned NumDefs = MCID.NumDefs;
std::vector<SDValue> AddrOps;
std::vector<SDValue> BeforeOps;
// Emit the load instruction.
SDNode *Load = 0;
- MachineFunction &MF = DAG.getMachineFunction();
if (FoldedLoad) {
EVT VT = *RC->vt_begin();
std::pair<MachineInstr::mmo_iterator,
std::vector<EVT> VTs;
const TargetRegisterClass *DstRC = 0;
if (MCID.getNumDefs() > 0) {
- DstRC = getRegClass(MCID, 0, &RI);
+ DstRC = getRegClass(MCID, 0, &RI, MF);
VTs.push_back(*DstRC->vt_begin());
}
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
// Create the register. The code to initialize it is inserted
// later, by the CGBR pass (below).
MachineRegisterInfo &RegInfo = MF->getRegInfo();
- GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ GlobalBaseReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
X86FI->setGlobalBaseReg(GlobalBaseReg);
return GlobalBaseReg;
}
// These are the replaceable SSE instructions. Some of these have Int variants
// that we don't include here. We don't want to replace instructions selected
// by intrinsics.
-static const unsigned ReplaceableInstrs[][3] = {
+static const uint16_t ReplaceableInstrs[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
{ X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
{ X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
};
-static const unsigned ReplaceableInstrsAVX2[][3] = {
+static const uint16_t ReplaceableInstrsAVX2[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
{ X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
{ X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
{ X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
{ X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
- { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }
+ { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
+ { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+ { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
+ { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
+ { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
+ { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
-static const unsigned *lookup(unsigned opcode, unsigned domain) {
+static const uint16_t *lookup(unsigned opcode, unsigned domain) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
if (ReplaceableInstrs[i][domain-1] == opcode)
return ReplaceableInstrs[i];
return 0;
}
-static const unsigned *lookupAVX2(unsigned opcode, unsigned domain) {
+static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
return ReplaceableInstrsAVX2[i];
assert(Domain>0 && Domain<4 && "Invalid execution domain");
uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
- const unsigned *table = lookup(MI->getOpcode(), dom);
- if (!table) // try the other table
+ const uint16_t *table = lookup(MI->getOpcode(), dom);
+ if (!table) { // try the other table
+ assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) &&
+ "256-bit vector operations only available in AVX2");
table = lookupAVX2(MI->getOpcode(), dom);
+ }
assert(table && "Cannot change domain");
MI->setDesc(get(table[Domain-1]));
}
unsigned PC;
if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
- PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
else
PC = GlobalBaseReg;
char CGBR::ID = 0;
FunctionPass*
llvm::createGlobalBaseRegPass() { return new CGBR(); }
+
+namespace {
+ struct LDTLSCleanup : public MachineFunctionPass {
+ static char ID;
+ LDTLSCleanup() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ X86MachineFunctionInfo* MFI = MF.getInfo<X86MachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+ // No point folding accesses if there isn't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ return VisitNode(DT->getRootNode(), 0);
+ }
+
+ // Visit the dominator subtree rooted at Node in pre-order.
+ // If TLSBaseAddrReg is non-null, then use that to replace any
+ // TLS_base_addr instructions. Otherwise, create the register
+ // when the first such instruction is seen, and then use it
+ // as we encounter more instructions.
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ switch (I->getOpcode()) {
+ case X86::TLS_base_addr32:
+ case X86::TLS_base_addr64:
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
+ else
+ I = SetRegister(I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Visit the children of this block in the dominator tree.
+ for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
+ I != E; ++I) {
+ Changed |= VisitNode(*I, TLSBaseAddrReg);
+ }
+
+ return Changed;
+ }
+
+ // Replace the TLS_base_addr instruction I with a copy from
+ // TLSBaseAddrReg, returning the new instruction.
+ MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
+ unsigned TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF->getTarget());
+ const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ is64Bit ? X86::RAX : X86::EAX)
+ .addReg(TLSBaseAddrReg);
+
+ // Erase the TLS_base_addr instruction.
+ I->eraseFromParent();
+
+ return Copy;
+ }
+
+ // Create a virtal register in *TLSBaseAddrReg, and populate it by
+ // inserting a copy instruction after I. Returns the new instruction.
+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF->getTarget());
+ const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit
+ ? &X86::GR64RegClass
+ : &X86::GR32RegClass);
+
+ // Insert a copy from RAX/EAX to TLSBaseAddrReg.
+ MachineInstr *Next = I->getNextNode();
+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ *TLSBaseAddrReg)
+ .addReg(is64Bit ? X86::RAX : X86::EAX);
+
+ return Copy;
+ }
+
+ virtual const char *getPassName() const {
+ return "Local Dynamic TLS Access Clean-up";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char LDTLSCleanup::ID = 0;
+FunctionPass*
+llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }