case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
+ case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
MachineBasicBlock *MBB = Inst->getParent();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ unsigned Opcode = Inst->getOpcode();
+
// Handle some special cases
- switch(Inst->getOpcode()) {
+ switch (Opcode) {
case AMDGPU::S_MOV_B64: {
DebugLoc DL = Inst->getDebugLoc();
Inst->RemoveOperand(i);
}
- // Add the implict and explicit register definitions.
- if (NewDesc.ImplicitUses) {
- for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
- unsigned Reg = NewDesc.ImplicitUses[i];
- Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
- }
+ if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
+ // We are converting these to a BFE, so we need to add the missing
+ // operands for the size and offset.
+ unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(Size));
+
+ // XXX - Other pointless operands. There are 4, but it seems you only need
+ // 3 to not hit an assertion later in MCInstLower.
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(0));
}
- if (NewDesc.ImplicitDefs) {
- for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
- unsigned Reg = NewDesc.ImplicitDefs[i];
- Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
- }
- }
+ addDescImplicitUseDef(NewDesc, Inst);
// Update the destination register class.
const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
- switch (Inst->getOpcode()) {
+ switch (Opcode) {
// For target instructions, getOpRegClass just returns the virtual
// register class associated with the operand, so we need to find an
// equivalent VGPR register class in order to move the instruction to the
Worklist.push_back(HiHalf);
}
+// Appends the implicit register operands listed in NewDesc to Inst: first one
+// implicit use per entry of NewDesc.ImplicitUses, then one implicit def per
+// entry of NewDesc.ImplicitDefs. A null list contributes nothing; a non-null
+// list is read up to (not including) its first zero entry.
+void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
+                                        MachineInstr *Inst) const {
+  if (NewDesc.ImplicitUses) {
+    unsigned UseIdx = 0;
+    while (unsigned UseReg = NewDesc.ImplicitUses[UseIdx++]) {
+      // CreateReg(Reg, isDef = false, isImp = true)
+      Inst->addOperand(MachineOperand::CreateReg(UseReg, false, true));
+    }
+  }
+
+  if (NewDesc.ImplicitDefs) {
+    unsigned DefIdx = 0;
+    while (unsigned DefReg = NewDesc.ImplicitDefs[DefIdx++]) {
+      // CreateReg(Reg, isDef = true, isImp = true)
+      Inst->addOperand(MachineOperand::CreateReg(DefReg, true, true));
+    }
+  }
+}
+
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
-//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>;
-//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>;
+def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", // selected for a 32-bit sext_inreg from i8
+ [(set i32:$dst, (sext_inreg i32:$src0, i8))]
+>;
+def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", // selected for a 32-bit sext_inreg from i16
+ [(set i32:$dst, (sext_inreg i32:$src0, i16))]
+>;
////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
>;
*/
+// Handle sext_inreg in i64.
+//
+// The low 32 bits are sign-extended in place by S_SEXT_I32_*. The high 32 bits
+// must be 32 copies of the resulting sign bit, so they are computed as an
+// arithmetic shift right by 31 of that sign-extended low half. A constant -1
+// in the high half would only be correct when the narrow value is negative.
+//
+// NOTE(review): FileCheck expectations for the *_to_i64 sext_in_reg tests that
+// match "S_MOV_B32 ..., -1" for the high half must expect S_ASHR_I32 instead.
+def : Pat <
+  (i64 (sext_inreg i64:$src, i8)),
+  (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+    (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
+    (S_ASHR_I32 (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), 31), sub1)
+>;
+
+def : Pat <
+  (i64 (sext_inreg i64:$src, i16)),
+  (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+    (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
+    (S_ASHR_I32 (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), 31), sub1)
+>;
+
let isCompare = 1 in {
def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
; FUNC-LABEL: @sext_in_reg_i8_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
+; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; FUNC-LABEL: @sext_in_reg_i16_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 16
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
+; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; FUNC-LABEL: @sext_in_reg_i8_to_v1i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
+; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
}
; FUNC-LABEL: @sext_in_reg_i8_to_i64
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
-; SI: BUFFER_STORE_DWORD
+; SI: S_ADD_I32 [[VAL:s[0-9]+]],
+; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
+; SI: BUFFER_STORE_DWORDX2
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
}
; FUNC-LABEL: @sext_in_reg_i16_to_i64
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 16
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
-; SI: BUFFER_STORE_DWORD
+; SI: S_ADD_I32 [[VAL:s[0-9]+]],
+; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
+; SI: BUFFER_STORE_DWORDX2
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
}
; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
}
; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX4
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
}
; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
+; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
- %shl = shl <2 x i32> %c, <i32 24, i32 24>
- %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
+ %shl = shl <2 x i32> %c, <i32 16, i32 16> ; shl by 16 followed by ashr by 16 sign-extends the low 16 bits of each lane
+ %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
ret void
}