R600: Instructions that are not vector-only can be scheduled on the trans unit

author Vincent Lejeune <vljn@ovi.com>

Wed, 31 Jul 2013 19:31:56 +0000 (19:31 +0000)

committer Vincent Lejeune <vljn@ovi.com>

Wed, 31 Jul 2013 19:31:56 +0000 (19:31 +0000)
author Vincent Lejeune <vljn@ovi.com>
Wed, 31 Jul 2013 19:31:56 +0000 (19:31 +0000)
committer Vincent Lejeune <vljn@ovi.com>
Wed, 31 Jul 2013 19:31:56 +0000 (19:31 +0000)
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp

index 83f8e71fbfacdd6089bd5faf6e5773a50233ee96..7ef3d85123a1dd48697c843b91dc5c021ff2b545 100644 (file)
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -469,6 +469,9 @@ static bool
  isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
                    const std::vector<std::pair<int, unsigned> > &TransOps,
                    unsigned ConstCount) {
+  // The Trans ALU can read at most 2 constants
+  if (ConstCount > 2)
+    return false;
    for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
      const std::pair<int, unsigned> &Src = TransOps[i];
      unsigned Cycle = getTransSwizzle(TransSwz, i);
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp

index 0dc0365926ec59a6d175b676296123213473f0ba..0499dd52d923db8d37f4f8900742b995d489a8be 100644 (file)
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -9,7 +9,6 @@
  //
  /// \file
  /// \brief R600 Machine Scheduler interface
-// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
  //
  //===----------------------------------------------------------------------===//
  
@@ -29,6 +28,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
    DAG = dag;
    TII = static_cast<const R600InstrInfo*>(DAG->TII);
    TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
+  VLIW5 = !DAG->MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
    MRI = &DAG->MRI;
    CurInstKind = IDOther;
    CurEmitted = 0;
@@ -342,14 +342,16 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
    }
  }
  
-SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
+SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
    if (Q.empty())
      return NULL;
    for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
        It != E; ++It) {
      SUnit *SU = *It;
      InstructionsGroupCandidate.push_back(SU->getInstr());
-    if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {
+    if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)
+        && (!AnyALU || !TII->isVectorOnly(SU->getInstr()))
+    ) {
        InstructionsGroupCandidate.pop_back();
        Q.erase((It + 1).base());
        return SU;
@@ -373,6 +375,8 @@ void R600SchedStrategy::PrepareNextSlot() {
    DEBUG(dbgs() << "New Slot\n");
    assert (OccupedSlotsMask && "Slot wasn't filled");
    OccupedSlotsMask = 0;
+//  if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
+//    OccupedSlotsMask |= 16;
    InstructionsGroupCandidate.clear();
    LoadAlu();
  }
@@ -409,12 +413,12 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
    }
  }
  
-SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
+SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
    static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
-  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
+  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
    if (SlotedSU)
      return SlotedSU;
-  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
+  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
    if (UnslotedSU)
      AssignSlot(UnslotedSU->getInstr(), Slot);
    return UnslotedSU;
@@ -434,30 +438,35 @@ SUnit* R600SchedStrategy::pickAlu() {
        // Bottom up scheduling : predX must comes first
        if (!AvailableAlus[AluPredX].empty()) {
          OccupedSlotsMask |= 31;
-        return PopInst(AvailableAlus[AluPredX]);
+        return PopInst(AvailableAlus[AluPredX], false);
        }
        // Flush physical reg copies (RA will discard them)
        if (!AvailableAlus[AluDiscarded].empty()) {
          OccupedSlotsMask |= 31;
-        return PopInst(AvailableAlus[AluDiscarded]);
+        return PopInst(AvailableAlus[AluDiscarded], false);
        }
        // If there is a T_XYZW alu available, use it
        if (!AvailableAlus[AluT_XYZW].empty()) {
          OccupedSlotsMask |= 15;
-        return PopInst(AvailableAlus[AluT_XYZW]);
+        return PopInst(AvailableAlus[AluT_XYZW], false);
        }
      }
      bool TransSlotOccuped = OccupedSlotsMask & 16;
-    if (!TransSlotOccuped) {
+    if (!TransSlotOccuped && VLIW5) {
        if (!AvailableAlus[AluTrans].empty()) {
          OccupedSlotsMask |= 16;
-        return PopInst(AvailableAlus[AluTrans]);
+        return PopInst(AvailableAlus[AluTrans], false);
+      }
+      SUnit *SU = AttemptFillSlot(3, true);
+      if (SU) {
+        OccupedSlotsMask |= 16;
+        return SU;
        }
      }
      for (int Chan = 3; Chan > -1; --Chan) {
        bool isOccupied = OccupedSlotsMask & (1 << Chan);
        if (!isOccupied) {
-        SUnit *SU = AttemptFillSlot(Chan);
+        SUnit *SU = AttemptFillSlot(Chan, false);
          if (SU) {
            OccupedSlotsMask |= (1 << Chan);
            InstructionsGroupCandidate.push_back(SU->getInstr());
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h

index f8965d8998a4cac8bd66834203c9f524cf17d99f..0a6f1204a4d913d4144ec44ec8ce7dea6dd639c6 100644 (file)
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -84,15 +84,16 @@ public:
  
  private:
    std::vector<MachineInstr *> InstructionsGroupCandidate;
+  bool VLIW5;
  
    int getInstKind(SUnit *SU);
    bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
    AluKind getAluKind(SUnit *SU) const;
    void LoadAlu();
    unsigned AvailablesAluCount() const;
-  SUnit *AttemptFillSlot (unsigned Slot);
+  SUnit *AttemptFillSlot (unsigned Slot, bool AnyAlu);
    void PrepareNextSlot();
-  SUnit *PopInst(std::vector<SUnit*> &Q);
+  SUnit *PopInst(std::vector<SUnit*> &Q, bool AnyALU);
  
    void AssignSlot(MachineInstr *MI, unsigned Slot);
    SUnit* pickAlu();
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp

index 5cf1fd3b665e06a6b8ad7a974bb027219d2cf27f..6c70052b20388bdc7288d5e00ff7bee10af5f302 100644 (file)
--- a/lib/Target/R600/R600Packetizer.cpp
+++ b/lib/Target/R600/R600Packetizer.cpp
@@ -58,6 +58,8 @@ class R600PacketizerList : public VLIWPacketizerList {
  private:
    const R600InstrInfo *TII;
    const R600RegisterInfo &TRI;
+  bool VLIW5;
+  bool ConsideredInstUsesAlreadyWrittenVectorElement;
  
    unsigned getSlot(const MachineInstr *MI) const {
      return TRI.getHWRegChan(MI->getOperand(0).getReg());
@@ -74,7 +76,13 @@ private:
      MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
      if (I->isBundle())
        BI++;
+    int LastDstChan = -1;
      do {
+      bool isTrans = false;
+      int BISlot = getSlot(BI);
+      if (LastDstChan >= BISlot)
+        isTrans = true;
+      LastDstChan = BISlot;
        if (TII->isPredicated(BI))
          continue;
        int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
@@ -85,7 +93,7 @@ private:
          continue;
        }
        unsigned Dst = BI->getOperand(DstIdx).getReg();
-      if (TII->isTransOnly(BI)) {
+      if (isTrans || TII->isTransOnly(BI)) {
          Result[Dst] = AMDGPU::PS;
          continue;
        }
@@ -142,10 +150,14 @@ public:
                          MachineDominatorTree &MDT)
    : VLIWPacketizerList(MF, MLI, MDT, true),
      TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
-    TRI(TII->getRegisterInfo()) { }
+    TRI(TII->getRegisterInfo()) {
+    VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
+  }
  
    // initPacketizerState - initialize some internal flags.
-  void initPacketizerState() { }
+  void initPacketizerState() {
+    ConsideredInstUsesAlreadyWrittenVectorElement = false;
+  }
  
    // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
    bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
@@ -172,8 +184,8 @@ public:
    // together.
    bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
      MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
-    if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))
-      return false;
+    if (getSlot(MII) == getSlot(MIJ))
+      ConsideredInstUsesAlreadyWrittenVectorElement = true;
      // Does MII and MIJ share the same pred_sel ?
      int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
          OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
@@ -211,6 +223,20 @@ public:
                                   std::vector<R600InstrInfo::BankSwizzle> &BS,
                                   bool &isTransSlot) {
      isTransSlot = TII->isTransOnly(MI);
+    assert (!isTransSlot || VLIW5);
+
+    // Is the dst reg sequence legal ?
+    if (!isTransSlot && !CurrentPacketMIs.empty()) {
+      if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
+        if (ConsideredInstUsesAlreadyWrittenVectorElement  &&
+            !TII->isVectorOnly(MI) && VLIW5) {
+          isTransSlot = true;
+          DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
+        }
+        else
+          return false;
+      }
+    }
  
      // Are the Constants limitations met ?
      CurrentPacketMIs.push_back(MI);
@@ -278,6 +304,8 @@ public:
        return It;
      }
      endPacket(MI->getParent(), MI);
+    if (TII->isTransOnly(MI))
+      return MI;
      return VLIWPacketizerList::addToPacket(MI);
    }
  };
diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/R600/and.ll

index 44c21bd47c1a5fcccd1d867d97074408862eaf35..03019ee961127c1b50d277fefb1633534638854a 100644 (file)
--- a/test/CodeGen/R600/and.ll
+++ b/test/CodeGen/R600/and.ll
@@ -21,7 +21,7 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  ;EG-CHECK: @test4
  ;EG-CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  ;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  ;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  
  ;SI-CHECK: @test4
diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll

index 9a672329e75ce33e557864152539726958998c90..205715db946ad981662d245ad1f12c918e1a4cff 100644 (file)
--- a/test/CodeGen/R600/fadd.ll
+++ b/test/CodeGen/R600/fadd.ll
@@ -18,7 +18,7 @@ declare void @llvm.AMDGPU.store.output(float, i32)
  ; CHECK: @fadd_v4f32
  ; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  ; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD  T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  ; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  
  define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll

index 7373a214790ee067153c798a8f591db791fbd440..1d4e323d3abf8630dee14037fbcf15cf1cbfaab3 100644 (file)
--- a/test/CodeGen/R600/fcmp-cnd.ll
+++ b/test/CodeGen/R600/fcmp-cnd.ll
@@ -2,7 +2,7 @@
  
  ;Not checking arguments 2 and 3 to CNDE, because they may change between
  ;registers and literal.x depending on what the optimizer does.
-;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: CNDE  T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  
  define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
  entry:
diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll

index dc3a779dd6093d58d37b2a542e8b1ff81bbe260d..c76a7587656558b95b0cf62b15aeac6fdcaa32b8 100644 (file)
--- a/test/CodeGen/R600/fcmp.ll
+++ b/test/CodeGen/R600/fcmp.ll
@@ -1,7 +1,7 @@
  ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
  
  ; CHECK: @fcmp_sext
-; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: SETE_DX10  T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  
  define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
  entry:
diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll

index c581ec9b9cc70ac86032432d963f287adf9bbe11..acd79245a07dd94517dac328e9ffd6f79f784693 100644 (file)
--- a/test/CodeGen/R600/fdiv.ll
+++ b/test/CodeGen/R600/fdiv.ll
@@ -1,13 +1,13 @@
  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
  
  ;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
-;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
+;CHECK-DAG: MUL_IEEE  T{{[0-9]+\.[XYZW]}}
  ;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
-;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
+;CHECK-DAG: MUL_IEEE  T{{[0-9]+\.[XYZW]}}
  ;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
-;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
+;CHECK-DAG: MUL_IEEE  T{{[0-9]+\.[XYZW]}}
  ;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
-;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
+;CHECK-DAG: MUL_IEEE  T{{[0-9]+\.[XYZW]}}
  
  define void @test(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
  entry:
diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll

index a40e818c12ce502fe4d469c3ef1239abbb8d28a4..eec673c958820b5a930931f31bea358e158b9fb5 100644 (file)
--- a/test/CodeGen/R600/fmul.ll
+++ b/test/CodeGen/R600/fmul.ll
@@ -18,7 +18,7 @@ declare void @llvm.AMDGPU.store.output(float, i32)
  ; CHECK: @fmul_v4f32
  ; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  ; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  ; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  
  define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/fmul.v4f32.ll b/test/CodeGen/R600/fmul.v4f32.ll

index 74a58f74026ab7ba5611e8d4a89592dbf78261e7..b58df70c89202f69d0cf2490998305d3a11ef93f 100644 (file)
--- a/test/CodeGen/R600/fmul.v4f32.ll
+++ b/test/CodeGen/R600/fmul.v4f32.ll
@@ -2,7 +2,7 @@
  
  ;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  ;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE  T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  ;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  
  define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll

index 799db0c74c1d9ac221fc2c56bbf2a83760998900..f7083cd6ca0596cb07c02963cebc19526aacdec2 100644 (file)
--- a/test/CodeGen/R600/fneg.ll
+++ b/test/CodeGen/R600/fneg.ll
@@ -12,7 +12,7 @@ entry:
  
  ; CHECK: @fneg_v4
  ; CHECK: -PV
-; CHECK: -PV
+; CHECK: -T
  ; CHECK: -PV
  ; CHECK: -PV
  define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll

index 7b430e3744fc47a019dd15744f12e13183e773b0..c05163105cd90283f38de9553f0d46dc1685fc99 100644 (file)
--- a/test/CodeGen/R600/fp_to_sint.ll
+++ b/test/CodeGen/R600/fp_to_sint.ll
@@ -2,9 +2,9 @@
  ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
  
  ; R600-CHECK: @fp_to_sint_v4i32
-; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; R600-CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; R600-CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW]}}
+; R600-CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
  ; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
  ; SI-CHECK: @fp_to_sint_v4i32
  ; SI-CHECK: V_CVT_I32_F32_e32
diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll

index b07e286f43e2f9996b52a521eaa944e9279b69d0..55d473b2b0414dec361075e071c86fc4eb955fc5 100644 (file)
--- a/test/CodeGen/R600/fp_to_uint.ll
+++ b/test/CodeGen/R600/fp_to_uint.ll
@@ -2,7 +2,7 @@
  
  ; CHECK: @fp_to_uint_v4i32
  ; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW]}}
  ; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
  ; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
  
diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll

index f784cde46cd249657a8ef7060390c4a87061abb4..b712560b0a860ca16736235c5a21c0517df02cd8 100644 (file)
--- a/test/CodeGen/R600/fsub.ll
+++ b/test/CodeGen/R600/fsub.ll
@@ -18,7 +18,7 @@ declare void @llvm.AMDGPU.store.output(float, i32)
  ; CHECK: @fsub_v4f32
  ; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  ; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  ; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  
  define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll

index e3005fe82da1571156319649c3d0229b2f06e3ec..71705a64f50e5f79165939635bbacc713b0bf053 100644 (file)
--- a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
+++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
@@ -3,7 +3,7 @@
  ;Test that a select with reversed True/False values is correctly lowered
  ;to a SETNE_INT.  There should only be one SETNE_INT instruction.
  
-;CHECK: SETNE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  ;CHECK-NOT: SETNE_INT
  
  define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll

index 3d70e4bd54aa25803bc207b417bfe759ea965af2..8bdb050a126e9836d34928980a16c14b50b409d5 100644 (file)
--- a/test/CodeGen/R600/kcache-fold.ll
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -1,7 +1,7 @@
  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
  
  ; CHECK: @main1
-; CHECK: MOV T{{[0-9]+\.[XYZW], KC0}}
+; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}}
  define void @main1() {
  main_body:
    %0 = load <4 x float> addrspace(8)* null
diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll

index 77b168ebdee30df0c60de85f96c2c0a331718b4d..9f4660ab8d760f1f9d4a4f35c0a4d4d63f31dcc5 100644 (file)
--- a/test/CodeGen/R600/literals.ll
+++ b/test/CodeGen/R600/literals.ll
@@ -7,7 +7,8 @@
  ; ADD_INT literal.x KC0[2].Z, 5
  
  ; CHECK: @i32_literal
-; CHECK: ADD_INT * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
+; CHECK: ADD_INT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 5
  define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
  entry:
@@ -23,7 +24,8 @@ entry:
  ; ADD literal.x KC0[2].Z, 5.0
  
  ; CHECK: @float_literal
-; CHECK: ADD * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
+; CHECK: ADD T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.0
  define void @float_literal(float addrspace(1)* %out, float %in) {
  entry:
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll

index 7627783ce36812bf2788fc500d7430814e758308..b9be9c68309fa364e639ef0578459303e091d040 100644 (file)
--- a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
@@ -2,7 +2,7 @@
  ; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
  
  ; R600-CHECK: @amdgpu_trunc
-; R600-CHECK: TRUNC * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; R600-CHECK: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z
  ; SI-CHECK: @amdgpu_trunc
  ; SI-CHECK: V_TRUNC_F32
  
diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll

index 6d3610e101eeeadaef88e9295afa8a00acfc4b86..4a012d91eadf19175da34a85ee7f3f62c61111de 100644 (file)
--- a/test/CodeGen/R600/local-memory-two-objects.ll
+++ b/test/CodeGen/R600/local-memory-two-objects.ll
@@ -12,7 +12,7 @@
  ; CHECK-NEXT: .long 8
  
  ; Make sure the lds writes are using different addresses.
-; CHECK: LDS_WRITE {{[*]*}} {{PV|T}}[[ADDRW:[0-9]*\.[XYZW]]]
+; CHECK: LDS_WRITE {{\** *}}{{PV|T}}[[ADDRW:[0-9]*\.[XYZW]]]
  ; CHECK-NOT: LDS_WRITE {{[*]*}} T[[ADDRW]]
  
  ; GROUP_BARRIER must be the last instruction in a clause
diff --git a/test/CodeGen/R600/rotr.ll b/test/CodeGen/R600/rotr.ll

index 5c4c4e93c2188209f170c840d92a5c95ef10f164..c3995b64eb509b4e84c5eb4d61969305d2c4342e 100644 (file)
--- a/test/CodeGen/R600/rotr.ll
+++ b/test/CodeGen/R600/rotr.ll
@@ -19,7 +19,8 @@ entry:
  ; R600-CHECK: @rotl
  ; R600-CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x
  ; R600-CHECK-NEXT: 32
-; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
+; R600-CHECK: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
+
  
  ; SI-CHECK: @rotl
  ; SI-CHECK: V_SUB_I32_e64 [[DST:VGPR[0-9]+]], 32, {{[SV]GPR[0-9]+}}
diff --git a/test/CodeGen/R600/selectcc-cnd.ll b/test/CodeGen/R600/selectcc-cnd.ll

index d7287b487896045d8111a02756bac56736a28270..0bfca69374880dca8d13bfe351a884e03c63f815 100644 (file)
--- a/test/CodeGen/R600/selectcc-cnd.ll
+++ b/test/CodeGen/R600/selectcc-cnd.ll
@@ -1,8 +1,8 @@
  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
  
  ;CHECK-NOT: SETE
-;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
-;CHECK-NEXT: {{[-0-9]+\(2.0}}
+;CHECK: CNDE {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
+;CHECK: 1073741824
  define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
    %1 = load float addrspace(1)* %in
    %2 = fcmp oeq float %1, 0.0
diff --git a/test/CodeGen/R600/selectcc-cnde-int.ll b/test/CodeGen/R600/selectcc-cnde-int.ll

index 768dc7dbf418d84d9030c4c1cdaa9baef9a0556e..d568888f7cb254fbaeb7128b0855cff2629b6781 100644 (file)
--- a/test/CodeGen/R600/selectcc-cnde-int.ll
+++ b/test/CodeGen/R600/selectcc-cnde-int.ll
@@ -1,7 +1,7 @@
  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
  
  ;CHECK-NOT: SETE_INT
-;CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
+;CHECK: CNDE_INT {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
  ;CHECK-NEXT: 2
  define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
    %1 = load i32 addrspace(1)* %in
diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll

index 291a7bd85ac82924525fac17d65049d58056ab3d..bdc2ff40b76be7ec5c4595817119974ed641d2dd 100644 (file)
--- a/test/CodeGen/R600/set-dx10.ll
+++ b/test/CodeGen/R600/set-dx10.ll
@@ -5,7 +5,8 @@
  ; SET*DX10 instructions.
  
  ; CHECK: @fcmp_une_select_fptosi
-; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
  entry:
@@ -18,7 +19,8 @@ entry:
  }
  
  ; CHECK: @fcmp_une_select_i32
-; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
  entry:
@@ -29,7 +31,8 @@ entry:
  }
  
  ; CHECK: @fcmp_ueq_select_fptosi
-; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
  entry:
@@ -42,7 +45,8 @@ entry:
  }
  
  ; CHECK: @fcmp_ueq_select_i32
-; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
  entry:
@@ -53,7 +57,8 @@ entry:
  }
  
  ; CHECK: @fcmp_ugt_select_fptosi
-; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
  entry:
@@ -66,7 +71,8 @@ entry:
  }
  
  ; CHECK: @fcmp_ugt_select_i32
-; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
  entry:
@@ -77,7 +83,8 @@ entry:
  }
  
  ; CHECK: @fcmp_uge_select_fptosi
-; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
  entry:
@@ -90,7 +97,8 @@ entry:
  }
  
  ; CHECK: @fcmp_uge_select_i32
-; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
  entry:
@@ -101,7 +109,8 @@ entry:
  }
  
  ; CHECK: @fcmp_ule_select_fptosi
-; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
+; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
  entry:
@@ -114,7 +123,8 @@ entry:
  }
  
  ; CHECK: @fcmp_ule_select_i32
-; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
+; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
  entry:
@@ -125,7 +135,8 @@ entry:
  }
  
  ; CHECK: @fcmp_ult_select_fptosi
-; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
+; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
  entry:
@@ -138,7 +149,8 @@ entry:
  }
  
  ; CHECK: @fcmp_ult_select_i32
-; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
+; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
  entry:
diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll

index 10fce6cfa4a8991902c305eaa65e3f52e3e453bb..c7fed03d9492c83822609fd91f2678c59ffd9a1e 100644 (file)
--- a/test/CodeGen/R600/sub.ll
+++ b/test/CodeGen/R600/sub.ll
@@ -2,8 +2,8 @@
  ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
  
  ;EG-CHECK: @test2
-;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: SUB_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  
  ;SI-CHECK: @test2
  ;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
@@ -19,10 +19,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  }
  
  ;EG-CHECK: @test4
-;EG-CHECK: SUB_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  
  ;SI-CHECK: @test4
  ;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll

index cf29833b1913aae87206415c2ab9f1c88d27e513..d3aa060adc0e08d90a4e7c2c8832294082613e92 100644 (file)
--- a/test/CodeGen/R600/unsupported-cc.ll
+++ b/test/CodeGen/R600/unsupported-cc.ll
@@ -3,7 +3,8 @@
  ; These tests are for condition codes that are not supported by the hardware
  
  ; CHECK: @slt
-; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 5(7.006492e-45)
  define void @slt(i32 addrspace(1)* %out, i32 %in) {
  entry:
@@ -14,7 +15,8 @@ entry:
  }
  
  ; CHECK: @ult_i32
-; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 5(7.006492e-45)
  define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
  entry:
@@ -25,7 +27,8 @@ entry:
  }
  
  ; CHECK: @ult_float
-; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @ult_float(float addrspace(1)* %out, float %in) {
  entry:
@@ -36,7 +39,8 @@ entry:
  }
  
  ; CHECK: @olt
-; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR
  ;CHECK-NEXT: 1084227584(5.000000e+00)
  define void @olt(float addrspace(1)* %out, float %in) {
  entry:
@@ -47,7 +51,8 @@ entry:
  }
  
  ; CHECK: @sle
-; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 6(8.407791e-45)
  define void @sle(i32 addrspace(1)* %out, i32 %in) {
  entry:
@@ -58,7 +63,8 @@ entry:
  }
  
  ; CHECK: @ule_i32
-; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 6(8.407791e-45)
  define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
  entry:
@@ -69,7 +75,8 @@ entry:
  }
  
  ; CHECK: @ule_float
-; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT: 1084227584(5.000000e+00)
  define void @ule_float(float addrspace(1)* %out, float %in) {
  entry:
@@ -80,7 +87,8 @@ entry:
  }
  
  ; CHECK: @ole
-; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR
  ; CHECK-NEXT:1084227584(5.000000e+00)
  define void @ole(float addrspace(1)* %out, float %in) {
  entry:
diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll

index 72a90849a78daafc6e6e1e6668d84bc8bd1ae8ca..28123ef4ca6660712b2e3c7d911e899b0e3bf6ed 100644 (file)
--- a/test/CodeGen/R600/vselect.ll
+++ b/test/CodeGen/R600/vselect.ll
@@ -2,8 +2,8 @@
  ;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s
  
  ;EG-CHECK: @test_select_v2i32
-;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  
  ;SI-CHECK: @test_select_v2i32
  ;SI-CHECK: V_CNDMASK_B32_e64
@@ -20,8 +20,8 @@ entry:
  }
  
  ;EG-CHECK: @test_select_v2f32
-;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  
  ;SI-CHECK: @test_select_v2f32
  ;SI-CHECK: V_CNDMASK_B32_e64
@@ -38,10 +38,10 @@ entry:
  }
  
  ;EG-CHECK: @test_select_v4i32
-;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  
  ;SI-CHECK: @test_select_v4i32
  ;SI-CHECK: V_CNDMASK_B32_e64
@@ -60,10 +60,10 @@ entry:
  }
  
  ;EG-CHECK: @test_select_v4f32
-;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  
  define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) {
  entry:
diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/R600/work-item-intrinsics.ll

index 7998983ab2f853217379c83dce19cde0d5881491..86195ae88a99d38704aed280639e4ba16c63a005 100644 (file)
--- a/test/CodeGen/R600/work-item-intrinsics.ll
+++ b/test/CodeGen/R600/work-item-intrinsics.ll
@@ -3,7 +3,7 @@
  
  ; R600-CHECK: @ngroups_x
  ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV * [[VAL]], KC0[0].X
+; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[0].X
  ; SI-CHECK: @ngroups_x
  ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 0
  ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@@ -17,7 +17,7 @@ entry:
  
  ; R600-CHECK: @ngroups_y
  ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV * [[VAL]], KC0[0].Y
+; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[0].Y
  ; SI-CHECK: @ngroups_y
  ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 1
  ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@@ -31,7 +31,7 @@ entry:
  
  ; R600-CHECK: @ngroups_z
  ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV * [[VAL]], KC0[0].Z
+; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[0].Z
  ; SI-CHECK: @ngroups_z
  ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 2
  ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@@ -45,7 +45,7 @@ entry:
  
  ; R600-CHECK: @global_size_x
  ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV * [[VAL]], KC0[0].W
+; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[0].W
  ; SI-CHECK: @global_size_x
  ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 3
  ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@@ -59,7 +59,7 @@ entry:
  
  ; R600-CHECK: @global_size_y
  ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV * [[VAL]], KC0[1].X
+; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[1].X
  ; SI-CHECK: @global_size_y
  ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 4
  ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@@ -73,7 +73,7 @@ entry:
  
  ; R600-CHECK: @global_size_z
  ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV * [[VAL]], KC0[1].Y
+; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[1].Y
  ; SI-CHECK: @global_size_z
  ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 5
  ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@@ -87,7 +87,7 @@ entry:
  
  ; R600-CHECK: @local_size_x
  ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV * [[VAL]], KC0[1].Z
+; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[1].Z
  ; SI-CHECK: @local_size_x
  ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 6
  ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@@ -101,7 +101,7 @@ entry:
  
  ; R600-CHECK: @local_size_y
  ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV * [[VAL]], KC0[1].W
+; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[1].W
  ; SI-CHECK: @local_size_y
  ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 7
  ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@@ -115,7 +115,7 @@ entry:
  
  ; R600-CHECK: @local_size_z
  ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV * [[VAL]], KC0[2].X
+; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[2].X
  ; SI-CHECK: @local_size_z
  ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 8
  ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
diff --git a/test/CodeGen/R600/wrong-transalu-pos-fix.ll b/test/CodeGen/R600/wrong-transalu-pos-fix.ll

new file mode 100644 (file)

index 0000000..c158076
--- /dev/null
+++ b/test/CodeGen/R600/wrong-transalu-pos-fix.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Every MULLO_INT instruction must be the last one in its instruction group.
+;CHECK: @fill3d
+;CHECK-NOT: MULLO_INT T[0-9]+
+
+; ModuleID = 'radeon'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+target triple = "r600--"
+
+; Function Attrs: nounwind
+define void @fill3d(i32 addrspace(1)* nocapture %out) #0 {
+entry:
+  %x.i = tail call i32 @llvm.r600.read.global.size.x() #1
+  %y.i18 = tail call i32 @llvm.r600.read.global.size.y() #1
+  %mul = mul i32 %y.i18, %x.i
+  %z.i17 = tail call i32 @llvm.r600.read.global.size.z() #1
+  %mul3 = mul i32 %mul, %z.i17
+  %x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1
+  %x.i12.i = tail call i32 @llvm.r600.read.local.size.x() #1
+  %mul26.i = mul i32 %x.i12.i, %x.i.i
+  %x.i4.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %add.i16 = add i32 %x.i4.i, %mul26.i
+  %mul7 = mul i32 %add.i16, %y.i18
+  %y.i.i = tail call i32 @llvm.r600.read.tgid.y() #1
+  %y.i14.i = tail call i32 @llvm.r600.read.local.size.y() #1
+  %mul30.i = mul i32 %y.i14.i, %y.i.i
+  %y.i6.i = tail call i32 @llvm.r600.read.tidig.y() #1
+  %add.i14 = add i32 %mul30.i, %mul7
+  %mul819 = add i32 %add.i14, %y.i6.i
+  %add = mul i32 %mul819, %z.i17
+  %z.i.i = tail call i32 @llvm.r600.read.tgid.z() #1
+  %z.i16.i = tail call i32 @llvm.r600.read.local.size.z() #1
+  %mul33.i = mul i32 %z.i16.i, %z.i.i
+  %z.i8.i = tail call i32 @llvm.r600.read.tidig.z() #1
+  %add.i = add i32 %z.i8.i, %mul33.i
+  %add13 = add i32 %add.i, %add
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %add13
+  store i32 %mul3, i32 addrspace(1)* %arrayidx, align 4, !tbaa !3
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.z() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.z() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.z() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.global.size.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.global.size.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.global.size.z() #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!opencl.kernels = !{!0, !1, !2}
+
+!0 = metadata !{null}
+!1 = metadata !{null}
+!2 = metadata !{void (i32 addrspace(1)*)* @fill3d}
+!3 = metadata !{metadata !"int", metadata !4}
+!4 = metadata !{metadata !"omnipotent char", metadata !5}
+!5 = metadata !{metadata !"Simple C/C++ TBAA"}
author	Vincent Lejeune <vljn@ovi.com>
	Wed, 31 Jul 2013 19:31:56 +0000 (19:31 +0000)
committer	Vincent Lejeune <vljn@ovi.com>
	Wed, 31 Jul 2013 19:31:56 +0000 (19:31 +0000)
lib/Target/R600/R600InstrInfo.cpp		patch \| blob \| history
lib/Target/R600/R600MachineScheduler.cpp		patch \| blob \| history
lib/Target/R600/R600MachineScheduler.h		patch \| blob \| history
lib/Target/R600/R600Packetizer.cpp		patch \| blob \| history
test/CodeGen/R600/and.ll		patch \| blob \| history
test/CodeGen/R600/fadd.ll		patch \| blob \| history
test/CodeGen/R600/fcmp-cnd.ll		patch \| blob \| history
test/CodeGen/R600/fcmp.ll		patch \| blob \| history
test/CodeGen/R600/fdiv.ll		patch \| blob \| history
test/CodeGen/R600/fmul.ll		patch \| blob \| history
test/CodeGen/R600/fmul.v4f32.ll		patch \| blob \| history
test/CodeGen/R600/fneg.ll		patch \| blob \| history
test/CodeGen/R600/fp_to_sint.ll		patch \| blob \| history
test/CodeGen/R600/fp_to_uint.ll		patch \| blob \| history
test/CodeGen/R600/fsub.ll		patch \| blob \| history
test/CodeGen/R600/icmp-select-sete-reverse-args.ll		patch \| blob \| history
test/CodeGen/R600/kcache-fold.ll		patch \| blob \| history
test/CodeGen/R600/literals.ll		patch \| blob \| history
test/CodeGen/R600/llvm.AMDGPU.trunc.ll		patch \| blob \| history
test/CodeGen/R600/local-memory-two-objects.ll		patch \| blob \| history
test/CodeGen/R600/rotr.ll		patch \| blob \| history
test/CodeGen/R600/selectcc-cnd.ll		patch \| blob \| history
test/CodeGen/R600/selectcc-cnde-int.ll		patch \| blob \| history
test/CodeGen/R600/set-dx10.ll		patch \| blob \| history
test/CodeGen/R600/sub.ll		patch \| blob \| history
test/CodeGen/R600/unsupported-cc.ll		patch \| blob \| history
test/CodeGen/R600/vselect.ll		patch \| blob \| history
test/CodeGen/R600/work-item-intrinsics.ll		patch \| blob \| history
test/CodeGen/R600/wrong-transalu-pos-fix.ll	[new file with mode: 0644]	patch \| blob