From: Jun Bum Lim Date: Mon, 21 Dec 2015 15:36:49 +0000 (+0000) Subject: Revert "[AArch64] Promote loads from stores" X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=35733957dfcbcde0ba74357b7673165edae944f1 Revert "[AArch64] Promote loads from stores" This reverts commit r256004 due to a failure in cortex-a53. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256160 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 05dce507e58..27d569d7043 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -43,7 +43,6 @@ STATISTIC(NumUnscaledPairCreated, "Number of load/store from unscaled generated"); STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted"); STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted"); -STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted"); static cl::opt ScanLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden); @@ -94,12 +93,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, LdStPairFlags &Flags, unsigned Limit); - - // Scan the instructions looking for a store that writes to the address from - // which the current load instruction reads. Return true if one is found. - bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit, - MachineBasicBlock::iterator &StoreI); - // Merge the two instructions indicated into a single pair-wise instruction. // If MergeForward is true, erase the first instruction and fold its // operation into the second. If false, the reverse. Return the instruction @@ -109,11 +102,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { MachineBasicBlock::iterator Paired, const LdStPairFlags &Flags); - // Promote the load that reads directly from the address stored to. - MachineBasicBlock::iterator - promoteLoadFromStore(MachineBasicBlock::iterator LoadI, - MachineBasicBlock::iterator StoreI); - // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using // pre or post indexed addressing with writeback. Scan forwards. @@ -140,9 +128,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { // Find and merge foldable ldr/str instructions. bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI); - // Find and promote load instructions which read directly from store. - bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI); - // Check if converting two narrow loads into a single wider load with // bitfield extracts could be enabled. bool enableNarrowLdMerge(MachineFunction &Fn); @@ -414,36 +399,6 @@ static unsigned getMatchingPairOpcode(unsigned Opc) { } } -static unsigned isMatchingStore(MachineInstr *LoadInst, - MachineInstr *StoreInst) { - unsigned LdOpc = LoadInst->getOpcode(); - unsigned StOpc = StoreInst->getOpcode(); - switch (LdOpc) { - default: - llvm_unreachable("Unsupported load instruction!"); - case AArch64::LDRBBui: - return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui || - StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; - case AArch64::LDURBBi: - return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi || - StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; - case AArch64::LDRHHui: - return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui || - StOpc == AArch64::STRXui; - case AArch64::LDURHHi: - return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi || - StOpc == AArch64::STURXi; - case AArch64::LDRWui: - return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; - case AArch64::LDURWi: - return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; - case AArch64::LDRXui: - return StOpc == AArch64::STRXui; - case AArch64::LDURXi: - return StOpc == AArch64::STURXi; - } -} - static unsigned getPreIndexedOpcode(unsigned Opc) { switch (Opc) { default: @@ -598,21 +553,6 @@ static const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) { return MI->getOperand(Idx); } -static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst, - MachineInstr *StoreInst) { - assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st."); - int LoadSize = getMemScale(LoadInst); - int StoreSize = getMemScale(StoreInst); - int UnscaledStOffset = isUnscaledLdSt(StoreInst) - ? getLdStOffsetOp(StoreInst).getImm() - : getLdStOffsetOp(StoreInst).getImm() * StoreSize; - int UnscaledLdOffset = isUnscaledLdSt(LoadInst) - ? getLdStOffsetOp(LoadInst).getImm() - : getLdStOffsetOp(LoadInst).getImm() * LoadSize; - return (UnscaledStOffset <= UnscaledLdOffset) && - (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize)); -} - // Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI. static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, MachineInstr *Op1) { @@ -860,106 +800,6 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, return NextI; } -MachineBasicBlock::iterator -AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, - MachineBasicBlock::iterator StoreI) { - MachineBasicBlock::iterator NextI = LoadI; - ++NextI; - - int LoadSize = getMemScale(LoadI); - int StoreSize = getMemScale(StoreI); - unsigned LdRt = getLdStRegOp(LoadI).getReg(); - unsigned StRt = getLdStRegOp(StoreI).getReg(); - bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt); - - assert((IsStoreXReg || - TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) && - "Unexpected RegClass"); - - MachineInstr *BitExtMI; - if (LoadSize == StoreSize) { - // Remove the load, if the destination register of the loads is the same - // register for stored value. - if (StRt == LdRt) { - DEBUG(dbgs() << "Remove load instruction:\n "); - DEBUG(LoadI->print(dbgs())); - DEBUG(dbgs() << "\n"); - LoadI->eraseFromParent(); - return NextI; - } - // Replace the load with a mov if the load and store are in the same size. - BitExtMI = - BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), - TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt) - .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR) - .addReg(StRt) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); - } else { - // FIXME: Currently we disable this transformation in big-endian targets as - // performance and correctness are verified only in little-endian. - if (!Subtarget->isLittleEndian()) - return NextI; - bool IsUnscaled = isUnscaledLdSt(LoadI); - assert(IsUnscaled == isUnscaledLdSt(StoreI) && "Unsupported ld/st match"); - assert(LoadSize < StoreSize && "Invalid load size"); - int UnscaledLdOffset = IsUnscaled - ? getLdStOffsetOp(LoadI).getImm() - : getLdStOffsetOp(LoadI).getImm() * LoadSize; - int UnscaledStOffset = IsUnscaled - ? getLdStOffsetOp(StoreI).getImm() - : getLdStOffsetOp(StoreI).getImm() * StoreSize; - int Width = LoadSize * 8; - int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); - int Imms = Immr + Width - 1; - unsigned DestReg = IsStoreXReg - ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32, - &AArch64::GPR64RegClass) - : LdRt; - - assert(((UnscaledLdOffset) >= UnscaledStOffset && - (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) && - "Invalid offset"); - - Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); - Imms = Immr + Width - 1; - if (UnscaledLdOffset == UnscaledStOffset) { - uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N - | ((Immr) << 6) // immr - | ((Imms) << 0) // imms - ; - - BitExtMI = - BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), - TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri), - DestReg) - .addReg(StRt) - .addImm(AndMaskEncoded); - } else { - BitExtMI = - BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), - TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri), - DestReg) - .addReg(StRt) - .addImm(Immr) - .addImm(Imms); - } - } - - DEBUG(dbgs() << "Promoting load by replacing :\n "); - DEBUG(StoreI->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(LoadI->print(dbgs())); - DEBUG(dbgs() << " with instructions:\n "); - DEBUG(StoreI->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG((BitExtMI)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions. - LoadI->eraseFromParent(); - return NextI; -} - /// trackRegDefsUses - Remember what registers the specified instruction uses /// and modifies. static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs, @@ -1023,60 +863,6 @@ static bool mayAlias(MachineInstr *MIa, return false; } -bool AArch64LoadStoreOpt::findMatchingStore( - MachineBasicBlock::iterator I, unsigned Limit, - MachineBasicBlock::iterator &StoreI) { - MachineBasicBlock::iterator E = I->getParent()->begin(); - MachineBasicBlock::iterator MBBI = I; - MachineInstr *FirstMI = I; - unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); - - // Track which registers have been modified and used between the first insn - // and the second insn. - BitVector ModifiedRegs, UsedRegs; - ModifiedRegs.resize(TRI->getNumRegs()); - UsedRegs.resize(TRI->getNumRegs()); - - for (unsigned Count = 0; MBBI != E && Count < Limit;) { - --MBBI; - MachineInstr *MI = MBBI; - // Skip DBG_VALUE instructions. Otherwise debug info can affect the - // optimization by changing how far we scan. - if (MI->isDebugValue()) - continue; - // Now that we know this is a real instruction, count it. - ++Count; - - // If the load instruction reads directly from the address to which the - // store instruction writes and the stored value is not modified, we can - // promote the load. Since we do not handle stores with pre-/post-index, - // it's unnecessary to check if BaseReg is modified by the store itself. - if (MI->mayStore() && isMatchingStore(FirstMI, MI) && - BaseReg == getLdStBaseOp(MI).getReg() && - isLdOffsetInRangeOfSt(FirstMI, MI) && - !ModifiedRegs[getLdStRegOp(MI).getReg()]) { - StoreI = MBBI; - return true; - } - - if (MI->isCall()) - return false; - - // Update modified / uses register lists. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - - // Otherwise, if the base register is modified, we have no match, so - // return early. - if (ModifiedRegs[BaseReg]) - return false; - - // If we encounter a store aliased with the load, return early. - if (MI->mayStore() && mayAlias(FirstMI, MI, TII)) - return false; - } - return false; -} - /// findMatchingInsn - Scan the instructions looking for a load/store that can /// be combined with the current instruction into a load/store pair. MachineBasicBlock::iterator @@ -1477,31 +1263,6 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( return E; } -bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( - MachineBasicBlock::iterator &MBBI) { - MachineInstr *MI = MBBI; - // If this is a volatile load, don't mess with it. - if (MI->hasOrderedMemoryRef()) - return false; - - // Make sure this is a reg+imm. - // FIXME: It is possible to extend it to handle reg+reg cases. - if (!getLdStOffsetOp(MI).isImm()) - return false; - - // Look backward up to ScanLimit instructions. - MachineBasicBlock::iterator StoreI; - if (findMatchingStore(MBBI, ScanLimit, StoreI)) { - ++NumLoadsFromStoresPromoted; - // Promote the load. Keeping the iterator straight is a - // pain, so we let the merge routine tell us what the next instruction - // is after it's done mucking about. - MBBI = promoteLoadFromStore(MBBI, StoreI); - return true; - } - return false; -} - bool AArch64LoadStoreOpt::tryToMergeLdStInst( MachineBasicBlock::iterator &MBBI) { MachineInstr *MI = MBBI; @@ -1546,16 +1307,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt) { bool Modified = false; // Three tranformations to do here: - // 1) Find loads that directly read from stores and promote them by - // replacing with mov instructions. If the store is wider than the load, - // the load will be replaced with a bitfield extract. - // e.g., - // str w1, [x0, #4] - // ldrh w2, [x0, #6] - // ; becomes - // str w1, [x0, #4] - // lsr w2, w1, #16 - // 2) Find narrow loads that can be converted into a single wider load + // 1) Find narrow loads that can be converted into a single wider load // with bitfield extract instructions. // e.g., // ldrh w0, [x2] @@ -1564,14 +1316,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, // ldr w0, [x2] // ubfx w1, w0, #16, #16 // and w0, w0, #ffff - // 3) Find loads and stores that can be merged into a single load or store + // 2) Find loads and stores that can be merged into a single load or store // pair instruction. // e.g., // ldr x0, [x2] // ldr x1, [x2, #8] // ; becomes // ldp x0, x1, [x2] - // 4) Find base register updates that can be merged into the load or store + // 3) Find base register updates that can be merged into the load or store // as a base-reg writeback. // e.g., // ldr x0, [x2] @@ -1579,35 +1331,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, // ; becomes // ldr x0, [x2], #4 - for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - MBBI != E;) { - MachineInstr *MI = MBBI; - switch (MI->getOpcode()) { - default: - // Just move on to the next instruction. - ++MBBI; - break; - // Scaled instructions. - case AArch64::LDRBBui: - case AArch64::LDRHHui: - case AArch64::LDRWui: - case AArch64::LDRXui: - // Unscaled instructions. - case AArch64::LDURBBi: - case AArch64::LDURHHi: - case AArch64::LDURWi: - case AArch64::LDURXi: { - if (tryToPromoteLoadFromStore(MBBI)) { - Modified = true; - break; - } - ++MBBI; - break; - } - // FIXME: Do the other instructions. - } - } - for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); enableNarrowLdOpt && MBBI != E;) { MachineInstr *MI = MBBI; diff --git a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll index 36424506bee..eb0cd3547bd 100644 --- a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll +++ b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll @@ -1,9 +1,9 @@ ; RUN: llc -march=arm64 -mcpu=cyclone < %s | FileCheck %s ; CHECK: foo -; CHECK: str w[[REG0:[0-9]+]], [x19, #264] -; CHECK: mov w[[REG1:[0-9]+]], w[[REG0]] -; CHECK: str w[[REG1]], [x19, #132] +; CHECK: ldr w[[REG:[0-9]+]], [x19, #264] +; CHECK: str w[[REG]], [x19, #132] +; CHECK: ldr w{{[0-9]+}}, [x19, #264] define i32 @foo(i32 %a) nounwind { %retval = alloca i32, align 4 diff --git a/test/CodeGen/AArch64/arm64-ld-from-st.ll b/test/CodeGen/AArch64/arm64-ld-from-st.ll deleted file mode 100644 index 5013ce6c1d4..00000000000 --- a/test/CodeGen/AArch64/arm64-ld-from-st.ll +++ /dev/null @@ -1,666 +0,0 @@ -; RUN: llc < %s -mtriple aarch64--none-eabi -verify-machineinstrs | FileCheck %s - -; CHECK-LABEL: Str64Ldr64 -; CHECK: mov x0, x1 -define i64 @Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i64* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 1 - %1 = load i64, i64* %arrayidx1 - ret i64 %1 -} - -; CHECK-LABEL: Str64Ldr32_0 -; CHECK: and x0, x1, #0xffffffff -define i32 @Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i32* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 2 - %1 = load i32, i32* %arrayidx1 - ret i32 %1 -} - -; CHECK-LABEL: Str64Ldr32_1 -; CHECK: lsr x0, x1, #32 -define i32 @Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i32* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 3 - %1 = load i32, i32* %arrayidx1 - ret i32 %1 -} - -; CHECK-LABEL: Str64Ldr16_0 -; CHECK: and x0, x1, #0xffff -define i16 @Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i16* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 4 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Str64Ldr16_1 -; CHECK: ubfx x0, x1, #16, #16 -define i16 @Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i16* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 5 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Str64Ldr16_2 -; CHECK: ubfx x0, x1, #32, #16 -define i16 @Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i16* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 6 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Str64Ldr16_3 -; CHECK: lsr x0, x1, #48 -define i16 @Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i16* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 7 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Str64Ldr8_0 -; CHECK: and x0, x1, #0xff -define i8 @Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 8 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Str64Ldr8_1 -; CHECK: ubfx x0, x1, #8, #8 -define i8 @Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 9 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Str64Ldr8_2 -; CHECK: ubfx x0, x1, #16, #8 -define i8 @Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 10 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Str64Ldr8_3 -; CHECK: ubfx x0, x1, #24, #8 -define i8 @Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 11 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Str64Ldr8_4 -; CHECK: ubfx x0, x1, #32, #8 -define i8 @Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 12 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Str64Ldr8_5 -; CHECK: ubfx x0, x1, #40, #8 -define i8 @Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 13 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Str64Ldr8_6 -; CHECK: ubfx x0, x1, #48, #8 -define i8 @Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 14 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Str64Ldr8_7 -; CHECK: lsr x0, x1, #56 -define i8 @Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 15 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Str32Ldr32 -; CHECK: mov w0, w1 -define i32 @Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i32* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 1 - %1 = load i32, i32* %arrayidx1 - ret i32 %1 -} - -; CHECK-LABEL: Str32Ldr16_0 -; CHECK: and w0, w1, #0xffff -define i16 @Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i16* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Str32Ldr16_1 -; CHECK: lsr w0, w1, #16 -define i16 @Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i16* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 3 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Str32Ldr8_0 -; CHECK: and w0, w1, #0xff -define i8 @Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i8* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 4 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Str32Ldr8_1 -; CHECK: ubfx w0, w1, #8, #8 -define i8 @Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i8* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 5 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Str32Ldr8_2 -; CHECK: ubfx w0, w1, #16, #8 -define i8 @Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i8* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 6 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Str32Ldr8_3 -; CHECK: lsr w0, w1, #24 -define i8 @Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i8* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 7 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Str16Ldr16 -; CHECK: mov w0, w1 -define i16 @Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) { -entry: - %0 = bitcast i16* %P to i16* - %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1 - store i16 %v, i16* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Str16Ldr8_0 -; CHECK: and w0, w1, #0xff -define i8 @Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) { -entry: - %0 = bitcast i16* %P to i8* - %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1 - store i16 %v, i16* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 2 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Str16Ldr8_1 -; CHECK: ubfx w0, w1, #8, #8 -define i8 @Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) { -entry: - %0 = bitcast i16* %P to i8* - %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1 - store i16 %v, i16* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 3 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - - -; CHECK-LABEL: Unscaled_Str64Ldr64 -; CHECK: mov x0, x1 -define i64 @Unscaled_Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i64* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 -1 - %1 = load i64, i64* %arrayidx1 - ret i64 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr32_0 -; CHECK: and x0, x1, #0xffffffff -define i32 @Unscaled_Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i32* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -2 - %1 = load i32, i32* %arrayidx1 - ret i32 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr32_1 -; CHECK: lsr x0, x1, #32 -define i32 @Unscaled_Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i32* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1 - %1 = load i32, i32* %arrayidx1 - ret i32 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr16_0 -; CHECK: and x0, x1, #0xffff -define i16 @Unscaled_Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i16* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -4 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr16_1 -; CHECK: ubfx x0, x1, #16, #16 -define i16 @Unscaled_Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i16* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr16_2 -; CHECK: ubfx x0, x1, #32, #16 -define i16 @Unscaled_Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i16* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr16_3 -; CHECK: lsr x0, x1, #48 -define i16 @Unscaled_Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i16* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr8_0 -; CHECK: and x0, x1, #0xff -define i8 @Unscaled_Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -8 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr8_1 -; CHECK: ubfx x0, x1, #8, #8 -define i8 @Unscaled_Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -7 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr8_2 -; CHECK: ubfx x0, x1, #16, #8 -define i8 @Unscaled_Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -6 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr8_3 -; CHECK: ubfx x0, x1, #24, #8 -define i8 @Unscaled_Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -5 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr8_4 -; CHECK: ubfx x0, x1, #32, #8 -define i8 @Unscaled_Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr8_5 -; CHECK: ubfx x0, x1, #40, #8 -define i8 @Unscaled_Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr8_6 -; CHECK: ubfx x0, x1, #48, #8 -define i8 @Unscaled_Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Unscaled_Str64Ldr8_7 -; CHECK: lsr x0, x1, #56 -define i8 @Unscaled_Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) { -entry: - %0 = bitcast i64* %P to i8* - %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Unscaled_Str32Ldr32 -; CHECK: mov w0, w1 -define i32 @Unscaled_Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i32* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1 - %1 = load i32, i32* %arrayidx1 - ret i32 %1 -} - -; CHECK-LABEL: Unscaled_Str32Ldr16_0 -; CHECK: and w0, w1, #0xffff -define i16 @Unscaled_Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i16* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Unscaled_Str32Ldr16_1 -; CHECK: lsr w0, w1, #16 -define i16 @Unscaled_Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i16* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Unscaled_Str32Ldr8_0 -; CHECK: and w0, w1, #0xff -define i8 @Unscaled_Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i8* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Unscaled_Str32Ldr8_1 -; CHECK: ubfx w0, w1, #8, #8 -define i8 @Unscaled_Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i8* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Unscaled_Str32Ldr8_2 -; CHECK: ubfx w0, w1, #16, #8 -define i8 @Unscaled_Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i8* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Unscaled_Str32Ldr8_3 -; CHECK: lsr w0, w1, #24 -define i8 @Unscaled_Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i8* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Unscaled_Str16Ldr16 -; CHECK: mov w0, w1 -define i16 @Unscaled_Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) { -entry: - %0 = bitcast i16* %P to i16* - %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1 - store i16 %v, i16* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Unscaled_Str16Ldr8_0 -; CHECK: and w0, w1, #0xff -define i8 @Unscaled_Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) { -entry: - %0 = bitcast i16* %P to i8* - %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1 - store i16 %v, i16* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: Unscaled_Str16Ldr8_1 -; CHECK: ubfx w0, w1, #8, #8 -define i8 @Unscaled_Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) { -entry: - %0 = bitcast i16* %P to i8* - %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1 - store i16 %v, i16* %arrayidx0 - %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1 - %1 = load i8, i8* %arrayidx1 - ret i8 %1 -} - -; CHECK-LABEL: StrVolatileLdr -; CHECK: ldrh -define i16 @StrVolatileLdr(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i16* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2 - %1 = load volatile i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: StrNotInRangeLdr -; CHECK: ldrh -define i16 @StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i16* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: Unscaled_StrNotInRangeLdr -; CHECK: ldurh -define i16 @Unscaled_StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i16* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 - store i32 %v, i32* %arrayidx0 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -; CHECK-LABEL: StrCallLdr -; CHECK: ldrh -define i16 @StrCallLdr(i32* nocapture %P, i32 %v, i64 %n) { -entry: - %0 = bitcast i32* %P to i16* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 - store i32 %v, i32* %arrayidx0 - %c = call i1 @test_dummy() - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} - -declare i1 @test_dummy() - -; CHECK-LABEL: StrStrLdr -; CHECK: ldrh -define i16 @StrStrLdr(i32 %v, i32* %P, i32* %P2, i32 %n) { -entry: - %0 = bitcast i32* %P to i16* - %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 - store i32 %v, i32* %arrayidx0 - store i32 %n, i32* %P2 - %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2 - %1 = load i16, i16* %arrayidx1 - ret i16 %1 -} diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll index 2062f8b8b38..0d301bbd502 100644 --- a/test/CodeGen/AArch64/regress-tblgen-chains.ll +++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll @@ -27,8 +27,8 @@ define i64 @test_chains() { ; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]] ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1 -; CHECK: sturb w[[STRVAL:[0-9]+]], [x29, [[LOCADDR]]] -; CHECK: mov {{w[0-9]+}}, w[[STRVAL]] +; CHECK: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]] +; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]] %ret.1 = load i8, i8* %locvar %ret.2 = zext i8 %ret.1 to i64