[AArch64] Fix halfword load merging for big-endian targets

author Oliver Stannard <oliver.stannard@arm.com>

Tue, 10 Nov 2015 11:04:18 +0000 (11:04 +0000)

committer Oliver Stannard <oliver.stannard@arm.com>

Tue, 10 Nov 2015 11:04:18 +0000 (11:04 +0000)
author Oliver Stannard <oliver.stannard@arm.com>
Tue, 10 Nov 2015 11:04:18 +0000 (11:04 +0000)
committer Oliver Stannard <oliver.stannard@arm.com>
Tue, 10 Nov 2015 11:04:18 +0000 (11:04 +0000)
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

index 6ef4c269d8fe9d4f18dad04b714da518d90160c3..ffe6ab2cb53dc7b8d3ab7463ad8dd5adc70b69a1 100644 (file)
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -84,6 +84,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
  
    const AArch64InstrInfo *TII;
    const TargetRegisterInfo *TRI;
  
    const AArch64InstrInfo *TII;
    const TargetRegisterInfo *TRI;
+  const AArch64Subtarget *Subtarget;
  
    // Scan the instructions looking for a load/store that can be combined
    // with the current instruction into a load/store pair.
  
    // Scan the instructions looking for a load/store that can be combined
    // with the current instruction into a load/store pair.
@@ -537,6 +538,10 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
      if (!IsUnscaled)
        OffsetImm /= 2;
      MachineInstr *RtNewDest = MergeForward ? I : Paired;
      if (!IsUnscaled)
        OffsetImm /= 2;
      MachineInstr *RtNewDest = MergeForward ? I : Paired;
+    // When merging small (< 32 bit) loads for big-endian targets, the order of
+    // the component parts gets swapped.
+    if (!Subtarget->isLittleEndian())
+      std::swap(RtMI, Rt2MI);
      // Construct the new load instruction.
      // FIXME: currently we support only halfword unsigned load. We need to
      // handle byte type, signed, and store instructions as well.
      // Construct the new load instruction.
      // FIXME: currently we support only halfword unsigned load. We need to
      // handle byte type, signed, and store instructions as well.
@@ -560,7 +565,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
      DEBUG((NewMemMI)->print(dbgs()));
  
      MachineInstr *ExtDestMI = MergeForward ? Paired : I;
      DEBUG((NewMemMI)->print(dbgs()));
  
      MachineInstr *ExtDestMI = MergeForward ? Paired : I;
-    if (ExtDestMI == Rt2MI) {
+    if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) {
        // Create the bitfield extract for high half.
        BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                            TII->get(AArch64::UBFMWri))
        // Create the bitfield extract for high half.
        BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                            TII->get(AArch64::UBFMWri))
@@ -1388,8 +1393,9 @@ bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
  }
  
  bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  }
  
  bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
-  TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
-  TRI = Fn.getSubtarget().getRegisterInfo();
+  Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
+  TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
+  TRI = Subtarget->getRegisterInfo();
  
    bool Modified = false;
    bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
  
    bool Modified = false;
    bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
diff --git a/test/CodeGen/AArch64/arm64-ldr-merge.ll b/test/CodeGen/AArch64/arm64-ldr-merge.ll

index 4e40bac4e71f485ff125c6448afa8ec36b20630e..5d8cb8d745da14dcad5c39042bc9dc8d94afe7ee 100644 (file)
--- a/test/CodeGen/AArch64/arm64-ldr-merge.ll
+++ b/test/CodeGen/AArch64/arm64-ldr-merge.ll
@@ -1,36 +1,51 @@
-; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=LE
+; RUN: llc < %s -march=aarch64_be -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=BE
  
  ; CHECK-LABEL: Ldrh_merge
  ; CHECK-NOT: ldrh
  ; CHECK: ldr [[NEW_DEST:w[0-9]+]]
  
  ; CHECK-LABEL: Ldrh_merge
  ; CHECK-NOT: ldrh
  ; CHECK: ldr [[NEW_DEST:w[0-9]+]]
-; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff
-; CHECK: lsr  w{{[0-9]+}}, [[NEW_DEST]]
+; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
+; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
  define i16 @Ldrh_merge(i16* nocapture readonly %p) {
    %1 = load i16, i16* %p, align 2
    %arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1
    %2 = load i16, i16* %arrayidx2, align 2
  define i16 @Ldrh_merge(i16* nocapture readonly %p) {
    %1 = load i16, i16* %p, align 2
    %arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1
    %2 = load i16, i16* %arrayidx2, align 2
-  %add = add nuw nsw i16 %1, %2
+  %add = sub nuw nsw i16 %1, %2
    ret i16 %add
  }
  
  ; CHECK-LABEL: Ldurh_merge
  ; CHECK-NOT: ldurh
  ; CHECK: ldur [[NEW_DEST:w[0-9]+]]
    ret i16 %add
  }
  
  ; CHECK-LABEL: Ldurh_merge
  ; CHECK-NOT: ldurh
  ; CHECK: ldur [[NEW_DEST:w[0-9]+]]
-; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff
-; CHECK: lsr  w{{[0-9]+}}, [[NEW_DEST]]
+; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
+; CHECK-DAG: lsr  [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
  define i16 @Ldurh_merge(i16* nocapture readonly %p)  {
  entry:
    %arrayidx = getelementptr inbounds i16, i16* %p, i64 -2
    %0 = load i16, i16* %arrayidx
    %arrayidx3 = getelementptr inbounds i16, i16* %p, i64 -1
    %1 = load i16, i16* %arrayidx3
  define i16 @Ldurh_merge(i16* nocapture readonly %p)  {
  entry:
    %arrayidx = getelementptr inbounds i16, i16* %p, i64 -2
    %0 = load i16, i16* %arrayidx
    %arrayidx3 = getelementptr inbounds i16, i16* %p, i64 -1
    %1 = load i16, i16* %arrayidx3
-  %add = add nuw nsw i16 %0, %1
+  %add = sub nuw nsw i16 %0, %1
    ret i16 %add
  }
  
  ; CHECK-LABEL: Ldrh_4_merge
  ; CHECK-NOT: ldrh
    ret i16 %add
  }
  
  ; CHECK-LABEL: Ldrh_4_merge
  ; CHECK-NOT: ldrh
-; CHECK: ldp [[NEW_DEST:w[0-9]+]]
+; CHECK: ldp [[WORD1:w[0-9]+]], [[WORD2:w[0-9]+]], [x0]
+; CHECK-DAG: and [[WORD1LO:w[0-9]+]], [[WORD1]], #0xffff
+; CHECK-DAG: lsr [[WORD1HI:w[0-9]+]], [[WORD1]], #16
+; CHECK-DAG: and [[WORD2LO:w[0-9]+]], [[WORD2]], #0xffff
+; CHECK-DAG: lsr [[WORD2HI:w[0-9]+]], [[WORD2]], #16
+; LE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1HI]], [[WORD1LO]]
+; BE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1LO]], [[WORD1HI]]
+; LE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2LO]]
+; BE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2HI]]
+; LE: sub w0, [[TEMP2]], [[WORD2HI]]
+; BE: sub w0, [[TEMP2]], [[WORD2LO]]
  define i16 @Ldrh_4_merge(i16* nocapture readonly %P) {
    %arrayidx = getelementptr inbounds i16, i16* %P, i64 0
    %l0 = load i16, i16* %arrayidx
  define i16 @Ldrh_4_merge(i16* nocapture readonly %P) {
    %arrayidx = getelementptr inbounds i16, i16* %P, i64 0
    %l0 = load i16, i16* %arrayidx
@@ -40,8 +55,8 @@ define i16 @Ldrh_4_merge(i16* nocapture readonly %P) {
    %l2 = load i16, i16* %arrayidx7
    %arrayidx12 = getelementptr inbounds i16, i16* %P, i64 3
    %l3 = load i16, i16* %arrayidx12
    %l2 = load i16, i16* %arrayidx7
    %arrayidx12 = getelementptr inbounds i16, i16* %P, i64 3
    %l3 = load i16, i16* %arrayidx12
-  %add4 = add nuw nsw i16 %l1, %l0
-  %add9 = add nuw nsw i16 %add4, %l2
-  %add14 = add nuw nsw i16 %add9, %l3
+  %add4 = sub nuw nsw i16 %l1, %l0
+  %add9 = udiv i16 %add4, %l2
+  %add14 = sub nuw nsw i16 %add9, %l3
    ret i16 %add14
  }
    ret i16 %add14
  }
author	Oliver Stannard <oliver.stannard@arm.com>
	Tue, 10 Nov 2015 11:04:18 +0000 (11:04 +0000)
committer	Oliver Stannard <oliver.stannard@arm.com>
	Tue, 10 Nov 2015 11:04:18 +0000 (11:04 +0000)
lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp		patch | blob | history
test/CodeGen/AArch64/arm64-ldr-merge.ll		patch | blob | history