[x86] fix allowsMisalignedMemoryAccesses() implementation

author Sanjay Patel <spatel@rotateright.com>

Fri, 14 Aug 2015 17:53:40 +0000 (17:53 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Fri, 14 Aug 2015 17:53:40 +0000 (17:53 +0000)
author Sanjay Patel <spatel@rotateright.com>
Fri, 14 Aug 2015 17:53:40 +0000 (17:53 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Fri, 14 Aug 2015 17:53:40 +0000 (17:53 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 9a790010f8c2a80191441ef690b9dad635131d3b..68a15e021cd205c8ae682750713ca16e0a70d643 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1915,8 +1915,14 @@ X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                    unsigned,
                                                    unsigned,
                                                    bool *Fast) const {
-  if (Fast)
-    *Fast = Subtarget->isUnalignedMemAccessFast();
+  if (Fast) {
+    // FIXME: We should be checking 128-bit accesses separately from smaller
+    // accesses.
+    if (VT.getSizeInBits() == 256)
+      *Fast = !Subtarget->isUnalignedMem32Slow();
+    else
+      *Fast = Subtarget->isUnalignedMemAccessFast();
+  }
    return true;
  }
  
@@ -11259,14 +11265,25 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
    // --> load32 addr
    if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
        Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
-      OpVT.is256BitVector() && SubVecVT.is128BitVector() &&
-      !Subtarget->isUnalignedMem32Slow()) {
-    SDValue SubVec2 = Vec.getOperand(1);
-    if (auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2))) {
-      if (Idx2->getZExtValue() == 0) {
-        SDValue Ops[] = { SubVec2, SubVec };
-        if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
-          return Ld;
+      OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
+    auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
+    if (Idx2 && Idx2->getZExtValue() == 0) {
+      SDValue SubVec2 = Vec.getOperand(1);
+      // If needed, look through a bitcast to get to the load.
+      if (SubVec2.getNode() && SubVec2.getOpcode() == ISD::BITCAST)
+        SubVec2 = SubVec2.getOperand(0);
+      
+      if (auto *FirstLd = dyn_cast<LoadSDNode>(SubVec2)) {
+        bool Fast;
+        unsigned Alignment = FirstLd->getAlignment();
+        unsigned AS = FirstLd->getAddressSpace();
+        const X86TargetLowering *TLI = Subtarget->getTargetLowering();
+        if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+                                    OpVT, AS, Alignment, &Fast) && Fast) {
+          SDValue Ops[] = { SubVec2, SubVec };
+          if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+            return Ld;
+        }
        }
      }
    }
diff --git a/test/CodeGen/X86/unaligned-32-byte-memops.ll b/test/CodeGen/X86/unaligned-32-byte-memops.ll

index d979c16f4abddbfb28635d260dbecf4971a44d11..608c6e72fbe9492f0555d843f8173c740e62f794 100644 (file)
--- a/test/CodeGen/X86/unaligned-32-byte-memops.ll
+++ b/test/CodeGen/X86/unaligned-32-byte-memops.ll
@@ -75,12 +75,12 @@ define <8 x float> @combine_16_byte_loads_no_intrinsic(<4 x float>* %ptr) {
    ret <8 x float> %v3
  }
  
+; If the first load is 32-byte aligned, then the loads should be merged in all cases.
+
  define <8 x float> @combine_16_byte_loads_aligned(<4 x float>* %ptr) {
-;; FIXME: The first load is 32-byte aligned, so the second load should get merged.
  ; AVXSLOW-LABEL: combine_16_byte_loads_aligned:
  ; AVXSLOW:       # BB#0:
-; AVXSLOW-NEXT:    vmovaps 48(%rdi), %xmm0
-; AVXSLOW-NEXT:    vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVXSLOW-NEXT:    vmovaps 48(%rdi), %ymm0
  ; AVXSLOW-NEXT:    retq
  ;
  ; AVXFAST-LABEL: combine_16_byte_loads_aligned:
author	Sanjay Patel <spatel@rotateright.com>
	Fri, 14 Aug 2015 17:53:40 +0000 (17:53 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Fri, 14 Aug 2015 17:53:40 +0000 (17:53 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/unaligned-32-byte-memops.ll		patch \| blob \| history