LoopVectorize: Scalarize padded types

author Arnold Schwaighofer <aschwaighofer@apple.com>

Wed, 24 Apr 2013 16:16:01 +0000 (16:16 +0000)

committer Arnold Schwaighofer <aschwaighofer@apple.com>

Wed, 24 Apr 2013 16:16:01 +0000 (16:16 +0000)
author Arnold Schwaighofer <aschwaighofer@apple.com>
Wed, 24 Apr 2013 16:16:01 +0000 (16:16 +0000)
committer Arnold Schwaighofer <aschwaighofer@apple.com>
Wed, 24 Apr 2013 16:16:01 +0000 (16:16 +0000)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index ac0925df2a09d0f9dbffe480a9b2fba4dbd9d12e..56a9a2d3d45c7aff89905a331380dde16fb20258 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -956,6 +956,12 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
    Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
    unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
  
+  unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
+  unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
+
+  if (ScalarAllocatedSize != VectorElementSize)
+    return scalarizeInstruction(Instr);
+
    // If the pointer is loop invariant or if it is non consecutive,
    // scalarize the load.
    int Stride = Legal->isConsecutivePtr(Ptr);
@@ -3558,7 +3564,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
      // Scalarized loads/stores.
      int Stride = Legal->isConsecutivePtr(Ptr);
      bool Reverse = Stride < 0;
-    if (0 == Stride) {
+    unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
+    unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
+    if (0 == Stride || ScalarAllocatedSize != VectorElementSize) {
        unsigned Cost = 0;
        // The cost of extracting from the value vector and pointer vector.
        Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
diff --git a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll

new file mode 100644 (file)

index 0000000..b66119f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
@@ -0,0 +1,29 @@
+; RUN: opt -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+@x = common global [1024 x x86_fp80] zeroinitializer, align 16
+
+;CHECK: @example
+;CHECK-NOT: bitcast x86_fp80* {{%[^ ]+}} to <{{[2-9][0-9]*}} x x86_fp80>*
+;CHECK: store
+;CHECK: ret void
+
+define void @example() nounwind ssp uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %conv = sitofp i32 1 to x86_fp80
+  %arrayidx = getelementptr inbounds [1024 x x86_fp80]* @x, i64 0, i64 %indvars.iv
+  store x86_fp80 %conv, x86_fp80* %arrayidx, align 16
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
author	Arnold Schwaighofer <aschwaighofer@apple.com>
	Wed, 24 Apr 2013 16:16:01 +0000 (16:16 +0000)
committer	Arnold Schwaighofer <aschwaighofer@apple.com>
	Wed, 24 Apr 2013 16:16:01 +0000 (16:16 +0000)
lib/Transforms/Vectorize/LoopVectorize.cpp		patch | blob | history
test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll	[new file with mode: 0644]	patch | blob