; RUN: llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec < %s | FileCheck %s ;; This test ensures that MergeConsecutiveStores does not attempt to ;; merge stores or loads when doing so would result in unaligned ;; memory operations (unless the target supports those, e.g. X86). ;; This issue happen in other situations for other targets, but PPC ;; with Altivec extensions was chosen for the test because it does not ;; support unaligned access with AltiVec instructions. If the 4 ;; load/stores get merged to an v4i32 vector type severely bad code ;; gets generated: it painstakingly copies the values to a temporary ;; location on the stack, with vector ops, in order to then use ;; integer ops to load from the temporary stack location and store to ;; the final location. Yuck! %struct.X = type { i32, i32, i32, i32 } @fx = common global %struct.X zeroinitializer, align 4 @fy = common global %struct.X zeroinitializer, align 4 ;; In this test case, lvx and stvx instructions should NOT be ;; generated, as the alignment is not sufficient for it to be ;; worthwhile. ;; CHECK-LABEL: f: ;; CHECK: lwzu ;; CHECK-NEXT: lwz ;; CHECK-NEXT: lwz ;; CHECK-NEXT: lwz ;; CHECK-NEXT: stwu ;; CHECK-NEXT: stw ;; CHECK-NEXT: stw ;; CHECK-NEXT: stw ;; CHECK-NEXT: blr define void @f() { entry: %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 4 %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4 %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4 %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4 store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 4 store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4 store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4 store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4 ret void } @gx = common global %struct.X zeroinitializer, align 16 @gy = common global %struct.X zeroinitializer, align 16 ;; In this test, lvx and stvx instructions SHOULD be generated, as ;; the 16-byte alignment of the new load/store is acceptable. ;; CHECK-LABEL: g: ;; CHECK: lvx ;; CHECK: stvx ;; CHECK: blr define void @g() { entry: %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 16 %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4 %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4 %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4 store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 16 store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4 store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4 store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4 ret void }