; CHECK: blr
}
+define <4 x float> @test32(<4 x float>* %a) { ; load a <4 x float> through %a and return it unchanged
+ %v = load <4 x float>* %a, align 16 ; align 16 case; the expectations below want a single lxvw4x for this load
+ ret <4 x float> %v
+
+; CHECK-LABEL: @test32
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test33(<4 x float>* %a, <4 x float> %b) { ; store the <4 x float> argument %b through %a
+ store <4 x float> %b, <4 x float>* %a, align 16 ; align 16 case; the expectations below want a single stxvw4x for this store
+ ret void
+
+; CHECK-LABEL: @test33
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x float> @test32u(<4 x float>* %a) { ; same load as test32, but with a smaller alignment on the load
+ %v = load <4 x float>* %a, align 8 ; align 8 is below the 16-byte size of <4 x float>; the expectations below want lvsl, two lvx and a vperm instead of lxvw4x
+ ret <4 x float> %v
+
+; CHECK-LABEL: @test32u
+; CHECK-DAG: lvsl
+; CHECK-DAG: lvx
+; CHECK-DAG: lvx
+; CHECK: vperm 2,
+; CHECK: blr
+}
+
+define void @test33u(<4 x float>* %a, <4 x float> %b) { ; same store as test33, but with a smaller alignment on the store
+ store <4 x float> %b, <4 x float>* %a, align 8 ; NOTE(review): align 8 here still expects the same stxvw4x as the align 16 store, unlike the align 8 load in test32u; presumably intentional, confirm
+ ret void
+
+; CHECK-LABEL: @test33u
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x i32> @test34(<4 x i32>* %a) { ; load a <4 x i32> through %a and return it unchanged
+ %v = load <4 x i32>* %a, align 16 ; align 16 integer-vector load; the expectations below want the same lxvw4x form as the <4 x float> load in test32
+ ret <4 x i32> %v
+
+; CHECK-LABEL: @test34
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test35(<4 x i32>* %a, <4 x i32> %b) { ; store the <4 x i32> argument %b through %a
+ store <4 x i32> %b, <4 x i32>* %a, align 16 ; align 16 integer-vector store; the expectations below want the same stxvw4x form as the <4 x float> store in test33
+ ret void
+
+; CHECK-LABEL: @test35
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
define <2 x double> @test40(<2 x i64> %a) {
%v = uitofp <2 x i64> %a to <2 x double>
ret <2 x double> %v
ret <2 x i32> %i
; CHECK-LABEL: @test80
-; CHECK: addi
-; CHECK: addi
-; CHECK: lxvd2x
+; CHECK-DAG: addi [[R1:[0-9]+]], 3, 3
+; CHECK-DAG: addi [[R2:[0-9]+]], 1, -16
+; CHECK-DAG: addi [[R3:[0-9]+]], 3, 2
+; CHECK: std [[R1]], -8(1)
+; CHECK: std [[R3]], -16(1)
+; CHECK: lxvd2x 34, 0, [[R2]]
; CHECK-NOT: stxvd2x
-; FIXME: We still make one vector for each vector element and this shuffle them
-; together instead of just composing one vector on the stack.
+; CHECK: blr
+}
+
+define <2 x double> @test81(<4 x float> %b) { ; return the <4 x float> argument reinterpreted as <2 x double>
+ %w = bitcast <4 x float> %b to <2 x double> ; bitcast only; the expectations below name no instruction other than the final blr
+ ret <2 x double> %w
+
+; CHECK-LABEL: @test81
; CHECK: blr
}