// 256-bit types
+// Selection pattern for a v8i32 X86vzmovl whose operand is an i32 loaded from
+// memory, turned into a v4i32 with scalar_to_vector and inserted at index 0
+// of an undef vector. The load is emitted as a single XMM-register instruction
+// and wrapped in SUBREG_TO_REG with the sub_xmm subregister index.
+// This change replaces VMOVSSZrm with VMOVDI2PDIZrm for the integer pattern;
+// the v8f32 counterpart keeps VMOVSSZrm.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
+// Selection pattern for a v8f32 X86vzmovl whose operand is an f32 loaded from
+// memory, turned into a v4f32 with scalar_to_vector and inserted at index 0
+// of an undef vector. Emits VMOVSSZrm on the address and wraps the result in
+// SUBREG_TO_REG with the sub_xmm subregister index.
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
FR64X:$src)), sub_xmm)>;
+// Selection pattern for a v4i64 X86vzmovl whose operand is an i64 loaded from
+// memory, turned into a v2i64 with scalar_to_vector and inserted at index 0
+// of an undef vector. The load is emitted as a single XMM-register instruction
+// and wrapped in SUBREG_TO_REG with the sub_xmm subregister index.
+// This change replaces VMOVSDZrm with VMOVQI2PQIZrm for the integer pattern.
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (VMOVSDZrm addr:$src), sub_xmm)>;
+ (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
// Move low f64 and clear high bits.
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
if (X86::VK8RegClass.hasSubClassEq(RC) ||
X86::VK16RegClass.hasSubClassEq(RC))
return load ? X86::KMOVWkm : X86::KMOVWmk;
-
- if (X86::FR32XRegClass.hasSubClassEq(RC))
+ if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC))
return load ? X86::VMOVSSZrm : X86::VMOVSSZmr;
- if (X86::FR64XRegClass.hasSubClassEq(RC))
+ if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC))
return load ? X86::VMOVSDZrm : X86::VMOVSDZmr;
- if (X86::VR128XRegClass.hasSubClassEq(RC) ||
- X86::VR256XRegClass.hasSubClassEq(RC) ||
- X86::VR512RegClass.hasSubClassEq(RC))
+ if (X86::VR512RegClass.hasSubClassEq(RC))
return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
}
%res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
ret <4 x i32>%res
}
+
+; test15: loads an i32 through the pointer argument and inserts it into
+; element 0 of an all-zero <4 x i32>. The directives below expect the output
+; to contain a vmovdz with a (%rdi) memory operand, followed by ret.
+; CHECK-LABEL: @test15
+; CHECK: vmovdz (%rdi)
+; CHECK: ret
+define <4 x i32> @test15(i32* %x) {
+ %y = load i32* %x, align 4
+ %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
+ ret <4 x i32>%res
+}