def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
(VST1q64 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
- (VLD1q32 addrmode6:$addr)>;
+ (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
- (VST1q32 addrmode6:$addr, QPR:$value)>;
+ (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
(VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
//===----------------------------------------------------------------------===//
// bit_convert
-def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
-def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
-def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
+}
def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
-def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
-def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
-def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
-def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
+}
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
+}
def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
-def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+}
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
-def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
-def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+}
-def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
-def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
-def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+}
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
-def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
-def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
-def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
-def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
-def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+}
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+}
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
-def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
-def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
-def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+}
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
-def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
-def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
-def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
-def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+}
+
+let Predicates = [IsBE] in {
+ // 64 bit conversions
+ def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
+
+ // 128 bit conversions
+ def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
+}
// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
dsub_0)>;
}
+// The following class definition is basically a copy of the
+// Lengthen_HalfSingle definition above, however with an additional parameter
+// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
+// data loaded by VLD1LN into proper vector format in big endian mode.
+multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+ string InsnLanes, string InsnTy, string RevLanes> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)>;
+}
+
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
dsub_0))>;
}
+// The following class definition is basically a copy of the
+// Lengthen_Double definition above, however with an additional parameter
+// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
+// data loaded by VLD1LN into proper vector format in big endian mode.
+multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty, string RevLanes> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0))>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0))>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0))>;
+}
+
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
dsub_0)>;
}
+// The following class definition is basically a copy of the
+// Lengthen_HalfDouble definition above, however with an additional VREV16d8
+// instruction to convert data loaded by VLD1LN into proper vector format
+// in big endian mode.
+multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)),
+ dsub_0)>;
+}
+
defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
-defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
-defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
+let Predicates = [IsLE] in {
+ defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
+ defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
+
+ // Double lengthening - v4i8 -> v4i16 -> v4i32
+ defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
+ // v2i8 -> v2i16 -> v2i32
+ defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
+ // v2i16 -> v2i32 -> v2i64
+ defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
+}
+
+let Predicates = [IsBE] in {
+ defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
+ defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
-// Double lengthening - v4i8 -> v4i16 -> v4i32
-defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
-// v2i8 -> v2i16 -> v2i32
-defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
-// v2i16 -> v2i32 -> v2i64
-defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
+ // Double lengthening - v4i8 -> v4i16 -> v4i32
+ defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
+ // v2i8 -> v2i16 -> v2i32
+ defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
+ // v2i16 -> v2i32 -> v2i64
+ defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
+}
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
-def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
- (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (VLD1LNd16 addrmode6:$addr,
- (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
- (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (VLD1LNd16 addrmode6:$addr,
- (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
- (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
- (VLD1LNd16 addrmode6:$addr,
- (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
+ (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+}
+// The following patterns are basically a copy of the patterns above,
+// however with an additional VREV16d instruction to convert data
+// loaded by VLD1LN into proper vector format in big endian mode.
+let Predicates = [IsBE] in {
+ def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
+ (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+}
//===----------------------------------------------------------------------===//
// Assembler aliases