On some cores it's a bad idea for performance to mix VFP and NEON instructions
and since these patterns are NEON anyway, the NEON load should be used.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@155630
91177308-0d34-0410-b5e6-
96231b3b80d8
// Vector lengthening move with load, matching extending loads.
// extload, zextload and sextload for a standard lengthening load. Example:
// Vector lengthening move with load, matching extending loads.
// extload, zextload and sextload for a standard lengthening load. Example:
-// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr))
-// (VMOVLuv8i16 (VLDRD addrmode5:$addr))>;
+// Lengthen_Single<"8", "i16", "i8"> =
+// Pat<(v8i16 (extloadvi8 addrmode6oneL32:$addr))
+// (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0)))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
- (VLDRD addrmode5:$addr))>;
+ (VLD1LNd32 addrmode6oneL32:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
- (VLDRD addrmode5:$addr))>;
+ (VLD1LNd32 addrmode6oneL32:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
(!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
- (VLDRD addrmode5:$addr))>;
+ (VLD1LNd32 addrmode6oneL32:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))>;
}
// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
}
// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
-// Pat<(v4i16 (extloadvi8 addrmode5:$addr))
-// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-// (VLDRS addrmode5:$addr),
-// ssub_0)),
+// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0))),
// dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
string InsnLanes, string InsnTy> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
// dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
string InsnLanes, string InsnTy> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
dsub_0)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
dsub_0)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> =
// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
// (EXTRACT_SUBREG (VMOVLuv4i32
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> =
// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
// (EXTRACT_SUBREG (VMOVLuv4i32
-// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-// (VLDRS addrmode5:$addr),
-// ssub_0)),
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (f64 (IMPLICIT_DEF)),
+// (i32 0))),
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
string Insn2Ty> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
string Insn2Ty> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0))>;
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0))>;
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
(!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0))>;
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0))>;
}
// extload, zextload and sextload for a lengthening load followed by another
}
// extload, zextload and sextload for a lengthening load followed by another
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
-// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
-// (EXTRACT_SUBREG (VMOVLuv4i32
-// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-// (VLDRS addrmode5:$addr),
-// ssub_0)),
-// dsub_0)),
-// dsub_0)>;
+// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (f64 (IMPLICIT_DEF)), (i32 0))),
+// dsub_0)),
+// dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
string Insn2Ty> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
string Insn2Ty> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0)),
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
dsub_0)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
dsub_0)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0)),
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
dsub_0)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
dsub_0)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
- (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
- ssub_0)), dsub_0)),
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
+ dsub_0)),
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
-def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)),
+def : Pat<(v2i64 (extloadvi8 addrmode6oneL32:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
- dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)),
+ (VLD1LNd32 addrmode6oneL32:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (zextloadvi8 addrmode6oneL32:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
- dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)),
+ (VLD1LNd32 addrmode6oneL32:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (sextloadvi8 addrmode6oneL32:$addr)),
(VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
(VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
- dsub_0)), dsub_0))>;
+ (VLD1LNd32 addrmode6oneL32:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
//===----------------------------------------------------------------------===//
// Assembler aliases
//===----------------------------------------------------------------------===//
// Assembler aliases
; CHECK: g:
define float @g(<4 x i8>* nocapture %in) {
; CHECK: g:
define float @g(<4 x i8>* nocapture %in) {
+; Note: vld1 here is reasonably important. Mixing VFP and NEON
+; instructions is bad on some cores
+ ; CHECK: vld1
; CHECK: vmovl.u8
; CHECK: vmovl.u16
%1 = load <4 x i8>* %in
; CHECK: vmovl.u8
; CHECK: vmovl.u16
%1 = load <4 x i8>* %in
; CHECK: i:
define <4 x i8> @i(<4 x i8>* %x) {
; CHECK: i:
define <4 x i8> @i(<4 x i8>* %x) {
+; Note: vld1 here is reasonably important. Mixing VFP and NEON
+; instructions is bad on some cores
+ ; CHECK: vld1
; CHECK: vmovl.s8
; CHECK: vmovl.s16
; CHECK: vrecpe
; CHECK: vmovl.s8
; CHECK: vmovl.s16
; CHECK: vrecpe