ARM: add cyclone CPU with ZeroCycleZeroing feature.
author: Tim Northover <tnorthover@apple.com>
Tue, 1 Apr 2014 13:22:02 +0000 (13:22 +0000)
committer: Tim Northover <tnorthover@apple.com>
Tue, 1 Apr 2014 13:22:02 +0000 (13:22 +0000)
The Cyclone CPU is similar to swift for most LLVM purposes, but does have two
preferred instructions for zeroing a VFP register. This teaches LLVM about
them.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205309 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/ARM/ARM.td
lib/Target/ARM/ARMInstrInfo.td
lib/Target/ARM/ARMInstrNEON.td
lib/Target/ARM/ARMSubtarget.cpp
lib/Target/ARM/ARMSubtarget.h
test/CodeGen/ARM/zero-cycle-zero.ll [new file with mode: 0644]

index 27bbcc22b640d4a0f12c65922e4209b3ed368184..7916ccc180c8f02aa73ecec44134efb6d7487c24 100644 (file)
@@ -73,6 +73,11 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
 def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
                           "Enable support for CRC instructions">;
 
+// Cyclone has preferred instructions for zeroing VFP registers, which can
+// execute in 0 cycles.
+def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
+                                        "Has zero-cycle zeroing instructions">;
+
 // Some processors have FP multiply-accumulate instructions that don't
 // play nicely with other VFP / NEON instructions, and it's generally better
 // to just not use them.
@@ -361,6 +366,13 @@ def : ProcessorModel<"cortex-a15",   CortexA9Model,
                                      FeatureDSPThumb2, FeatureHasRAS,
                                      FeatureAClass]>;
 
+// FIXME: krait has currently the same Schedule model as A9
+def : ProcessorModel<"krait",       CortexA9Model,
+                                    [ProcKrait, HasV7Ops,
+                                     FeatureNEON, FeatureDB,
+                                     FeatureDSPThumb2, FeatureHasRAS,
+                                     FeatureAClass]>;
+
 // FIXME: R5 has currently the same ProcessorModel as A8.
 def : ProcessorModel<"cortex-r5",   CortexA8Model,
                                     [ProcR5, HasV7Ops, FeatureDB,
@@ -395,12 +407,12 @@ def : ProcNoItin<"cortex-a57",      [ProcA57, HasV8Ops, FeatureAClass,
                                     FeatureDB, FeatureFPARMv8,
                                     FeatureNEON, FeatureDSPThumb2]>;
 
-// FIXME: krait has currently the same Schedule model as A9
-def : ProcessorModel<"krait",       CortexA9Model,
-                                    [ProcKrait, HasV7Ops,
-                                     FeatureNEON, FeatureDB,
-                                     FeatureDSPThumb2, FeatureHasRAS,
-                                     FeatureAClass]>;
+// Cyclone is very similar to swift
+def : ProcessorModel<"cyclone",     SwiftModel,
+                                    [ProcSwift, HasV8Ops, HasV7Ops,
+                                     FeatureCrypto, FeatureFPARMv8,
+                                     FeatureDB,FeatureDSPThumb2,
+                                     FeatureHasRAS, FeatureZCZeroing]>;
 
 //===----------------------------------------------------------------------===//
 // Register File Description
index cc748e19c851cc4b24eb56e75b7ed7975c9ba72a..dfcc11edcd907e81e56185d51bc84cd19ba084b0 100644 (file)
@@ -244,6 +244,7 @@ def HasMP            : Predicate<"Subtarget->hasMPExtension()">,
 def HasTrustZone     : Predicate<"Subtarget->hasTrustZone()">,
                                  AssemblerPredicate<"FeatureTrustZone",
                                                     "TrustZone">;
+def HasZCZ           : Predicate<"Subtarget->hasZeroCycleZeroing()">;
 def UseNEONForFP     : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
 def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
 def IsThumb          : Predicate<"Subtarget->isThumb()">,
index b18eac55d88910c2d76ff5346e8acddcc5efcdf7..0d46c49bcf87c69b8a18331c612f1df50a2fa535 100644 (file)
@@ -5245,6 +5245,26 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                          [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
 } // isReMaterializable
 
+
+// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
+// require zero cycles to execute so they should be used wherever possible for
+// setting a register to zero.
+
+// Even without these pseudo-insts we would probably end up with the correct
+// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
+// since they are sometimes rather expensive (in general).
+
+let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
+  def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
+                               [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
+                               (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
+               Requires<[HasZCZ]>;
+  def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
+                               [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
+                               (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
+               Requires<[HasZCZ]>;
+}
+
 //   VMOV     : Vector Get Lane (move scalar to ARM core register)
 
 def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
index a290136f6bd7b373b54c126cb50a289990334a23..0dec1c406a43a0f7afa4d71f4145753c91495c72 100644 (file)
@@ -134,6 +134,7 @@ void ARMSubtarget::initializeEnvironment() {
   HasTrustZone = false;
   HasCrypto = false;
   HasCRC = false;
+  HasZeroCycleZeroing = false;
   AllowsUnalignedMem = false;
   Thumb2DSP = false;
   UseNaClTrap = false;
index 2ce99c890f727b8be205966f63742e00e9db5a69..e76cc85a1aa1e062f30eac2b204560ab6112201d 100644 (file)
@@ -177,6 +177,10 @@ protected:
   /// HasCRC - if true, processor supports CRC instructions
   bool HasCRC;
 
+  /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
+  /// particularly effective at zeroing a VFP register.
+  bool HasZeroCycleZeroing;
+
   /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
   /// accesses for some types.  For details, see
   /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
@@ -298,6 +302,7 @@ public:
   bool isFPOnlySP() const { return FPOnlySP; }
   bool hasPerfMon() const { return HasPerfMon; }
   bool hasTrustZone() const { return HasTrustZone; }
+  bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
   bool prefers32BitThumb() const { return Pref32BitThumb; }
   bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
   bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
diff --git a/test/CodeGen/ARM/zero-cycle-zero.ll b/test/CodeGen/ARM/zero-cycle-zero.ll
new file mode 100644 (file)
index 0000000..121a87f
--- /dev/null
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=armv8 -mcpu=cyclone < %s | FileCheck %s --check-prefix=CHECK-CYCLONE
+; RUN: llc -mtriple=armv8 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-SWIFT
+
+declare arm_aapcs_vfpcc void @take_vec64(<2 x i32>)
+
+define void @test_vec64() {
+; CHECK-CYCLONE-LABEL: test_vec64:
+; CHECK-SWIFT-LABEL: test_vec64:
+
+  call arm_aapcs_vfpcc void @take_vec64(<2 x i32> <i32 0, i32 0>)
+  call arm_aapcs_vfpcc void @take_vec64(<2 x i32> <i32 0, i32 0>)
+; CHECK-CYCLONE-NOT: vmov.f64 d0,
+; CHECK-CYCLONE: vmov.i32 d0, #0
+; CHECK-CYCLONE: bl
+; CHECK-CYCLONE: vmov.i32 d0, #0
+; CHECK-CYCLONE: bl
+
+; CHECK-SWIFT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK-SWIFT: vmov.i32 [[ZEROREG]], #0
+; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]]
+; CHECK-SWIFT: bl
+; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]]
+; CHECK-SWIFT: bl
+
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @take_vec128(<8 x i16>)
+
+define void @test_vec128() {
+; CHECK-CYCLONE-LABEL: test_vec128:
+; CHECK-SWIFT-LABEL: test_vec128:
+
+  call arm_aapcs_vfpcc void @take_vec128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+  call arm_aapcs_vfpcc void @take_vec128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK-CYCLONE: vmov.i32 q0, #0
+; CHECK-CYCLONE: bl
+; CHECK-CYCLONE: vmov.i32 q0, #0
+; CHECK-CYCLONE: bl
+
+; CHECK-SWIFT-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK-SWIFT: vmov.i32 [[ZEROREG:q[0-9]+]], #0
+; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]]
+; CHECK-SWIFT: bl
+; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]]
+; CHECK-SWIFT: bl
+
+  ret void
+}
+
+declare void @take_i32(i32)
+
+define void @test_i32() {
+; CHECK-CYCLONE-LABEL: test_i32:
+; CHECK-SWIFT-LABEL: test_i32:
+
+  call arm_aapcs_vfpcc void @take_i32(i32 0)
+  call arm_aapcs_vfpcc void @take_i32(i32 0)
+; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK-CYCLONE: mov r0, #0
+; CHECK-CYCLONE: bl
+; CHECK-CYCLONE: mov r0, #0
+; CHECK-CYCLONE: bl
+
+; It doesn't particularly matter what Swift does here, there isn't carefully
+; crafted behaviour that we might break in Cyclone.
+
+  ret void
+}