From 94c25519a2acdc7853fb281dd39b31ca66aa7bad Mon Sep 17 00:00:00 2001 From: James Molloy Date: Fri, 12 Sep 2014 13:29:40 +0000 Subject: [PATCH] [ARM] Teach the cost model that cross-class copies are costly. Cross-class copies being expensive is actually a trait of the microarchitecture, but as I haven't yet seen an example of a microarchitecture where they're cheap it seems best to just enable this by default, covering the non-mcpu build case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217674 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMTargetTransformInfo.cpp | 7 ++ test/Analysis/CostModel/ARM/cast.ll | 112 +++++++++++----------- 2 files changed, 63 insertions(+), 56 deletions(-) diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 75a556a261e..ec834e8da59 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -390,6 +390,13 @@ unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy, ValTy->getScalarSizeInBits() <= 32) return 3; + // Cross-class copies are expensive on many microarchitectures, + // so assume they are expensive by default. + if ((Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement) && + ValTy->getVectorElementType()->isIntegerTy()) + return 3; + return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index); } diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll index 662110f2720..18d6e841433 100644 --- a/test/Analysis/CostModel/ARM/cast.ll +++ b/test/Analysis/CostModel/ARM/cast.ll @@ -221,35 +221,35 @@ define i32 @casts() { %r96 = fptoui <2 x float> undef to <2 x i32> ; CHECK: cost of 1 {{.*}} fptosi %r97 = fptosi <2 x float> undef to <2 x i32> - ; CHECK: cost of 28 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r98 = fptoui <2 x float> undef to <2 x i64> - ; CHECK: cost of 28 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r99 = fptosi <2 x float> undef to <2 x i64> - ; CHECK: cost of 8 {{.*}} fptoui + ; CHECK: cost of 16 {{.*}} fptoui %r100 = fptoui <2 x double> undef to <2 x i1> - ; CHECK: cost of 8 {{.*}} fptosi + ; CHECK: cost of 16 {{.*}} fptosi %r101 = fptosi <2 x double> undef to <2 x i1> - ; CHECK: cost of 8 {{.*}} fptoui + ; CHECK: cost of 16 {{.*}} fptoui %r102 = fptoui <2 x double> undef to <2 x i8> - ; CHECK: cost of 8 {{.*}} fptosi + ; CHECK: cost of 16 {{.*}} fptosi %r103 = fptosi <2 x double> undef to <2 x i8> - ; CHECK: cost of 8 {{.*}} fptoui + ; CHECK: cost of 16 {{.*}} fptoui %r104 = fptoui <2 x double> undef to <2 x i16> - ; CHECK: cost of 8 {{.*}} fptosi + ; CHECK: cost of 16 {{.*}} fptosi %r105 = fptosi <2 x double> undef to <2 x i16> ; CHECK: cost of 2 {{.*}} fptoui %r106 = fptoui <2 x double> undef to <2 x i32> ; CHECK: cost of 2 {{.*}} fptosi %r107 = fptosi <2 x double> undef to <2 x i32> - ; CHECK: cost of 28 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r108 = fptoui <2 x double> undef to <2 x i64> - ; CHECK: cost of 28 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r109 = fptosi <2 x double> undef to <2 x i64> - ; CHECK: cost of 16 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r110 = fptoui <4 x float> undef to <4 x i1> - ; CHECK: cost of 16 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r111 = fptosi <4 x float> undef to <4 x i1> ; CHECK: cost of 3 {{.*}} fptoui %r112 = fptoui <4 x float> undef to <4 x i8> @@ -263,39 +263,39 @@ define i32 @casts() { %r116 = fptoui <4 x float> undef to <4 x i32> ; CHECK: cost of 1 {{.*}} fptosi %r117 = fptosi <4 x float> undef to <4 x i32> - ; CHECK: cost of 56 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r118 = fptoui <4 x float> undef to <4 x i64> - ; CHECK: cost of 56 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r119 = fptosi <4 x float> undef to <4 x i64> - ; CHECK: cost of 16 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r120 = fptoui <4 x double> undef to <4 x i1> - ; CHECK: cost of 16 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r121 = fptosi <4 x double> undef to <4 x i1> - ; CHECK: cost of 16 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r122 = fptoui <4 x double> undef to <4 x i8> - ; CHECK: cost of 16 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r123 = fptosi <4 x double> undef to <4 x i8> - ; CHECK: cost of 16 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r124 = fptoui <4 x double> undef to <4 x i16> - ; CHECK: cost of 16 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r125 = fptosi <4 x double> undef to <4 x i16> - ; CHECK: cost of 16 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r126 = fptoui <4 x double> undef to <4 x i32> - ; CHECK: cost of 16 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r127 = fptosi <4 x double> undef to <4 x i32> - ; CHECK: cost of 56 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r128 = fptoui <4 x double> undef to <4 x i64> - ; CHECK: cost of 56 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r129 = fptosi <4 x double> undef to <4 x i64> - ; CHECK: cost of 32 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r130 = fptoui <8 x float> undef to <8 x i1> - ; CHECK: cost of 32 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r131 = fptosi <8 x float> undef to <8 x i1> - ; CHECK: cost of 32 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r132 = fptoui <8 x float> undef to <8 x i8> - ; CHECK: cost of 32 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r133 = fptosi <8 x float> undef to <8 x i8> ; CHECK: cost of 4 {{.*}} fptoui %r134 = fptoui <8 x float> undef to <8 x i16> @@ -305,39 +305,39 @@ define i32 @casts() { %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK: cost of 2 {{.*}} fptosi %r137 = fptosi <8 x float> undef to <8 x i32> - ; CHECK: cost of 112 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r138 = fptoui <8 x float> undef to <8 x i64> - ; CHECK: cost of 112 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r139 = fptosi <8 x float> undef to <8 x i64> - ; CHECK: cost of 32 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r140 = fptoui <8 x double> undef to <8 x i1> - ; CHECK: cost of 32 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r141 = fptosi <8 x double> undef to <8 x i1> - ; CHECK: cost of 32 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r142 = fptoui <8 x double> undef to <8 x i8> - ; CHECK: cost of 32 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r143 = fptosi <8 x double> undef to <8 x i8> - ; CHECK: cost of 32 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r144 = fptoui <8 x double> undef to <8 x i16> - ; CHECK: cost of 32 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r145 = fptosi <8 x double> undef to <8 x i16> - ; CHECK: cost of 32 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r146 = fptoui <8 x double> undef to <8 x i32> - ; CHECK: cost of 32 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r147 = fptosi <8 x double> undef to <8 x i32> - ; CHECK: cost of 112 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r148 = fptoui <8 x double> undef to <8 x i64> - ; CHECK: cost of 112 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r149 = fptosi <8 x double> undef to <8 x i64> - ; CHECK: cost of 64 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r150 = fptoui <16 x float> undef to <16 x i1> - ; CHECK: cost of 64 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r151 = fptosi <16 x float> undef to <16 x i1> - ; CHECK: cost of 64 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r152 = fptoui <16 x float> undef to <16 x i8> - ; CHECK: cost of 64 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r153 = fptosi <16 x float> undef to <16 x i8> ; CHECK: cost of 8 {{.*}} fptoui %r154 = fptoui <16 x float> undef to <16 x i16> @@ -347,30 +347,30 @@ define i32 @casts() { %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK: cost of 4 {{.*}} fptosi %r157 = fptosi <16 x float> undef to <16 x i32> - ; CHECK: cost of 224 {{.*}} fptoui + ; CHECK: cost of 256 {{.*}} fptoui %r158 = fptoui <16 x float> undef to <16 x i64> - ; CHECK: cost of 224 {{.*}} fptosi + ; CHECK: cost of 256 {{.*}} fptosi %r159 = fptosi <16 x float> undef to <16 x i64> - ; CHECK: cost of 64 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r160 = fptoui <16 x double> undef to <16 x i1> - ; CHECK: cost of 64 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r161 = fptosi <16 x double> undef to <16 x i1> - ; CHECK: cost of 64 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r162 = fptoui <16 x double> undef to <16 x i8> - ; CHECK: cost of 64 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r163 = fptosi <16 x double> undef to <16 x i8> - ; CHECK: cost of 64 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r164 = fptoui <16 x double> undef to <16 x i16> - ; CHECK: cost of 64 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r165 = fptosi <16 x double> undef to <16 x i16> - ; CHECK: cost of 64 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r166 = fptoui <16 x double> undef to <16 x i32> - ; CHECK: cost of 64 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r167 = fptosi <16 x double> undef to <16 x i32> - ; CHECK: cost of 224 {{.*}} fptoui + ; CHECK: cost of 256 {{.*}} fptoui %r168 = fptoui <16 x double> undef to <16 x i64> - ; CHECK: cost of 224 {{.*}} fptosi + ; CHECK: cost of 256 {{.*}} fptosi %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK: cost of 8 {{.*}} uitofp -- 2.34.1