test/CodeGen/AArch64/arm64-abi_align.ll

   1 ; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
   2 ; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
   3 target triple = "arm64-apple-darwin"
   4
   5 ; rdar://12648441
   6 ; Generated from arm64-arguments.c with -O2.
   7 ; Test passing structs with size < 8, < 16 and > 16
   8 ; with alignment of 16 and without
   9
  10 ; Structs with size < 8
  11 %struct.s38 = type { i32, i16 }
  12 ; With alignment of 16, the size will be padded to a multiple of 16 bytes.
  13 %struct.s39 = type { i32, i16, [10 x i8] }
  14 ; Structs with size < 16
  15 %struct.s40 = type { i32, i16, i32, i16 }
     ; s41 has the same fields as s40; the difference is that its globals
     ; below are declared with align 16 instead of align 4.
  16 %struct.s41 = type { i32, i16, i32, i16 }
  17 ; Structs with size > 16
  18 %struct.s42 = type { i32, i16, i32, i16, i32, i16 }
     ; s43 is s42 followed by [10 x i8]; its globals below use align 16.
  19 %struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] }
  20
     ; Two zero-initialized globals per struct type: the callers below load or
     ; copy gNN as the first struct argument and gNN_2 as the second.
  21 @g38 = common global %struct.s38 zeroinitializer, align 4
  22 @g38_2 = common global %struct.s38 zeroinitializer, align 4
  23 @g39 = common global %struct.s39 zeroinitializer, align 16
  24 @g39_2 = common global %struct.s39 zeroinitializer, align 16
  25 @g40 = common global %struct.s40 zeroinitializer, align 4
  26 @g40_2 = common global %struct.s40 zeroinitializer, align 4
  27 @g41 = common global %struct.s41 zeroinitializer, align 16
  28 @g41_2 = common global %struct.s41 zeroinitializer, align 16
  29 @g42 = common global %struct.s42 zeroinitializer, align 4
  30 @g42_2 = common global %struct.s42 zeroinitializer, align 4
  31 @g43 = common global %struct.s43 zeroinitializer, align 16
  32 @g43_2 = common global %struct.s43 zeroinitializer, align 16
  33
  34 ; structs with size < 8 bytes, passed via i64 in x1 and x2
     ; Each coerced i64 holds the struct's i32 field in bits 0-31 and its i16
     ; field in bits 32-47. The body returns %i plus both i32 fields plus both
     ; sign-extended i16 fields. The CHECK lines pin only the two adds of the
     ; low words: w1 + w0, then + w2.
  35 define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
  36 entry:
  37 ; CHECK: f38
  38 ; CHECK: add w[[A:[0-9]+]], w1, w0
  39 ; CHECK: add {{w[0-9]+}}, w[[A]], w2
  40   %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32
  41   %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32
  42   %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32
  43   %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32
     ; shl 16 / trunc to i32 / ashr 16 sign-extends the 16-bit field that the
     ; lshr above moved down to bits 0-15.
  44   %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16
  45   %sext = trunc i64 %sext8 to i32
  46   %conv = ashr exact i32 %sext, 16
  47   %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16
  48   %sext10 = trunc i64 %sext1011 to i32
  49   %conv6 = ashr exact i32 %sext10, 16
  50   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  51   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  52   %add4 = add i32 %add3, %conv
  53   %add7 = add i32 %add4, %conv6
  54   ret i32 %add7
  55 }
  56
     ; Loads @g38 and @g38_2 as one i64 each and tail-calls f38 with i32 3 as
     ; the leading argument. The CHECK lines expect the two loads to target the
     ; argument registers x1 and x2 directly.
  57 define i32 @caller38() #1 {
  58 entry:
  59 ; CHECK: caller38
  60 ; CHECK: ldr x1,
  61 ; CHECK: ldr x2,
  62   %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
  63   %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
  64   %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
  65   ret i32 %call
  66 }
  67
     ; Callee with nine leading i32 parameters ahead of the two coerced structs.
  68 declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
  69                 i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0
  70
  71 ; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16]
  72 ; i9 at [sp]
     ; Presumably the first eight i32s occupy w0-w7 (not checked here), which
     ; is why the ninth i32 and both structs end up in stack slots.
     ; The CHECK lines expect one stp covering [sp, #8]..[sp, #23] for the two
     ; i64s and a 32-bit store of the constant 9 at [sp].
  73 define i32 @caller38_stack() #1 {
  74 entry:
  75 ; CHECK: caller38_stack
  76 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
  77 ; CHECK: movz w[[C:[0-9]+]], #0x9
  78 ; CHECK: str w[[C]], [sp]
  79   %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
  80   %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
  81   %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
  82                                    i32 7, i32 8, i32 9, i64 %0, i64 %1) #5
  83   ret i32 %call
  84 }
  85
  86 ; structs with size < 8 bytes, alignment of 16
  87 ; passed via i128 in x1/x2 and x3/x4; only the low halves (x1, x3) are read
     ; Same computation as f38, but each struct arrives coerced to an i128.
     ; All extracted fields live in bits 0-47 of the i128, so the CHECK lines
     ; pin the low-word adds on w1 and w3.
  88 define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
  89 entry:
  90 ; CHECK: f39
  91 ; CHECK: add w[[A:[0-9]+]], w1, w0
  92 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
  93   %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
  94   %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
  95   %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
  96   %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
     ; shl 16 / trunc to i32 / ashr 16 sign-extends the 16-bit field.
  97   %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
  98   %sext = trunc i128 %sext8 to i32
  99   %conv = ashr exact i32 %sext, 16
 100   %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
 101   %sext10 = trunc i128 %sext1011 to i32
 102   %conv6 = ashr exact i32 %sext10, 16
 103   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
 104   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
 105   %add4 = add i32 %add3, %conv
 106   %add7 = add i32 %add4, %conv6
 107   ret i32 %add7
 108 }
 109
     ; Loads @g39 and @g39_2 as one i128 each and tail-calls f39. The CHECK
     ; lines expect each i128 to be loaded as a register pair: x1/x2 for the
     ; first struct and x3/x4 for the second.
 110 define i32 @caller39() #1 {
 111 entry:
 112 ; CHECK: caller39
 113 ; CHECK: ldp x1, x2,
 114 ; CHECK: ldp x3, x4,
 115   %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
 116   %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
 117   %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
 118   ret i32 %call
 119 }
 120
     ; Callee with nine leading i32 parameters ahead of the two i128 structs.
 121 declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 122                 i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
 123
 124 ; structs with size < 8 bytes, alignment 16
 125 ; passed on stack at [sp+16] and [sp+32]
     ; Unlike caller38_stack (structs at [sp+8]), the first struct here starts
     ; at [sp+16]: the expected offsets are multiples of 16. The constant 9 is
     ; still stored as a 32-bit value at [sp].
 126 define i32 @caller39_stack() #1 {
 127 entry:
 128 ; CHECK: caller39_stack
 129 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
 130 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
 131 ; CHECK: movz w[[C:[0-9]+]], #0x9
 132 ; CHECK: str w[[C]], [sp]
 133   %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
 134   %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
 135   %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
 136                                    i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
 137   ret i32 %call
 138 }
 139
 140 ; structs with size < 16 bytes
 141 ; passed via [2 x i64] in x1/x2 and x3/x4
     ; Only element 0 of each array is extracted: it carries the first i32 in
     ; bits 0-31 and the first i16 in bits 32-47. The result is %i plus those
     ; two i32 fields plus the two sign-extended i16 fields. The CHECK lines
     ; pin the low-word adds on w1 and w3.
 142 define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
 143 entry:
 144 ; CHECK: f40
 145 ; CHECK: add w[[A:[0-9]+]], w1, w0
 146 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
 147   %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0
 148   %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0
 149   %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32
 150   %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32
 151   %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32
     ; shl 16 / trunc to i32 / ashr 16 sign-extends the 16-bit field.
 152   %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16
 153   %sext = trunc i64 %sext8 to i32
 154   %conv = ashr exact i32 %sext, 16
 155   %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32
 156   %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16
 157   %sext10 = trunc i64 %sext1011 to i32
 158   %conv6 = ashr exact i32 %sext10, 16
 159   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
 160   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
 161   %add4 = add i32 %add3, %conv
 162   %add7 = add i32 %add4, %conv6
 163   ret i32 %add7
 164 }
 165
     ; Loads @g40 and @g40_2 as [2 x i64] (align 4) and tail-calls f40. The
     ; CHECK lines expect each array to be loaded as a register pair: x1/x2
     ; and x3/x4.
 166 define i32 @caller40() #1 {
 167 entry:
 168 ; CHECK: caller40
 169 ; CHECK: ldp x1, x2,
 170 ; CHECK: ldp x3, x4,
 171   %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
 172   %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
 173   %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
 174   ret i32 %call
 175 }
 176
     ; Callee with nine leading i32 parameters ahead of the two [2 x i64] structs.
 177 declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 178                 i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0
 179
 180 ; structs with size < 16 bytes
 181 ; passed on stack at [sp+8] and [sp+24]
     ; Without the 16-byte alignment, the first 16-byte struct is expected
     ; right after the i32 slot, at [sp+8] (compare caller41_stack, where the
     ; aligned variant starts at [sp+16]). The constant 9 is stored at [sp].
 182 define i32 @caller40_stack() #1 {
 183 entry:
 184 ; CHECK: caller40_stack
 185 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
 186 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
 187 ; CHECK: movz w[[C:[0-9]+]], #0x9
 188 ; CHECK: str w[[C]], [sp]
 189   %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
 190   %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
 191   %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
 192                          i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5
 193   ret i32 %call
 194 }
 195
 196 ; structs with size < 16 bytes, alignment of 16
 197 ; passed via i128 in x1/x2 and x3/x4; only the low halves (x1, x3) are read
     ; The body is the same as f39: it uses the i32 in bits 0-31 and the i16 in
     ; bits 32-47 of each i128, so the CHECK lines pin the adds on w1 and w3.
 198 define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
 199 entry:
 200 ; CHECK: f41
 201 ; CHECK: add w[[A:[0-9]+]], w1, w0
 202 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
 203   %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
 204   %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
 205   %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
 206   %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
     ; shl 16 / trunc to i32 / ashr 16 sign-extends the 16-bit field.
 207   %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
 208   %sext = trunc i128 %sext8 to i32
 209   %conv = ashr exact i32 %sext, 16
 210   %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
 211   %sext10 = trunc i128 %sext1011 to i32
 212   %conv6 = ashr exact i32 %sext10, 16
 213   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
 214   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
 215   %add4 = add i32 %add3, %conv
 216   %add7 = add i32 %add4, %conv6
 217   ret i32 %add7
 218 }
 219
     ; Loads @g41 and @g41_2 as one i128 each (align 16) and tail-calls f41.
     ; The CHECK lines expect register pairs x1/x2 and x3/x4.
 220 define i32 @caller41() #1 {
 221 entry:
 222 ; CHECK: caller41
 223 ; CHECK: ldp x1, x2,
 224 ; CHECK: ldp x3, x4,
 225   %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
 226   %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
 227   %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
 228   ret i32 %call
 229 }
 230
     ; Callee with nine leading i32 parameters ahead of the two i128 structs.
 231 declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 232                 i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
 233
 234 ; structs with size < 16 bytes, alignment of 16
 235 ; passed on stack at [sp+16] and [sp+32]
     ; The i128 coercion moves the first struct to [sp+16] rather than the
     ; [sp+8] expected in caller40_stack. The constant 9 is stored at [sp].
 236 define i32 @caller41_stack() #1 {
 237 entry:
 238 ; CHECK: caller41_stack
 239 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
 240 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
 241 ; CHECK: movz w[[C:[0-9]+]], #0x9
 242 ; CHECK: str w[[C]], [sp]
 243   %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
 244   %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
 245   %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
 246                             i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
 247   ret i32 %call
 248 }
 249
 250 ; structs with size of 22 bytes, passed indirectly in x1 and x2
     ; (22 is where the last field ends; the callers copy 24 bytes per struct.)
     ; The callee receives pointers and returns %i plus field 0 (i32) of each
     ; struct plus the sign-extended field 1 (i16) of each struct. Both the
     ; optimized (CHECK) and -O0 (FAST) runs expect the i32 loads through x1
     ; and x2, followed by the two adds.
 251 define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 {
 252 entry:
 253 ; CHECK: f42
 254 ; CHECK: ldr w[[A:[0-9]+]], [x1]
 255 ; CHECK: ldr w[[B:[0-9]+]], [x2]
 256 ; CHECK: add w[[C:[0-9]+]], w[[A]], w0
 257 ; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
 258 ; FAST: f42
 259 ; FAST: ldr w[[A:[0-9]+]], [x1]
 260 ; FAST: ldr w[[B:[0-9]+]], [x2]
 261 ; FAST: add w[[C:[0-9]+]], w[[A]], w0
 262 ; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
 263   %i1 = getelementptr inbounds %struct.s42* %s1, i64 0, i32 0
 264   %0 = load i32* %i1, align 4, !tbaa !0
 265   %i2 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 0
 266   %1 = load i32* %i2, align 4, !tbaa !0
 267   %s = getelementptr inbounds %struct.s42* %s1, i64 0, i32 1
 268   %2 = load i16* %s, align 2, !tbaa !3
 269   %conv = sext i16 %2 to i32
 270   %s5 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 1
 271   %3 = load i16* %s5, align 2, !tbaa !3
 272   %conv6 = sext i16 %3 to i32
 273   %add = add i32 %0, %i
 274   %add3 = add i32 %add, %1
 275   %add4 = add i32 %add3, %conv
 276   %add7 = add i32 %add4, %conv6
 277   ret i32 %add7
 278 }
 279
 280 ; For s1, we allocate a 24-byte space (the memcpy size below), pass its address via x1
     ; s2 gets its own 24-byte copy, whose address is passed via x2.
     ; Each copy is expected as a 16-byte q-register store plus an 8-byte
     ; x-register store, i.e. 24 bytes per struct.
 281 define i32 @caller42() #3 {
 282 entry:
 283 ; CHECK: caller42
 284 ; CHECK: str {{x[0-9]+}}, [sp, #48]
 285 ; CHECK: str {{q[0-9]+}}, [sp, #32]
 286 ; CHECK: str {{x[0-9]+}}, [sp, #16]
 287 ; CHECK: str {{q[0-9]+}}, [sp]
 288 ; CHECK: add x1, sp, #32
 289 ; CHECK: mov x2, sp
 290 ; Space for s1 is allocated at sp+32
 291 ; Space for s2 is allocated at sp
 292
 293 ; FAST: caller42
 294 ; FAST: sub sp, sp, #96
 295 ; Space for s1 is allocated at fp-24 = sp+72
 296 ; Space for s2 is allocated at sp+48
     ; A captures the s1 address register and B the s2 address register, using
     ; the same names as the FAST checks in caller42_stack.
 297 ; FAST: sub x[[A:[0-9]+]], x29, #24
 298 ; FAST: add x[[B:[0-9]+]], sp, #48
 299 ; Call memcpy with size = 24 (0x18)
 300 ; FAST: orr {{x[0-9]+}}, xzr, #0x18
 301   %tmp = alloca %struct.s42, align 4
 302   %tmp1 = alloca %struct.s42, align 4
 303   %0 = bitcast %struct.s42* %tmp to i8*
 304   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
 305   %1 = bitcast %struct.s42* %tmp1 to i8*
 306   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
 307   %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
 308   ret i32 %call
 309 }
 310
     ; memcpy intrinsic used by the caller42*/caller43* functions to copy the
     ; globals into local allocas. Call sites pass (dest, src, i64 byte count,
     ; i32, i1 false); presumably the last two are alignment and the volatile
     ; flag, per the LangRef of this IR vintage — confirm.
 311 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4
 312
     ; Callee with nine leading i32 parameters ahead of the two struct pointers.
 313 declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 314                        i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1,
 315                        %struct.s42* nocapture %s2) #2
 316
     ; Copies @g42 and @g42_2 into two local allocas (24 bytes each) and passes
     ; their addresses after nine i32 arguments. The checks expect the constant
     ; 9 at [sp], the address of s1 at [sp, #8] and the address of s2 at
     ; [sp, #16]; only the pointers are passed, while the copies live elsewhere
     ; in the frame.
 317 define i32 @caller42_stack() #3 {
 318 entry:
 319 ; CHECK: caller42_stack
 320 ; CHECK: mov x29, sp
 321 ; CHECK: sub sp, sp, #96
 322 ; CHECK: stur {{x[0-9]+}}, [x29, #-16]
 323 ; CHECK: stur {{q[0-9]+}}, [x29, #-32]
 324 ; CHECK: str {{x[0-9]+}}, [sp, #48]
 325 ; CHECK: str {{q[0-9]+}}, [sp, #32]
 326 ; Space for s1 is allocated at x29-32 = sp+64
 327 ; Space for s2 is allocated at sp+32
 328 ; CHECK: add x[[B:[0-9]+]], sp, #32
 329 ; CHECK: str x[[B]], [sp, #16]
 330 ; CHECK: sub x[[A:[0-9]+]], x29, #32
 331 ; Address of s1 is passed on stack at sp+8
 332 ; CHECK: str x[[A]], [sp, #8]
 333 ; CHECK: movz w[[C:[0-9]+]], #0x9
 334 ; CHECK: str w[[C]], [sp]
 335
 336 ; FAST: caller42_stack
 337 ; Space for s1 is allocated at fp-24
 338 ; Space for s2 is allocated at fp-48
 339 ; FAST: sub x[[A:[0-9]+]], x29, #24
 340 ; FAST: sub x[[B:[0-9]+]], x29, #48
 341 ; Call memcpy with size = 24 (0x18)
 342 ; FAST: orr {{x[0-9]+}}, xzr, #0x18
 343 ; FAST: str {{w[0-9]+}}, [sp]
 344 ; Address of s1 is passed on stack at sp+8
 345 ; FAST: str {{x[0-9]+}}, [sp, #8]
 346 ; FAST: str {{x[0-9]+}}, [sp, #16]
 347   %tmp = alloca %struct.s42, align 4
 348   %tmp1 = alloca %struct.s42, align 4
 349   %0 = bitcast %struct.s42* %tmp to i8*
 350   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
 351   %1 = bitcast %struct.s42* %tmp1 to i8*
 352   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
 353   %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
 354                        i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5
 355   ret i32 %call
 356 }
 357
 358 ; structs with size of 22 bytes, alignment of 16
 359 ; passed indirectly in x1 and x2
     ; (The IR type %struct.s43 appends [10 x i8] after the fields, and the
     ; callers copy 32 bytes per struct.)
     ; The body matches f42 except for the pointee type: it returns %i plus
     ; field 0 of each struct plus the sign-extended field 1 of each struct.
 360 define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 {
 361 entry:
 362 ; CHECK: f43
 363 ; CHECK: ldr w[[A:[0-9]+]], [x1]
 364 ; CHECK: ldr w[[B:[0-9]+]], [x2]
 365 ; CHECK: add w[[C:[0-9]+]], w[[A]], w0
 366 ; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
 367 ; FAST: f43
 368 ; FAST: ldr w[[A:[0-9]+]], [x1]
 369 ; FAST: ldr w[[B:[0-9]+]], [x2]
 370 ; FAST: add w[[C:[0-9]+]], w[[A]], w0
 371 ; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
 372   %i1 = getelementptr inbounds %struct.s43* %s1, i64 0, i32 0
 373   %0 = load i32* %i1, align 4, !tbaa !0
 374   %i2 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 0
 375   %1 = load i32* %i2, align 4, !tbaa !0
 376   %s = getelementptr inbounds %struct.s43* %s1, i64 0, i32 1
 377   %2 = load i16* %s, align 2, !tbaa !3
 378   %conv = sext i16 %2 to i32
 379   %s5 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 1
 380   %3 = load i16* %s5, align 2, !tbaa !3
 381   %conv6 = sext i16 %3 to i32
 382   %add = add i32 %0, %i
 383   %add3 = add i32 %add, %1
 384   %add4 = add i32 %add3, %conv
 385   %add7 = add i32 %add4, %conv6
 386   ret i32 %add7
 387 }
 388
     ; Copies @g43 and @g43_2 (32 bytes each, align 16) into two local allocas
     ; and passes their addresses to f43. The optimized run expects each copy
     ; as two 16-byte q-register stores; the -O0 run expects four 8-byte
     ; x-register stores per struct instead.
 389 define i32 @caller43() #3 {
 390 entry:
 391 ; CHECK: caller43
 392 ; CHECK: str {{q[0-9]+}}, [sp, #48]
 393 ; CHECK: str {{q[0-9]+}}, [sp, #32]
 394 ; CHECK: str {{q[0-9]+}}, [sp, #16]
 395 ; CHECK: str {{q[0-9]+}}, [sp]
 396 ; CHECK: add x1, sp, #32
 397 ; CHECK: mov x2, sp
 398 ; Space for s1 is allocated at sp+32
 399 ; Space for s2 is allocated at sp
 400
 401 ; FAST: caller43
 402 ; FAST: mov x29, sp
 403 ; Space for s1 is allocated at sp+32
 404 ; Space for s2 is allocated at sp
 405 ; FAST: add x1, sp, #32
 406 ; FAST: mov x2, sp
 407 ; FAST: str {{x[0-9]+}}, [sp, #32]
 408 ; FAST: str {{x[0-9]+}}, [sp, #40]
 409 ; FAST: str {{x[0-9]+}}, [sp, #48]
 410 ; FAST: str {{x[0-9]+}}, [sp, #56]
 411 ; FAST: str {{x[0-9]+}}, [sp]
 412 ; FAST: str {{x[0-9]+}}, [sp, #8]
 413 ; FAST: str {{x[0-9]+}}, [sp, #16]
 414 ; FAST: str {{x[0-9]+}}, [sp, #24]
 415   %tmp = alloca %struct.s43, align 16
 416   %tmp1 = alloca %struct.s43, align 16
 417   %0 = bitcast %struct.s43* %tmp to i8*
 418   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
 419   %1 = bitcast %struct.s43* %tmp1 to i8*
 420   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
 421   %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
 422   ret i32 %call
 423 }
 424
     ; Callee with nine leading i32 parameters ahead of the two struct pointers.
 425 declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 426                        i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1,
 427                        %struct.s43* nocapture %s2) #2
 428
     ; Copies @g43 and @g43_2 (32 bytes each) into local allocas and passes
     ; their addresses after nine i32 arguments. As in caller42_stack, the
     ; checks expect the constant 9 at [sp], the address of s1 at [sp, #8] and
     ; the address of s2 at [sp, #16].
 429 define i32 @caller43_stack() #3 {
 430 entry:
 431 ; CHECK: caller43_stack
 432 ; CHECK: mov x29, sp
 433 ; CHECK: sub sp, sp, #96
 434 ; CHECK: stur {{q[0-9]+}}, [x29, #-16]
 435 ; CHECK: stur {{q[0-9]+}}, [x29, #-32]
 436 ; CHECK: str {{q[0-9]+}}, [sp, #48]
 437 ; CHECK: str {{q[0-9]+}}, [sp, #32]
 438 ; Space for s1 is allocated at x29-32 = sp+64
 439 ; Space for s2 is allocated at sp+32
 440 ; CHECK: add x[[B:[0-9]+]], sp, #32
 441 ; CHECK: str x[[B]], [sp, #16]
 442 ; CHECK: sub x[[A:[0-9]+]], x29, #32
 443 ; Address of s1 is passed on stack at sp+8
 444 ; CHECK: str x[[A]], [sp, #8]
 445 ; CHECK: movz w[[C:[0-9]+]], #0x9
 446 ; CHECK: str w[[C]], [sp]
 447
 448 ; FAST: caller43_stack
 449 ; FAST: sub sp, sp, #96
 450 ; Space for s1 is allocated at fp-32 = sp+64
 451 ; Space for s2 is allocated at sp+32
 452 ; FAST: sub x[[A:[0-9]+]], x29, #32
 453 ; FAST: add x[[B:[0-9]+]], sp, #32
 454 ; FAST: stur {{x[0-9]+}}, [x29, #-32]
 455 ; FAST: stur {{x[0-9]+}}, [x29, #-24]
 456 ; FAST: stur {{x[0-9]+}}, [x29, #-16]
 457 ; FAST: stur {{x[0-9]+}}, [x29, #-8]
 458 ; FAST: str {{x[0-9]+}}, [sp, #32]
 459 ; FAST: str {{x[0-9]+}}, [sp, #40]
 460 ; FAST: str {{x[0-9]+}}, [sp, #48]
 461 ; FAST: str {{x[0-9]+}}, [sp, #56]
 462 ; FAST: str {{w[0-9]+}}, [sp]
 463 ; Address of s1 is passed on stack at sp+8
 464 ; FAST: str {{x[0-9]+}}, [sp, #8]
 465 ; FAST: str {{x[0-9]+}}, [sp, #16]
 466   %tmp = alloca %struct.s43, align 16
 467   %tmp1 = alloca %struct.s43, align 16
 468   %0 = bitcast %struct.s43* %tmp to i8*
 469   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
 470   %1 = bitcast %struct.s43* %tmp1 to i8*
 471   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
 472   %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
 473                        i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5
 474   ret i32 %call
 475 }
 476
 477 ; rdar://13668927
 478 ; Check that we don't split an i128.
     ; The callee takes seven i32s, then an i128, then one more i32. Presumably
     ; only one argument register (x7) is still free when the i128 is assigned,
     ; so the test checks that the whole value goes to the stack instead of
     ; being divided between x7 and memory.
 479 declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
 480                                i32 %i6, i32 %i7, i128 %s1, i32 %i8)
 481
 482 define i32 @i128_split() {
 483 entry:
 484 ; CHECK: i128_split
 485 ; "i128 %0" should be on stack at [sp].
 486 ; "i32 8" should be on stack at [sp, #16].
 487 ; CHECK: str {{w[0-9]+}}, [sp, #16]
 488 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
 489 ; FAST: i128_split
 490 ; FAST: sub sp, sp, #48
 491 ; FAST: mov x[[ADDR:[0-9]+]], sp
 492 ; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
 493 ; Load/Store opt is disabled with -O0, so the i128 is split.
     ; ("Split" here means two separate 8-byte stores rather than one stp; both
     ; halves are still expected on the stack, at ADDR and ADDR+8.)
 494 ; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
 495 ; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
 496   %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
 497   %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
 498                                            i32 6, i32 7, i128 %0, i32 8) #5
 499   ret i32 %call
 500 }
 501
     ; Counterpart to callee_i128_split: same parameter list, but with an i64
     ; in the eighth position instead of an i128.
 502 declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
 503                                i32 %i6, i32 %i7, i64 %s1, i32 %i8)
 504
     ; Loads the first 8 bytes of @g41 as an i64 and passes it after seven
     ; i32s. Both runs expect the i64 to be loaded straight into x7, leaving
     ; only the trailing i32 on the stack at [sp].
 505 define i32 @i64_split() {
 506 entry:
 507 ; CHECK: i64_split
 508 ; "i64 %0" should be in register x7.
 509 ; "i32 8" should be on stack at [sp].
 510 ; CHECK: ldr x7, [{{x[0-9]+}}]
 511 ; CHECK: str {{w[0-9]+}}, [sp]
 512 ; FAST: i64_split
 513 ; FAST: ldr x7, [{{x[0-9]+}}]
 514 ; FAST: str {{w[0-9]+}}, [sp]
 515   %0 = load i64* bitcast (%struct.s41* @g41 to i64*), align 16
 516   %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
 517                                     i32 6, i32 7, i64 %0, i32 8) #5
 518   ret i32 %call
 519 }
 520
     ; Attribute groups, by where this file uses them:
     ;   #0  by-value callees f38-f41 and their *_stack declarations
     ;   #1  callers caller38-caller41 and their *_stack variants
     ;   #2  pointer-taking callees f42/f43 and their *_stack declarations
     ;   #3  callers caller42/caller43 and their *_stack variants
     ;   #4  the llvm.memcpy declaration
     ;   #5  attached to call sites of the test callees
 521 attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
 522 attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
 523 attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
 524 attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
 525 attributes #4 = { nounwind }
 526 attributes #5 = { nobuiltin }
 527
     ; TBAA metadata. !0 ("int") and !3 ("short") both have !1 ("omnipotent
     ; char") as parent, which in turn sits under the root !2. They tag the
     ; i32 and i16 field loads in f42 and f43.
 528 !0 = metadata !{metadata !"int", metadata !1}
 529 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 530 !2 = metadata !{metadata !"Simple C/C++ TBAA"}
 531 !3 = metadata !{metadata !"short", metadata !1}
     ; !4 is attached as !tbaa.struct to the memcpy calls. Its entries read as
     ; (offset, size, type tag) triples: int at 0, 8 and 16, short at 4, 12 and
     ; 20 — presumably the field layout of %struct.s42; confirm against the
     ; LangRef.
 532 !4 = metadata !{i64 0, i64 4, metadata !0, i64 4, i64 2, metadata !3, i64 8, i64 4, metadata !0, i64 12, i64 2, metadata !3, i64 16, i64 4, metadata !0, i64 20, i64 2, metadata !3}