AArch64: use 32-bit MOV rather than UBFX to truncate registers.

author Tim Northover <tnorthover@apple.com>
Wed, 29 Jul 2015 21:34:32 +0000 (21:34 +0000)

committer Tim Northover <tnorthover@apple.com>
Wed, 29 Jul 2015 21:34:32 +0000 (21:34 +0000)
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td

index 867b95b566a63d4cf7aec43d54d0f43aacc8f1b8..48ac7b8f68190c1f2e6adece575160cd3f08a4a0 100644 (file)
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5148,10 +5148,10 @@ def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
  def : Pat<(i64 (anyext GPR32:$src)),
            (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
  
-// When we need to explicitly zero-extend, we use an unsigned bitfield move
-// instruction (UBFM) on the enclosing super-reg.
+// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
+// then assert the extension has happened.
  def : Pat<(i64 (zext GPR32:$src)),
- (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
+          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
  
  // To sign extend, we use a signed bitfield move instruction (SBFM) on the
  // containing super-reg.
diff --git a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll

index 739570236da92634d22e4f127db769a9ddfc505b..83b9d0a30ae0258854071b0908ed75c3704dda27 100644 (file)
--- a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -252,11 +252,11 @@ entry:
  ; CHECK: ldr   w[[IARG:[0-9]+]], [x29, #24]
  ; CHECK: ldr   d[[DARG:[0-9]+]], [x29, #40]
  ;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
-; CHECK: ubfx  x9, x0, #0, #32
+; CHECK: mov   w9, w0
+; CHECK: mov    x10, sp
  ; CHECK: lsl   x9, x9, #2
  ; CHECK: add   x9, x9, #15
  ; CHECK: and   x9, x9, #0x7fffffff0
-; CHECK: mov    x10, sp
  ; CHECK: sub    x[[VLASPTMP:[0-9]+]], x10, x9
  ; CHECK: mov    sp, x[[VLASPTMP]]
  ;   Check correct access to local variable, through frame pointer
@@ -299,11 +299,11 @@ entry:
  ; CHECK: ldr   w[[IARG:[0-9]+]], [x29, #24]
  ; CHECK: ldr   d[[DARG:[0-9]+]], [x29, #40]
  ;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
-; CHECK: ubfx  x9, x0, #0, #32
+; CHECK: mov   w9, w0
+; CHECK: mov    x10, sp
  ; CHECK: lsl   x9, x9, #2
  ; CHECK: add   x9, x9, #15
  ; CHECK: and   x9, x9, #0x7fffffff0
-; CHECK: mov    x10, sp
  ; CHECK: sub    x[[VLASPTMP:[0-9]+]], x10, x9
  ; CHECK: mov    sp, x[[VLASPTMP]]
  ;   Check correct access to local variable, through frame pointer
@@ -361,11 +361,11 @@ entry:
  ; CHECK: ldr   d[[DARG:[0-9]+]], [x29, #40]
  ;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
  ;   and set-up of base pointer (x19).
-; CHECK: ubfx  x9, x0, #0, #32
+; CHECK: mov   w9, w0
+; CHECK: mov    x10, sp
  ; CHECK: lsl   x9, x9, #2
  ; CHECK: add   x9, x9, #15
  ; CHECK: and   x9, x9, #0x7fffffff0
-; CHECK: mov    x10, sp
  ; CHECK: sub    x[[VLASPTMP:[0-9]+]], x10, x9
  ; CHECK: mov    sp, x[[VLASPTMP]]
  ;   Check correct access to local variable, through base pointer
@@ -414,11 +414,11 @@ entry:
  ; CHECK: ldr   d[[DARG:[0-9]+]], [x29, #40]
  ;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
  ;   and set-up of base pointer (x19).
-; CHECK: ubfx  x9, x0, #0, #32
+; CHECK: mov   w9, w0
+; CHECK: mov    x10, sp
  ; CHECK: lsl   x9, x9, #2
  ; CHECK: add   x9, x9, #15
  ; CHECK: and   x9, x9, #0x7fffffff0
-; CHECK: mov    x10, sp
  ; CHECK: sub    x[[VLASPTMP:[0-9]+]], x10, x9
  ; CHECK: mov    sp, x[[VLASPTMP]]
  ;   Check correct access to local variable, through base pointer
@@ -465,11 +465,11 @@ entry:
  ; CHECK: ldr   d[[DARG:[0-9]+]], [x29, #40]
  ;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
  ;   and set-up of base pointer (x19).
-; CHECK: ubfx  x9, x0, #0, #32
+; CHECK: mov   w9, w0
+; CHECK: mov    x10, sp
  ; CHECK: lsl   x9, x9, #2
  ; CHECK: add   x9, x9, #15
  ; CHECK: and   x9, x9, #0x7fffffff0
-; CHECK: mov    x10, sp
  ; CHECK: sub    x[[VLASPTMP:[0-9]+]], x10, x9
  ; CHECK: mov    sp, x[[VLASPTMP]]
  ;   Check correct access to local variable, through base pointer
diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll

index d0880cd4f3eb567fd9f46bc2ef8267249aedf7ab..f345acf453d7f82a00a11d8184fd34fc7ee43863 100644 (file)
--- a/test/CodeGen/AArch64/arm64-aapcs.ll
+++ b/test/CodeGen/AArch64/arm64-aapcs.ll
@@ -78,8 +78,8 @@ define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) {
  
    %ext_int = zext i32 %int to i64
    store volatile i64 %ext_int, i64* @var64
-; CHECK: ubfx [[EXT:x[0-9]+]], x3, #0, #32
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: mov w[[EXT:[0-9]+]], w3
+; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
  
    ret void
  }
diff --git a/test/CodeGen/AArch64/arm64-popcnt.ll b/test/CodeGen/AArch64/arm64-popcnt.ll

index b0b529a13f413905c6e1bd760649b28edb987343..d6c9471b7a19a0724aaa7ca4ba14ff5ce74fe6c6 100644 (file)
--- a/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -4,8 +4,8 @@
  define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
    %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
    ret i32 %cnt
-; CHECK: ubfx  x{{[0-9]+}}
-; CHECK: fmov  d0, x{{[0-9]+}}
+; CHECK: mov w[[IN64:[0-9]+]], w0
+; CHECK: fmov  d0, x[[IN64]]
  ; CHECK: cnt.8b        v0, v0
  ; CHECK: uaddlv.8b     h0, v0
  ; CHECK: fmov w0, s0
diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll

index 78399c80b5de29cb3e890579a7711c0c0858f169..e1e4f62f6622ae2aa0f9db23656d3109d48b32a8 100644 (file)
--- a/test/CodeGen/AArch64/bitfield.ll
+++ b/test/CodeGen/AArch64/bitfield.ll
@@ -60,7 +60,7 @@ define void @test_extendw(i32 %var) {
  
    %uxt64 = zext i32 %var to i64
    store volatile i64 %uxt64, i64* @var64
-; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32
+; CHECK: mov {{w[0-9]+}}, w0
    ret void
  }
author	Tim Northover <tnorthover@apple.com>
	Wed, 29 Jul 2015 21:34:32 +0000 (21:34 +0000)
committer	Tim Northover <tnorthover@apple.com>
	Wed, 29 Jul 2015 21:34:32 +0000 (21:34 +0000)
lib/Target/AArch64/AArch64InstrInfo.td		patch | blob | history
test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll		patch | blob | history
test/CodeGen/AArch64/arm64-aapcs.ll		patch | blob | history
test/CodeGen/AArch64/arm64-popcnt.ll		patch | blob | history
test/CodeGen/AArch64/bitfield.ll		patch | blob | history